AArch64LegalizerInfo.cpp
1//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the MachineLegalizer class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
15#include "AArch64Subtarget.h"
16#include "llvm/ADT/STLExtras.h"
27#include "llvm/IR/Intrinsics.h"
28#include "llvm/IR/IntrinsicsAArch64.h"
29#include "llvm/IR/Type.h"
31#include <initializer_list>
32
33#define DEBUG_TYPE "aarch64-legalinfo"
34
35using namespace llvm;
36using namespace LegalizeActions;
37using namespace LegalizeMutations;
38using namespace LegalityPredicates;
39using namespace MIPatternMatch;
40
42 : ST(&ST) {
43 using namespace TargetOpcode;
44 const LLT p0 = LLT::pointer(0, 64);
45 const LLT s8 = LLT::scalar(8);
46 const LLT s16 = LLT::scalar(16);
47 const LLT s32 = LLT::scalar(32);
48 const LLT s64 = LLT::scalar(64);
49 const LLT s128 = LLT::scalar(128);
50 const LLT v16s8 = LLT::fixed_vector(16, 8);
51 const LLT v8s8 = LLT::fixed_vector(8, 8);
52 const LLT v4s8 = LLT::fixed_vector(4, 8);
53 const LLT v2s8 = LLT::fixed_vector(2, 8);
54 const LLT v8s16 = LLT::fixed_vector(8, 16);
55 const LLT v4s16 = LLT::fixed_vector(4, 16);
56 const LLT v2s16 = LLT::fixed_vector(2, 16);
57 const LLT v2s32 = LLT::fixed_vector(2, 32);
58 const LLT v4s32 = LLT::fixed_vector(4, 32);
59 const LLT v2s64 = LLT::fixed_vector(2, 64);
60 const LLT v2p0 = LLT::fixed_vector(2, p0);
61
62 const LLT nxv16s8 = LLT::scalable_vector(16, s8);
63 const LLT nxv8s16 = LLT::scalable_vector(8, s16);
64 const LLT nxv4s32 = LLT::scalable_vector(4, s32);
65 const LLT nxv2s64 = LLT::scalable_vector(2, s64);
66
67 std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
68 v16s8, v8s16, v4s32,
69 v2s64, v2p0,
70 /* End 128bit types */
71 /* Begin 64bit types */
72 v8s8, v4s16, v2s32};
73 std::initializer_list<LLT> ScalarAndPtrTypesList = {s8, s16, s32, s64, p0};
74 SmallVector<LLT, 8> PackedVectorAllTypesVec(PackedVectorAllTypeList);
75 SmallVector<LLT, 8> ScalarAndPtrTypesVec(ScalarAndPtrTypesList);
76
77 const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();
78
79 // FIXME: support subtargets which have neon/fp-armv8 disabled.
80 if (!ST.hasNEON() || !ST.hasFPARMv8()) {
81 getLegacyLegalizerInfo().computeTables();
82 return;
83 }
84
85 // Some instructions only support s16 if the subtarget has full 16-bit FP
86 // support.
87 const bool HasFP16 = ST.hasFullFP16();
88 const LLT &MinFPScalar = HasFP16 ? s16 : s32;
89
90 const bool HasCSSC = ST.hasCSSC();
91 const bool HasRCPC3 = ST.hasRCPC3();
92 const bool HasSVE = ST.hasSVE();
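// CSSC supplies scalar ABS/CNT/CTZ and MIN/MAX instructions, RCPC3 adds the
// 128-bit acquire/release load/store forms used for s128 atomics, and SVE
// enables the scalable-vector (nxv*) rules below.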
93
94 getActionDefinitionsBuilder(
95 {G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
96 .legalFor({p0, s8, s16, s32, s64})
97 .legalFor({v2s8, v4s8, v8s8, v16s8, v2s16, v4s16, v8s16, v2s32, v4s32,
98 v2s64, v2p0})
99 .widenScalarToNextPow2(0)
100 .clampScalar(0, s8, s64)
103 .clampNumElements(0, v8s8, v16s8)
104 .clampNumElements(0, v4s16, v8s16)
105 .clampNumElements(0, v2s32, v4s32)
106 .clampMaxNumElements(0, s64, 2)
107 .clampMaxNumElements(0, p0, 2)
109
110 getActionDefinitionsBuilder(G_PHI)
111 .legalFor({p0, s16, s32, s64})
112 .legalFor(PackedVectorAllTypeList)
116 .clampScalar(0, s16, s64)
117 .clampNumElements(0, v8s8, v16s8)
118 .clampNumElements(0, v4s16, v8s16)
119 .clampNumElements(0, v2s32, v4s32)
120 .clampMaxNumElements(0, s64, 2)
121 .clampMaxNumElements(0, p0, 2);
122
123 getActionDefinitionsBuilder(G_INSERT)
124 .legalIf(all(typeInSet(0, {s32, s64, p0}), typeInSet(1, {s8, s16, s32}),
125 smallerThan(1, 0)))
126 .widenScalarToNextPow2(0)
127 .clampScalar(0, s32, s64)
129 .minScalar(1, s8)
130 .maxScalarIf(typeInSet(0, {s32}), 1, s16)
131 .maxScalarIf(typeInSet(0, {s64, p0}), 1, s32);
132
133 getActionDefinitionsBuilder(G_EXTRACT)
134 .legalIf(all(typeInSet(0, {s16, s32, s64, p0}),
135 typeInSet(1, {s32, s64, s128, p0}), smallerThan(0, 1)))
136 .widenScalarToNextPow2(1)
137 .clampScalar(1, s32, s128)
139 .minScalar(0, s16)
140 .maxScalarIf(typeInSet(1, {s32}), 0, s16)
141 .maxScalarIf(typeInSet(1, {s64, p0}), 0, s32)
142 .maxScalarIf(typeInSet(1, {s128}), 0, s64);
143
144 getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR})
145 .legalFor({s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64})
146 .legalFor(HasSVE, {nxv16s8, nxv8s16, nxv4s32, nxv2s64})
147 .widenScalarToNextPow2(0)
148 .clampScalar(0, s32, s64)
149 .clampMaxNumElements(0, s8, 16)
150 .clampMaxNumElements(0, s16, 8)
151 .clampNumElements(0, v2s32, v4s32)
152 .clampNumElements(0, v2s64, v2s64)
154 [=](const LegalityQuery &Query) {
155 return Query.Types[0].getNumElements() <= 2;
156 },
157 0, s32)
158 .minScalarOrEltIf(
159 [=](const LegalityQuery &Query) {
160 return Query.Types[0].getNumElements() <= 4;
161 },
162 0, s16)
163 .minScalarOrEltIf(
164 [=](const LegalityQuery &Query) {
165 return Query.Types[0].getNumElements() <= 16;
166 },
167 0, s8)
168 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
170
171 getActionDefinitionsBuilder(G_MUL)
172 .legalFor({s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64})
173 .widenScalarToNextPow2(0)
174 .clampScalar(0, s32, s64)
175 .clampMaxNumElements(0, s8, 16)
176 .clampMaxNumElements(0, s16, 8)
177 .clampNumElements(0, v2s32, v4s32)
178 .clampNumElements(0, v2s64, v2s64)
180 [=](const LegalityQuery &Query) {
181 return Query.Types[0].getNumElements() <= 2;
182 },
183 0, s32)
184 .minScalarOrEltIf(
185 [=](const LegalityQuery &Query) {
186 return Query.Types[0].getNumElements() <= 4;
187 },
188 0, s16)
189 .minScalarOrEltIf(
190 [=](const LegalityQuery &Query) {
191 return Query.Types[0].getNumElements() <= 16;
192 },
193 0, s8)
194 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
196
197 getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
198 .customIf([=](const LegalityQuery &Query) {
199 const auto &SrcTy = Query.Types[0];
200 const auto &AmtTy = Query.Types[1];
201 return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
202 AmtTy.getSizeInBits() == 32;
203 })
204 .legalFor({
205 {s32, s32},
206 {s32, s64},
207 {s64, s64},
208 {v8s8, v8s8},
209 {v16s8, v16s8},
210 {v4s16, v4s16},
211 {v8s16, v8s16},
212 {v2s32, v2s32},
213 {v4s32, v4s32},
214 {v2s64, v2s64},
215 })
216 .widenScalarToNextPow2(0)
217 .clampScalar(1, s32, s64)
218 .clampScalar(0, s32, s64)
219 .clampNumElements(0, v8s8, v16s8)
220 .clampNumElements(0, v4s16, v8s16)
221 .clampNumElements(0, v2s32, v4s32)
222 .clampNumElements(0, v2s64, v2s64)
224 .minScalarSameAs(1, 0)
228
229 getActionDefinitionsBuilder(G_PTR_ADD)
230 .legalFor({{p0, s64}, {v2p0, v2s64}})
231 .clampScalarOrElt(1, s64, s64)
232 .clampNumElements(0, v2p0, v2p0);
233
234 getActionDefinitionsBuilder(G_PTRMASK).legalFor({{p0, s64}});
235
236 getActionDefinitionsBuilder({G_SDIV, G_UDIV})
237 .legalFor({s32, s64})
238 .libcallFor({s128})
239 .clampScalar(0, s32, s64)
241 .scalarize(0);
242
243 getActionDefinitionsBuilder({G_SREM, G_UREM, G_SDIVREM, G_UDIVREM})
244 .lowerFor({s8, s16, s32, s64, v2s32, v4s32, v2s64})
245 .libcallFor({s128})
247 .minScalarOrElt(0, s32)
248 .clampNumElements(0, v2s32, v4s32)
249 .clampNumElements(0, v2s64, v2s64)
250 .scalarize(0);
251
252 getActionDefinitionsBuilder({G_SMULO, G_UMULO})
253 .widenScalarToNextPow2(0, /*Min = */ 32)
254 .clampScalar(0, s32, s64)
255 .lower();
256
257 getActionDefinitionsBuilder({G_SMULH, G_UMULH})
258 .legalFor({s64, v16s8, v8s16, v4s32})
259 .lower();
260
261 getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
262 .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
263 .legalFor(HasCSSC, {s32, s64})
264 .minScalar(HasCSSC, 0, s32)
265 .clampNumElements(0, v8s8, v16s8)
266 .clampNumElements(0, v4s16, v8s16)
267 .clampNumElements(0, v2s32, v4s32)
268 .lower();
269
270 // FIXME: Legal vector types are only legal with NEON.
271 getActionDefinitionsBuilder(G_ABS)
272 .legalFor(HasCSSC, {s32, s64})
273 .legalFor(PackedVectorAllTypeList)
274 .customIf([=](const LegalityQuery &Q) {
275 // TODO: Fix suboptimal codegen for 128+ bit types.
276 LLT SrcTy = Q.Types[0];
277 return SrcTy.isScalar() && SrcTy.getSizeInBits() < 128;
278 })
279 .widenScalarIf(
280 [=](const LegalityQuery &Query) { return Query.Types[0] == v4s8; },
281 [=](const LegalityQuery &Query) { return std::make_pair(0, v4s16); })
282 .widenScalarIf(
283 [=](const LegalityQuery &Query) { return Query.Types[0] == v2s16; },
284 [=](const LegalityQuery &Query) { return std::make_pair(0, v2s32); })
285 .clampNumElements(0, v8s8, v16s8)
286 .clampNumElements(0, v4s16, v8s16)
287 .clampNumElements(0, v2s32, v4s32)
288 .clampNumElements(0, v2s64, v2s64)
290 .lower();
291
292 getActionDefinitionsBuilder(
293 {G_ABDS, G_ABDU, G_UAVGFLOOR, G_UAVGCEIL, G_SAVGFLOOR, G_SAVGCEIL})
294 .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
295 .lower();
296
297 getActionDefinitionsBuilder(
298 {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
299 .legalFor({{s32, s32}, {s64, s32}})
300 .clampScalar(0, s32, s64)
301 .clampScalar(1, s32, s64)
303
304 getActionDefinitionsBuilder({G_FSHL, G_FSHR})
305 .customFor({{s32, s32}, {s32, s64}, {s64, s64}})
306 .lower();
307
308 getActionDefinitionsBuilder(G_ROTR)
309 .legalFor({{s32, s64}, {s64, s64}})
310 .customIf([=](const LegalityQuery &Q) {
311 return Q.Types[0].isScalar() && Q.Types[1].getScalarSizeInBits() < 64;
312 })
313 .lower();
314 getActionDefinitionsBuilder(G_ROTL).lower();
315
316 getActionDefinitionsBuilder({G_SBFX, G_UBFX})
317 .customFor({{s32, s32}, {s64, s64}});
318
319 auto always = [=](const LegalityQuery &Q) { return true; };
320 getActionDefinitionsBuilder(G_CTPOP)
321 .legalFor(HasCSSC, {{s32, s32}, {s64, s64}})
322 .legalFor({{v8s8, v8s8}, {v16s8, v16s8}})
323 .customFor(!HasCSSC, {{s32, s32}, {s64, s64}})
324 .customFor({{s128, s128},
325 {v4s16, v4s16},
326 {v8s16, v8s16},
327 {v2s32, v2s32},
328 {v4s32, v4s32},
329 {v2s64, v2s64}})
330 .clampScalar(0, s32, s128)
332 .minScalarEltSameAsIf(always, 1, 0)
333 .maxScalarEltSameAsIf(always, 1, 0)
334 .clampNumElements(0, v8s8, v16s8)
335 .clampNumElements(0, v4s16, v8s16)
336 .clampNumElements(0, v2s32, v4s32)
337 .clampNumElements(0, v2s64, v2s64)
340
341 getActionDefinitionsBuilder(G_CTLZ)
342 .legalFor({{s32, s32},
343 {s64, s64},
344 {v8s8, v8s8},
345 {v16s8, v16s8},
346 {v4s16, v4s16},
347 {v8s16, v8s16},
348 {v2s32, v2s32},
349 {v4s32, v4s32}})
350 .widenScalarToNextPow2(1, /*Min=*/32)
351 .clampScalar(1, s32, s64)
352 .clampNumElements(0, v8s8, v16s8)
353 .clampNumElements(0, v4s16, v8s16)
354 .clampNumElements(0, v2s32, v4s32)
357 .scalarSameSizeAs(0, 1);
358
359 getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).lower();
360
361 getActionDefinitionsBuilder(G_CTTZ)
362 .lowerIf(isVector(0))
363 .widenScalarToNextPow2(1, /*Min=*/32)
364 .clampScalar(1, s32, s64)
365 .scalarSameSizeAs(0, 1)
366 .legalFor(HasCSSC, {s32, s64})
367 .customFor(!HasCSSC, {s32, s64});
368
369 getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF).lower();
370
371 getActionDefinitionsBuilder(G_BITREVERSE)
372 .legalFor({s32, s64, v8s8, v16s8})
373 .widenScalarToNextPow2(0, /*Min = */ 32)
375 .clampScalar(0, s32, s64)
376 .clampNumElements(0, v8s8, v16s8)
377 .clampNumElements(0, v4s16, v8s16)
378 .clampNumElements(0, v2s32, v4s32)
379 .clampNumElements(0, v2s64, v2s64)
382 .lower();
383
384 getActionDefinitionsBuilder(G_BSWAP)
385 .legalFor({s32, s64, v4s16, v8s16, v2s32, v4s32, v2s64})
387 .clampScalar(0, s32, s64)
388 .clampNumElements(0, v4s16, v8s16)
389 .clampNumElements(0, v2s32, v4s32)
390 .clampNumElements(0, v2s64, v2s64)
392
393 getActionDefinitionsBuilder({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
394 .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64})
395 .legalFor(HasSVE, {nxv16s8, nxv8s16, nxv4s32, nxv2s64})
396 .clampNumElements(0, v8s8, v16s8)
397 .clampNumElements(0, v4s16, v8s16)
398 .clampNumElements(0, v2s32, v4s32)
399 .clampMaxNumElements(0, s64, 2)
402 .lower();
403
404 getActionDefinitionsBuilder(
405 {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FSQRT, G_FMAXNUM, G_FMINNUM,
406 G_FMAXIMUM, G_FMINIMUM, G_FCEIL, G_FFLOOR, G_FRINT, G_FNEARBYINT,
407 G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
408 .legalFor({s32, s64, v2s32, v4s32, v2s64})
409 .legalFor(HasFP16, {s16, v4s16, v8s16})
410 .libcallFor({s128})
411 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
412 .minScalarOrElt(0, MinFPScalar)
413 .clampNumElements(0, v4s16, v8s16)
414 .clampNumElements(0, v2s32, v4s32)
415 .clampNumElements(0, v2s64, v2s64)
417
418 getActionDefinitionsBuilder({G_FABS, G_FNEG})
419 .legalFor({s32, s64, v2s32, v4s32, v2s64})
420 .legalFor(HasFP16, {s16, v4s16, v8s16})
421 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
423 .clampNumElements(0, v4s16, v8s16)
424 .clampNumElements(0, v2s32, v4s32)
425 .clampNumElements(0, v2s64, v2s64)
427 .lowerFor({s16, v4s16, v8s16});
428
429 getActionDefinitionsBuilder(G_FREM)
430 .libcallFor({s32, s64, s128})
431 .minScalar(0, s32)
432 .scalarize(0);
433
434 getActionDefinitionsBuilder({G_INTRINSIC_LRINT, G_INTRINSIC_LLRINT})
435 .legalFor({{s64, MinFPScalar}, {s64, s32}, {s64, s64}})
436 .libcallFor({{s64, s128}})
437 .minScalarOrElt(1, MinFPScalar);
438
439 getActionDefinitionsBuilder({G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2,
440 G_FLOG10, G_FTAN, G_FEXP, G_FEXP2, G_FEXP10,
441 G_FACOS, G_FASIN, G_FATAN, G_FATAN2, G_FCOSH,
442 G_FSINH, G_FTANH, G_FMODF})
443 // We need a call for these, so we always need to scalarize.
444 .scalarize(0)
445 // Regardless of FP16 support, widen 16-bit elements to 32-bits.
446 .minScalar(0, s32)
447 .libcallFor({s32, s64, s128});
448 getActionDefinitionsBuilder({G_FPOWI, G_FLDEXP})
449 .scalarize(0)
450 .minScalar(0, s32)
451 .libcallFor({{s32, s32}, {s64, s32}, {s128, s32}});
452
453 getActionDefinitionsBuilder({G_LROUND, G_LLROUND})
454 .legalFor({{s64, s32}, {s64, s64}})
455 .legalFor(HasFP16, {{s64, s16}})
456 .minScalar(0, s64)
457 .minScalar(1, s32)
458 .libcallFor({{s64, s128}});
459
460 // TODO: Custom legalization for mismatched types.
461 getActionDefinitionsBuilder(G_FCOPYSIGN)
463 [](const LegalityQuery &Query) { return Query.Types[0].isScalar(); },
464 [=](const LegalityQuery &Query) {
465 const LLT Ty = Query.Types[0];
466 return std::pair(0, LLT::fixed_vector(Ty == s16 ? 4 : 2, Ty));
467 })
468 .lower();
469
471
472 for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
473 auto &Actions = getActionDefinitionsBuilder(Op);
474
475 if (Op == G_SEXTLOAD)
476 Actions.lowerIf(atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered));
477
478 // Atomics have zero extending behavior.
479 Actions
480 .legalForTypesWithMemDesc({{s32, p0, s8, 8},
481 {s32, p0, s16, 8},
482 {s32, p0, s32, 8},
483 {s64, p0, s8, 2},
484 {s64, p0, s16, 2},
485 {s64, p0, s32, 4},
486 {s64, p0, s64, 8},
487 {p0, p0, s64, 8},
488 {v2s32, p0, s64, 8}})
489 .widenScalarToNextPow2(0)
490 .clampScalar(0, s32, s64)
491 // TODO: We could support sum-of-pow2's but the lowering code doesn't know
492 // how to do that yet.
493 .unsupportedIfMemSizeNotPow2()
494 // Lower anything left over into G_*EXT and G_LOAD
495 .lower();
496 }
497
498 auto IsPtrVecPred = [=](const LegalityQuery &Query) {
499 const LLT &ValTy = Query.Types[0];
500 return ValTy.isPointerVector() && ValTy.getAddressSpace() == 0;
501 };
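// Vectors of pointers (e.g. v2p0) are sent to custom legalization
// (legalizeLoadStore), which rewrites such accesses in terms of
// integer-element vectors of the same size.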
502
504 .customIf([=](const LegalityQuery &Query) {
505 return HasRCPC3 && Query.Types[0] == s128 &&
506 Query.MMODescrs[0].Ordering == AtomicOrdering::Acquire;
507 })
508 .customIf([=](const LegalityQuery &Query) {
509 return Query.Types[0] == s128 &&
510 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
511 })
512 .legalForTypesWithMemDesc({{s8, p0, s8, 8},
513 {s16, p0, s16, 8},
514 {s32, p0, s32, 8},
515 {s64, p0, s64, 8},
516 {p0, p0, s64, 8},
517 {s128, p0, s128, 8},
518 {v8s8, p0, s64, 8},
519 {v16s8, p0, s128, 8},
520 {v4s16, p0, s64, 8},
521 {v8s16, p0, s128, 8},
522 {v2s32, p0, s64, 8},
523 {v4s32, p0, s128, 8},
524 {v2s64, p0, s128, 8}})
525 // These extends are also legal
526 .legalForTypesWithMemDesc(
527 {{s32, p0, s8, 8}, {s32, p0, s16, 8}, {s64, p0, s32, 8}})
528 .legalForTypesWithMemDesc({
529 // SVE vscale x 128 bit base sizes
530 {nxv16s8, p0, nxv16s8, 8},
531 {nxv8s16, p0, nxv8s16, 8},
532 {nxv4s32, p0, nxv4s32, 8},
533 {nxv2s64, p0, nxv2s64, 8},
534 })
535 .widenScalarToNextPow2(0, /* MinSize = */ 8)
536 .clampMaxNumElements(0, s8, 16)
537 .clampMaxNumElements(0, s16, 8)
538 .clampMaxNumElements(0, s32, 4)
539 .clampMaxNumElements(0, s64, 2)
540 .clampMaxNumElements(0, p0, 2)
542 .clampScalar(0, s8, s64)
544 [=](const LegalityQuery &Query) {
545 // Clamp extending load results to 32-bits.
546 return Query.Types[0].isScalar() &&
547 Query.Types[0] != Query.MMODescrs[0].MemoryTy &&
548 Query.Types[0].getSizeInBits() > 32;
549 },
550 changeTo(0, s32))
551 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
552 .bitcastIf(typeInSet(0, {v4s8}),
553 [=](const LegalityQuery &Query) {
554 const LLT VecTy = Query.Types[0];
555 return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
556 })
557 .customIf(IsPtrVecPred)
558 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)
559 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0);
560
562 .customIf([=](const LegalityQuery &Query) {
563 return HasRCPC3 && Query.Types[0] == s128 &&
564 Query.MMODescrs[0].Ordering == AtomicOrdering::Release;
565 })
566 .customIf([=](const LegalityQuery &Query) {
567 return Query.Types[0] == s128 &&
568 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
569 })
570 .legalForTypesWithMemDesc(
571 {{s8, p0, s8, 8}, {s16, p0, s8, 8}, // truncstorei8 from s16
572 {s32, p0, s8, 8}, // truncstorei8 from s32
573 {s64, p0, s8, 8}, // truncstorei8 from s64
574 {s16, p0, s16, 8}, {s32, p0, s16, 8}, // truncstorei16 from s32
575 {s64, p0, s16, 8}, // truncstorei16 from s64
576 {s32, p0, s8, 8}, {s32, p0, s16, 8}, {s32, p0, s32, 8},
577 {s64, p0, s64, 8}, {s64, p0, s32, 8}, // truncstorei32 from s64
578 {p0, p0, s64, 8}, {s128, p0, s128, 8}, {v16s8, p0, s128, 8},
579 {v8s8, p0, s64, 8}, {v4s16, p0, s64, 8}, {v8s16, p0, s128, 8},
580 {v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})
581 .legalForTypesWithMemDesc({
582 // SVE vscale x 128 bit base sizes
583 // TODO: Add nxv2p0. Consider bitcastIf.
584 // See #92130
585 // https://github.com/llvm/llvm-project/pull/92130#discussion_r1616888461
586 {nxv16s8, p0, nxv16s8, 8},
587 {nxv8s16, p0, nxv8s16, 8},
588 {nxv4s32, p0, nxv4s32, 8},
589 {nxv2s64, p0, nxv2s64, 8},
590 })
591 .clampScalar(0, s8, s64)
592 .minScalarOrElt(0, s8)
593 .lowerIf([=](const LegalityQuery &Query) {
594 return Query.Types[0].isScalar() &&
595 Query.Types[0] != Query.MMODescrs[0].MemoryTy;
596 })
597 // Maximum: sN * k = 128
598 .clampMaxNumElements(0, s8, 16)
599 .clampMaxNumElements(0, s16, 8)
600 .clampMaxNumElements(0, s32, 4)
601 .clampMaxNumElements(0, s64, 2)
602 .clampMaxNumElements(0, p0, 2)
604 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
605 .bitcastIf(all(typeInSet(0, {v4s8}),
606 LegalityPredicate([=](const LegalityQuery &Query) {
607 return Query.Types[0].getSizeInBits() ==
608 Query.MMODescrs[0].MemoryTy.getSizeInBits();
609 })),
610 [=](const LegalityQuery &Query) {
611 const LLT VecTy = Query.Types[0];
612 return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
613 })
614 .customIf(IsPtrVecPred)
615 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)
616 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
617 .lower();
618
619 getActionDefinitionsBuilder(G_INDEXED_STORE)
620 // Idx 0 == Ptr, Idx 1 == Val
621 // TODO: we can implement legalizations but as of now these are
622 // generated in a very specific way.
623 .legalForTypesWithMemDesc({
624 {p0, s8, s8, 8},
625 {p0, s16, s16, 8},
626 {p0, s32, s8, 8},
627 {p0, s32, s16, 8},
628 {p0, s32, s32, 8},
629 {p0, s64, s64, 8},
630 {p0, p0, p0, 8},
631 {p0, v8s8, v8s8, 8},
632 {p0, v16s8, v16s8, 8},
633 {p0, v4s16, v4s16, 8},
634 {p0, v8s16, v8s16, 8},
635 {p0, v2s32, v2s32, 8},
636 {p0, v4s32, v4s32, 8},
637 {p0, v2s64, v2s64, 8},
638 {p0, v2p0, v2p0, 8},
639 {p0, s128, s128, 8},
640 })
641 .unsupported();
642
643 auto IndexedLoadBasicPred = [=](const LegalityQuery &Query) {
644 LLT LdTy = Query.Types[0];
645 LLT PtrTy = Query.Types[1];
646 if (!llvm::is_contained(PackedVectorAllTypesVec, LdTy) &&
647 !llvm::is_contained(ScalarAndPtrTypesVec, LdTy) && LdTy != s128)
648 return false;
649 if (PtrTy != p0)
650 return false;
651 return true;
652 };
653 getActionDefinitionsBuilder(G_INDEXED_LOAD)
654 .unsupportedIf(
655 atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered))
656 .legalIf(IndexedLoadBasicPred)
657 .unsupported();
658 getActionDefinitionsBuilder({G_INDEXED_SEXTLOAD, G_INDEXED_ZEXTLOAD})
659 .unsupportedIf(
660 atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered))
661 .legalIf(all(typeInSet(0, {s16, s32, s64}),
662 LegalityPredicate([=](const LegalityQuery &Q) {
663 LLT LdTy = Q.Types[0];
664 LLT PtrTy = Q.Types[1];
665 LLT MemTy = Q.MMODescrs[0].MemoryTy;
666 if (PtrTy != p0)
667 return false;
668 if (LdTy == s16)
669 return MemTy == s8;
670 if (LdTy == s32)
671 return MemTy == s8 || MemTy == s16;
672 if (LdTy == s64)
673 return MemTy == s8 || MemTy == s16 || MemTy == s32;
674 return false;
675 })))
676 .unsupported();
677
678 // Constants
679 getActionDefinitionsBuilder(G_CONSTANT)
680 .legalFor({p0, s8, s16, s32, s64})
681 .widenScalarToNextPow2(0)
682 .clampScalar(0, s8, s64);
683 getActionDefinitionsBuilder(G_FCONSTANT)
684 // Always legalize s16 to prevent G_FCONSTANT being widened to G_CONSTANT
685 .legalFor({s16, s32, s64, s128})
686 .clampScalar(0, MinFPScalar, s128);
687
688 // FIXME: fix moreElementsToNextPow2
689 getActionDefinitionsBuilder(G_ICMP)
690 .legalFor({{s32, s32}, {s32, s64}, {s32, p0}})
692 .clampScalar(1, s32, s64)
693 .clampScalar(0, s32, s32)
696 [=](const LegalityQuery &Query) {
697 const LLT &Ty = Query.Types[0];
698 const LLT &SrcTy = Query.Types[1];
699 return Ty.isVector() && !SrcTy.isPointerVector() &&
700 Ty.getElementType() != SrcTy.getElementType();
701 },
702 0, 1)
703 .minScalarOrEltIf(
704 [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
705 1, s32)
706 .minScalarOrEltIf(
707 [=](const LegalityQuery &Query) {
708 return Query.Types[1].isPointerVector();
709 },
710 0, s64)
712 .clampNumElements(1, v8s8, v16s8)
713 .clampNumElements(1, v4s16, v8s16)
714 .clampNumElements(1, v2s32, v4s32)
715 .clampNumElements(1, v2s64, v2s64)
716 .clampNumElements(1, v2p0, v2p0)
717 .customIf(isVector(0));
718
719 getActionDefinitionsBuilder(G_FCMP)
720 .legalFor({{s32, s32},
721 {s32, s64},
722 {v4s32, v4s32},
723 {v2s32, v2s32},
724 {v2s64, v2s64}})
725 .legalFor(HasFP16, {{s32, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
727 .clampScalar(0, s32, s32)
728 .minScalarOrElt(1, MinFPScalar)
731 [=](const LegalityQuery &Query) {
732 const LLT &Ty = Query.Types[0];
733 const LLT &SrcTy = Query.Types[1];
734 return Ty.isVector() && !SrcTy.isPointerVector() &&
735 Ty.getElementType() != SrcTy.getElementType();
736 },
737 0, 1)
738 .clampNumElements(1, v4s16, v8s16)
739 .clampNumElements(1, v2s32, v4s32)
740 .clampMaxNumElements(1, s64, 2)
742 .libcallFor({{s32, s128}});
743
744 // Extensions
745 auto ExtLegalFunc = [=](const LegalityQuery &Query) {
746 unsigned DstSize = Query.Types[0].getSizeInBits();
747
748 // Handle legal vectors using legalFor
749 if (Query.Types[0].isVector())
750 return false;
751
752 if (DstSize < 8 || DstSize >= 128 || !isPowerOf2_32(DstSize))
753 return false; // Extending to a scalar s128 needs narrowing.
754
755 const LLT &SrcTy = Query.Types[1];
756
757 // Make sure we fit in a register otherwise. Don't bother checking that
758 // the source type is below 128 bits. We shouldn't be allowing anything
759 // through which is wider than the destination in the first place.
760 unsigned SrcSize = SrcTy.getSizeInBits();
761 if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
762 return false;
763
764 return true;
765 };
766 getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
767 .legalIf(ExtLegalFunc)
768 .legalFor({{v8s16, v8s8}, {v4s32, v4s16}, {v2s64, v2s32}})
769 .clampScalar(0, s64, s64) // Just for s128, others are handled above.
771 .clampMaxNumElements(1, s8, 8)
772 .clampMaxNumElements(1, s16, 4)
773 .clampMaxNumElements(1, s32, 2)
774 // Tries to convert a large EXTEND into two smaller EXTENDs
775 .lowerIf([=](const LegalityQuery &Query) {
776 return (Query.Types[0].getScalarSizeInBits() >
777 Query.Types[1].getScalarSizeInBits() * 2) &&
778 Query.Types[0].isVector() &&
779 (Query.Types[1].getScalarSizeInBits() == 8 ||
780 Query.Types[1].getScalarSizeInBits() == 16);
781 })
782 .clampMinNumElements(1, s8, 8)
783 .clampMinNumElements(1, s16, 4)
785
786 getActionDefinitionsBuilder(G_TRUNC)
787 .legalFor({{v8s8, v8s16}, {v4s16, v4s32}, {v2s32, v2s64}})
789 .clampMaxNumElements(0, s8, 8)
790 .clampMaxNumElements(0, s16, 4)
791 .clampMaxNumElements(0, s32, 2)
793 [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
794 0, s8)
795 .lowerIf([=](const LegalityQuery &Query) {
796 LLT DstTy = Query.Types[0];
797 LLT SrcTy = Query.Types[1];
798 return DstTy.isVector() && SrcTy.getSizeInBits() > 128 &&
799 DstTy.getScalarSizeInBits() * 2 <= SrcTy.getScalarSizeInBits();
800 })
801 .clampMinNumElements(0, s8, 8)
802 .clampMinNumElements(0, s16, 4)
803 .alwaysLegal();
804
805 getActionDefinitionsBuilder({G_TRUNC_SSAT_S, G_TRUNC_SSAT_U, G_TRUNC_USAT_U})
806 .legalFor({{v8s8, v8s16}, {v4s16, v4s32}, {v2s32, v2s64}});
807
808 getActionDefinitionsBuilder(G_SEXT_INREG)
809 .legalFor({s32, s64})
810 .legalFor(PackedVectorAllTypeList)
811 .maxScalar(0, s64)
812 .clampNumElements(0, v8s8, v16s8)
813 .clampNumElements(0, v4s16, v8s16)
814 .clampNumElements(0, v2s32, v4s32)
815 .clampMaxNumElements(0, s64, 2)
816 .lower();
817
818 // FP conversions
819 getActionDefinitionsBuilder(G_FPTRUNC)
820 .legalFor(
821 {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
822 .libcallFor({{s16, s128}, {s32, s128}, {s64, s128}})
823 .clampNumElements(0, v4s16, v4s16)
824 .clampNumElements(0, v2s32, v2s32)
825 .scalarize(0);
826
827 getActionDefinitionsBuilder(G_FPEXT)
828 .legalFor(
829 {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
830 .libcallFor({{s128, s64}, {s128, s32}, {s128, s16}})
833 [](const LegalityQuery &Q) {
834 LLT DstTy = Q.Types[0];
835 LLT SrcTy = Q.Types[1];
836 return SrcTy.isVector() && DstTy.isVector() &&
837 SrcTy.getScalarSizeInBits() == 16 &&
838 DstTy.getScalarSizeInBits() == 64;
839 },
840 changeElementTo(1, s32))
841 .clampNumElements(0, v4s32, v4s32)
842 .clampNumElements(0, v2s64, v2s64)
843 .scalarize(0);
844
845 // Conversions
846 getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
847 .legalFor({{s32, s32},
848 {s64, s32},
849 {s32, s64},
850 {s64, s64},
851 {v2s32, v2s32},
852 {v4s32, v4s32},
853 {v2s64, v2s64}})
854 .legalFor(HasFP16,
855 {{s32, s16}, {s64, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
856 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
858 // The range of a fp16 value fits into an i17, so we can lower the width
859 // to i64.
861 [=](const LegalityQuery &Query) {
862 return Query.Types[1] == s16 && Query.Types[0].getSizeInBits() > 64;
863 },
864 changeTo(0, s64))
867 .minScalar(0, s32)
868 .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
870 [=](const LegalityQuery &Query) {
871 return Query.Types[0].getScalarSizeInBits() <= 64 &&
872 Query.Types[0].getScalarSizeInBits() >
873 Query.Types[1].getScalarSizeInBits();
874 },
876 .widenScalarIf(
877 [=](const LegalityQuery &Query) {
878 return Query.Types[1].getScalarSizeInBits() <= 64 &&
879 Query.Types[0].getScalarSizeInBits() <
880 Query.Types[1].getScalarSizeInBits();
881 },
883 .clampNumElements(0, v4s16, v8s16)
884 .clampNumElements(0, v2s32, v4s32)
885 .clampMaxNumElements(0, s64, 2)
886 .libcallFor(
887 {{s32, s128}, {s64, s128}, {s128, s128}, {s128, s32}, {s128, s64}});
888
889 getActionDefinitionsBuilder({G_FPTOSI_SAT, G_FPTOUI_SAT})
890 .legalFor({{s32, s32},
891 {s64, s32},
892 {s32, s64},
893 {s64, s64},
894 {v2s32, v2s32},
895 {v4s32, v4s32},
896 {v2s64, v2s64}})
897 .legalFor(
898 HasFP16,
899 {{s16, s16}, {s32, s16}, {s64, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
900 // Handle types larger than i64 by scalarizing/lowering.
901 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
903 // The range of a fp16 value fits into an i17, so we can lower the width
904 // to i64.
906 [=](const LegalityQuery &Query) {
907 return Query.Types[1] == s16 && Query.Types[0].getSizeInBits() > 64;
908 },
909 changeTo(0, s64))
910 .lowerIf(::any(scalarWiderThan(0, 64), scalarWiderThan(1, 64)), 0)
912 .widenScalarToNextPow2(0, /*MinSize=*/32)
913 .minScalar(0, s32)
914 .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
916 [=](const LegalityQuery &Query) {
917 unsigned ITySize = Query.Types[0].getScalarSizeInBits();
918 return (ITySize == 16 || ITySize == 32 || ITySize == 64) &&
919 ITySize > Query.Types[1].getScalarSizeInBits();
920 },
922 .widenScalarIf(
923 [=](const LegalityQuery &Query) {
924 unsigned FTySize = Query.Types[1].getScalarSizeInBits();
925 return (FTySize == 16 || FTySize == 32 || FTySize == 64) &&
926 Query.Types[0].getScalarSizeInBits() < FTySize;
927 },
930 .clampNumElements(0, v4s16, v8s16)
931 .clampNumElements(0, v2s32, v4s32)
932 .clampMaxNumElements(0, s64, 2);
933
934 getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
935 .legalFor({{s32, s32},
936 {s64, s32},
937 {s32, s64},
938 {s64, s64},
939 {v2s32, v2s32},
940 {v4s32, v4s32},
941 {v2s64, v2s64}})
942 .legalFor(HasFP16,
943 {{s16, s32}, {s16, s64}, {v4s16, v4s16}, {v8s16, v8s16}})
944 .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
948 .minScalar(1, s32)
949 .lowerIf([](const LegalityQuery &Query) {
950 return Query.Types[1].isVector() &&
951 Query.Types[1].getScalarSizeInBits() == 64 &&
952 Query.Types[0].getScalarSizeInBits() == 16;
953 })
954 .widenScalarOrEltToNextPow2OrMinSize(0, /*MinSize=*/HasFP16 ? 16 : 32)
956 // v2i64->v2f32 needs to scalarize to avoid double-rounding issues.
957 [](const LegalityQuery &Query) {
958 return Query.Types[0].getScalarSizeInBits() == 32 &&
959 Query.Types[1].getScalarSizeInBits() == 64;
960 },
961 0)
962 .widenScalarIf(
963 [](const LegalityQuery &Query) {
964 return Query.Types[1].getScalarSizeInBits() <= 64 &&
965 Query.Types[0].getScalarSizeInBits() <
966 Query.Types[1].getScalarSizeInBits();
967 },
969 .widenScalarIf(
970 [](const LegalityQuery &Query) {
971 return Query.Types[0].getScalarSizeInBits() <= 64 &&
972 Query.Types[0].getScalarSizeInBits() >
973 Query.Types[1].getScalarSizeInBits();
974 },
976 .clampNumElements(0, v4s16, v8s16)
977 .clampNumElements(0, v2s32, v4s32)
978 .clampMaxNumElements(0, s64, 2)
979 .libcallFor({{s16, s128},
980 {s32, s128},
981 {s64, s128},
982 {s128, s128},
983 {s128, s32},
984 {s128, s64}});
985
986 // Control-flow
989 .legalFor({s32})
990 .clampScalar(0, s32, s32);
991 getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
992
993 getActionDefinitionsBuilder(G_SELECT)
994 .legalFor({{s32, s32}, {s64, s32}, {p0, s32}})
995 .widenScalarToNextPow2(0)
996 .clampScalar(0, s32, s64)
997 .clampScalar(1, s32, s32)
1000 .lowerIf(isVector(0));
1001
1002 // Pointer-handling
1003 getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
1004
1005 if (TM.getCodeModel() == CodeModel::Small)
1006 getActionDefinitionsBuilder(G_GLOBAL_VALUE).custom();
1007 else
1008 getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
1009
1010 getActionDefinitionsBuilder(G_PTRAUTH_GLOBAL_VALUE)
1011 .legalIf(all(typeIs(0, p0), typeIs(1, p0)));
1012
1013 getActionDefinitionsBuilder(G_PTRTOINT)
1014 .legalFor({{s64, p0}, {v2s64, v2p0}})
1015 .widenScalarToNextPow2(0, 64)
1016 .clampScalar(0, s64, s64)
1017 .clampMaxNumElements(0, s64, 2);
1018
1019 getActionDefinitionsBuilder(G_INTTOPTR)
1020 .unsupportedIf([&](const LegalityQuery &Query) {
1021 return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
1022 })
1023 .legalFor({{p0, s64}, {v2p0, v2s64}})
1024 .clampMaxNumElements(1, s64, 2);
1025
1026 // Casts for 32 and 64-bit width type are just copies.
1027 // Same for 128-bit width type, except they are on the FPR bank.
1028 getActionDefinitionsBuilder(G_BITCAST)
1029 // Keeping 32-bit instructions legal to prevent regression in some tests
1030 .legalForCartesianProduct({s32, v2s16, v4s8})
1031 .legalForCartesianProduct({s64, v8s8, v4s16, v2s32})
1032 .legalForCartesianProduct({s128, v16s8, v8s16, v4s32, v2s64, v2p0})
1033 .customIf([=](const LegalityQuery &Query) {
1034 // Handle casts from i1 vectors to scalars.
1035 LLT DstTy = Query.Types[0];
1036 LLT SrcTy = Query.Types[1];
1037 return DstTy.isScalar() && SrcTy.isVector() &&
1038 SrcTy.getScalarSizeInBits() == 1;
1039 })
1040 .lowerIf([=](const LegalityQuery &Query) {
1041 return Query.Types[0].isVector() != Query.Types[1].isVector();
1042 })
1044 .clampNumElements(0, v8s8, v16s8)
1045 .clampNumElements(0, v4s16, v8s16)
1046 .clampNumElements(0, v2s32, v4s32)
1047 .lower();
1048
1049 getActionDefinitionsBuilder(G_VASTART).legalFor({p0});
1050
1051 // va_list must be a pointer, but most sized types are pretty easy to handle
1052 // as the destination.
1053 getActionDefinitionsBuilder(G_VAARG)
1054 .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
1055 .clampScalar(0, s8, s64)
1056 .widenScalarToNextPow2(0, /*Min*/ 8);
1057
1058 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
1059 .lowerIf(
1060 all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0)));
1061
1062 bool UseOutlineAtomics = ST.outlineAtomics() && !ST.hasLSE();
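// When atomics are outlined, the cmpxchg and RMW cases below become libcalls
// to the __aarch64_* outline-atomics helpers instead of inline LSE or LL/SC
// sequences.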
1063
1064 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
1065 .legalFor(!UseOutlineAtomics, {{s32, p0}, {s64, p0}})
1066 .customFor(!UseOutlineAtomics, {{s128, p0}})
1067 .libcallFor(UseOutlineAtomics,
1068 {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}, {s128, p0}})
1069 .clampScalar(0, s32, s64);
1070
1071 getActionDefinitionsBuilder({G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD,
1072 G_ATOMICRMW_SUB, G_ATOMICRMW_AND, G_ATOMICRMW_OR,
1073 G_ATOMICRMW_XOR})
1074 .legalFor(!UseOutlineAtomics, {{s32, p0}, {s64, p0}})
1075 .libcallFor(UseOutlineAtomics,
1076 {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}})
1077 .clampScalar(0, s32, s64);
1078
1079 // Do not outline these atomics operations, as per comment in
1080 // AArch64ISelLowering.cpp's shouldExpandAtomicRMWInIR().
1081 getActionDefinitionsBuilder(
1082 {G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
1083 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)))
1084 .clampScalar(0, s32, s64);
1085
1086 getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});
1087
1088 // Merge/Unmerge
1089 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
1090 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
1091 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
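// For G_MERGE_VALUES the wide type is the destination (index 0); for
// G_UNMERGE_VALUES it is the source (index 1).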
1092 getActionDefinitionsBuilder(Op)
1093 .widenScalarToNextPow2(LitTyIdx, 8)
1094 .widenScalarToNextPow2(BigTyIdx, 32)
1095 .clampScalar(LitTyIdx, s8, s64)
1096 .clampScalar(BigTyIdx, s32, s128)
1097 .legalIf([=](const LegalityQuery &Q) {
1098 switch (Q.Types[BigTyIdx].getSizeInBits()) {
1099 case 32:
1100 case 64:
1101 case 128:
1102 break;
1103 default:
1104 return false;
1105 }
1106 switch (Q.Types[LitTyIdx].getSizeInBits()) {
1107 case 8:
1108 case 16:
1109 case 32:
1110 case 64:
1111 return true;
1112 default:
1113 return false;
1114 }
1115 });
1116 }
1117
1118 // TODO : nxv4s16, nxv2s16, nxv2s32
1119 getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
1120 .legalFor(HasSVE, {{s16, nxv16s8, s64},
1121 {s16, nxv8s16, s64},
1122 {s32, nxv4s32, s64},
1123 {s64, nxv2s64, s64}})
1124 .unsupportedIf([=](const LegalityQuery &Query) {
1125 const LLT &EltTy = Query.Types[1].getElementType();
1126 if (Query.Types[1].isScalableVector())
1127 return false;
1128 return Query.Types[0] != EltTy;
1129 })
1130 .minScalar(2, s64)
1131 .customIf([=](const LegalityQuery &Query) {
1132 const LLT &VecTy = Query.Types[1];
1133 return VecTy == v8s8 || VecTy == v16s8 || VecTy == v2s16 ||
1134 VecTy == v4s16 || VecTy == v8s16 || VecTy == v2s32 ||
1135 VecTy == v4s32 || VecTy == v2s64 || VecTy == v2p0;
1136 })
1137 .minScalarOrEltIf(
1138 [=](const LegalityQuery &Query) {
1139 // We want to promote <M x s1> to <M x s64> if that wouldn't
1140 // cause the total vec size to be > 128b.
1141 return Query.Types[1].isFixedVector() &&
1142 Query.Types[1].getNumElements() <= 2;
1143 },
1144 0, s64)
1145 .minScalarOrEltIf(
1146 [=](const LegalityQuery &Query) {
1147 return Query.Types[1].isFixedVector() &&
1148 Query.Types[1].getNumElements() <= 4;
1149 },
1150 0, s32)
1151 .minScalarOrEltIf(
1152 [=](const LegalityQuery &Query) {
1153 return Query.Types[1].isFixedVector() &&
1154 Query.Types[1].getNumElements() <= 8;
1155 },
1156 0, s16)
1157 .minScalarOrEltIf(
1158 [=](const LegalityQuery &Query) {
1159 return Query.Types[1].isFixedVector() &&
1160 Query.Types[1].getNumElements() <= 16;
1161 },
1162 0, s8)
1163 .minScalarOrElt(0, s8) // Worst case, we need at least s8.
1165 .clampMaxNumElements(1, s64, 2)
1166 .clampMaxNumElements(1, s32, 4)
1167 .clampMaxNumElements(1, s16, 8)
1168 .clampMaxNumElements(1, s8, 16)
1169 .clampMaxNumElements(1, p0, 2)
1171
1172 getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
1173 .legalIf(
1174 typeInSet(0, {v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64, v2p0}))
1175 .legalFor(HasSVE, {{nxv16s8, s32, s64},
1176 {nxv8s16, s32, s64},
1177 {nxv4s32, s32, s64},
1178 {nxv2s64, s64, s64}})
1181 .clampNumElements(0, v8s8, v16s8)
1182 .clampNumElements(0, v4s16, v8s16)
1183 .clampNumElements(0, v2s32, v4s32)
1184 .clampMaxNumElements(0, s64, 2)
1185 .clampMaxNumElements(0, p0, 2)
1187
1188 getActionDefinitionsBuilder(G_BUILD_VECTOR)
1189 .legalFor({{v8s8, s8},
1190 {v16s8, s8},
1191 {v4s16, s16},
1192 {v8s16, s16},
1193 {v2s32, s32},
1194 {v4s32, s32},
1195 {v2s64, s64},
1196 {v2p0, p0}})
1197 .clampNumElements(0, v4s32, v4s32)
1198 .clampNumElements(0, v2s64, v2s64)
1199 .minScalarOrElt(0, s8)
1202 .minScalarSameAs(1, 0);
1203
1204 getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).lower();
1205
1206 getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
1207 .legalIf([=](const LegalityQuery &Query) {
1208 const LLT &DstTy = Query.Types[0];
1209 const LLT &SrcTy = Query.Types[1];
1210 // For now just support the TBL2 variant which needs the source vectors
1211 // to be the same size as the dest.
1212 if (DstTy != SrcTy)
1213 return false;
1214 return llvm::is_contained(
1215 {v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64}, DstTy);
1216 })
1217 .moreElementsIf(
1218 [](const LegalityQuery &Query) {
1219 return Query.Types[0].getNumElements() >
1220 Query.Types[1].getNumElements();
1221 },
1222 changeTo(1, 0))
1225 [](const LegalityQuery &Query) {
1226 return Query.Types[0].getNumElements() <
1227 Query.Types[1].getNumElements();
1228 },
1229 changeTo(0, 1))
1230 .widenScalarOrEltToNextPow2OrMinSize(0, 8)
1231 .clampNumElements(0, v8s8, v16s8)
1232 .clampNumElements(0, v4s16, v8s16)
1233 .clampNumElements(0, v4s32, v4s32)
1234 .clampNumElements(0, v2s64, v2s64)
1236 .bitcastIf(isPointerVector(0), [=](const LegalityQuery &Query) {
1237 // Bitcast pointer vectors to i64.
1238 const LLT DstTy = Query.Types[0];
1239 return std::pair(0, LLT::vector(DstTy.getElementCount(), 64));
1240 });
1241
1242 getActionDefinitionsBuilder(G_CONCAT_VECTORS)
1243 .legalFor({{v16s8, v8s8}, {v8s16, v4s16}, {v4s32, v2s32}})
1244 .bitcastIf(
1245 [=](const LegalityQuery &Query) {
1246 return Query.Types[0].isFixedVector() &&
1247 Query.Types[1].isFixedVector() &&
1248 Query.Types[0].getSizeInBits() <= 128 &&
1249 Query.Types[1].getSizeInBits() <= 64;
1250 },
1251 [=](const LegalityQuery &Query) {
1252 const LLT DstTy = Query.Types[0];
1253 const LLT SrcTy = Query.Types[1];
1254 return std::pair(
1255 0, DstTy.changeElementSize(SrcTy.getSizeInBits())
1258 SrcTy.getNumElements())));
1259 });
1260
1261 getActionDefinitionsBuilder(G_EXTRACT_SUBVECTOR)
1262 .legalFor({{v8s8, v16s8}, {v4s16, v8s16}, {v2s32, v4s32}})
1264 .immIdx(0); // Inform verifier imm idx 0 is handled.
1265
1266 // TODO: {nxv16s8, s8}, {nxv8s16, s16}
1267 getActionDefinitionsBuilder(G_SPLAT_VECTOR)
1268 .legalFor(HasSVE, {{nxv4s32, s32}, {nxv2s64, s64}});
1269
1270 getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({p0});
1271
1272 getActionDefinitionsBuilder(G_BRJT).legalFor({{p0, s64}});
1273
1274 getActionDefinitionsBuilder({G_TRAP, G_DEBUGTRAP, G_UBSANTRAP}).alwaysLegal();
1275
1276 getActionDefinitionsBuilder(G_DYN_STACKALLOC).custom();
1277
1278 getActionDefinitionsBuilder({G_STACKSAVE, G_STACKRESTORE}).lower();
1279
1280 if (ST.hasMOPS()) {
1281 // G_BZERO is not supported. Currently it is only emitted by
1282 // PreLegalizerCombiner for G_MEMSET with zero constant.
1283 getActionDefinitionsBuilder(G_BZERO).unsupported();
1284
1285 getActionDefinitionsBuilder(G_MEMSET)
1286 .legalForCartesianProduct({p0}, {s64}, {s64})
1287 .customForCartesianProduct({p0}, {s8}, {s64})
1288 .immIdx(0); // Inform verifier imm idx 0 is handled.
1289
1290 getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE})
1291 .legalForCartesianProduct({p0}, {p0}, {s64})
1292 .immIdx(0); // Inform verifier imm idx 0 is handled.
1293
1294 // G_MEMCPY_INLINE does not have a tailcall immediate
1295 getActionDefinitionsBuilder(G_MEMCPY_INLINE)
1296 .legalForCartesianProduct({p0}, {p0}, {s64});
1297
1298 } else {
1299 getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
1300 .libcall();
1301 }
1302
1303 // For fadd reductions we have pairwise operations available. We treat the
1304 // usual legal types as legal and handle the lowering to pairwise instructions
1305 // later.
1306 getActionDefinitionsBuilder(G_VECREDUCE_FADD)
1307 .legalFor({{s32, v2s32}, {s32, v4s32}, {s64, v2s64}})
1308 .legalFor(HasFP16, {{s16, v4s16}, {s16, v8s16}})
1309 .minScalarOrElt(0, MinFPScalar)
1310 .clampMaxNumElements(1, s64, 2)
1311 .clampMaxNumElements(1, s32, 4)
1312 .clampMaxNumElements(1, s16, 8)
1314 .scalarize(1)
1315 .lower();
1316
1317 // For fmul reductions we need to split up into individual operations. We
1318 // clamp to 128 bit vectors then to 64bit vectors to produce a cascade of
1319 // smaller types, followed by scalarizing what remains.
1320 getActionDefinitionsBuilder(G_VECREDUCE_FMUL)
1321 .minScalarOrElt(0, MinFPScalar)
1322 .clampMaxNumElements(1, s64, 2)
1323 .clampMaxNumElements(1, s32, 4)
1324 .clampMaxNumElements(1, s16, 8)
1325 .clampMaxNumElements(1, s32, 2)
1326 .clampMaxNumElements(1, s16, 4)
1327 .scalarize(1)
1328 .lower();
1329
1330 getActionDefinitionsBuilder({G_VECREDUCE_SEQ_FADD, G_VECREDUCE_SEQ_FMUL})
1331 .scalarize(2)
1332 .lower();
1333
1334 getActionDefinitionsBuilder(G_VECREDUCE_ADD)
1335 .legalFor({{s8, v8s8},
1336 {s8, v16s8},
1337 {s16, v4s16},
1338 {s16, v8s16},
1339 {s32, v2s32},
1340 {s32, v4s32},
1341 {s64, v2s64}})
1343 .clampMaxNumElements(1, s64, 2)
1344 .clampMaxNumElements(1, s32, 4)
1345 .clampMaxNumElements(1, s16, 8)
1346 .clampMaxNumElements(1, s8, 16)
1348 .scalarize(1);
1349
1350 getActionDefinitionsBuilder({G_VECREDUCE_FMIN, G_VECREDUCE_FMAX,
1351 G_VECREDUCE_FMINIMUM, G_VECREDUCE_FMAXIMUM})
1352 .legalFor({{s32, v2s32}, {s32, v4s32}, {s64, v2s64}})
1353 .legalFor(HasFP16, {{s16, v4s16}, {s16, v8s16}})
1354 .minScalarOrElt(0, MinFPScalar)
1355 .clampMaxNumElements(1, s64, 2)
1356 .clampMaxNumElements(1, s32, 4)
1357 .clampMaxNumElements(1, s16, 8)
1358 .scalarize(1)
1359 .lower();
1360
1361 getActionDefinitionsBuilder(G_VECREDUCE_MUL)
1362 .clampMaxNumElements(1, s32, 2)
1363 .clampMaxNumElements(1, s16, 4)
1364 .clampMaxNumElements(1, s8, 8)
1365 .scalarize(1)
1366 .lower();
1367
1368 getActionDefinitionsBuilder(
1369 {G_VECREDUCE_SMIN, G_VECREDUCE_SMAX, G_VECREDUCE_UMIN, G_VECREDUCE_UMAX})
1370 .legalFor({{s8, v8s8},
1371 {s8, v16s8},
1372 {s16, v4s16},
1373 {s16, v8s16},
1374 {s32, v2s32},
1375 {s32, v4s32}})
1376 .moreElementsIf(
1377 [=](const LegalityQuery &Query) {
1378 return Query.Types[1].isVector() &&
1379 Query.Types[1].getElementType() != s8 &&
1380 Query.Types[1].getNumElements() & 1;
1381 },
1383 .clampMaxNumElements(1, s64, 2)
1384 .clampMaxNumElements(1, s32, 4)
1385 .clampMaxNumElements(1, s16, 8)
1386 .clampMaxNumElements(1, s8, 16)
1387 .scalarize(1)
1388 .lower();
1389
1390 getActionDefinitionsBuilder(
1391 {G_VECREDUCE_OR, G_VECREDUCE_AND, G_VECREDUCE_XOR})
1392 // Try to break down into smaller vectors as long as they're at least 64
1393 // bits. This lets us use vector operations for some parts of the
1394 // reduction.
1395 .fewerElementsIf(
1396 [=](const LegalityQuery &Q) {
1397 LLT SrcTy = Q.Types[1];
1398 if (SrcTy.isScalar())
1399 return false;
1400 if (!isPowerOf2_32(SrcTy.getNumElements()))
1401 return false;
1402 // We can usually perform 64b vector operations.
1403 return SrcTy.getSizeInBits() > 64;
1404 },
1405 [=](const LegalityQuery &Q) {
1406 LLT SrcTy = Q.Types[1];
1407 return std::make_pair(1, SrcTy.divide(2));
1408 })
1409 .scalarize(1)
1410 .lower();
1411
1412 // TODO: Update this to correct handling when adding AArch64/SVE support.
1413 getActionDefinitionsBuilder(G_VECTOR_COMPRESS).lower();
1414
1415 // Access to floating-point environment.
1416 getActionDefinitionsBuilder({G_GET_FPENV, G_SET_FPENV, G_RESET_FPENV,
1417 G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
1418 .libcall();
1419
1420 getActionDefinitionsBuilder(G_IS_FPCLASS).lower();
1421
1422 getActionDefinitionsBuilder(G_PREFETCH).custom();
1423
1424 getActionDefinitionsBuilder({G_SCMP, G_UCMP}).lower();
1425
1426 getLegacyLegalizerInfo().computeTables();
1427 verify(*ST.getInstrInfo());
1428}
1429
1432 LostDebugLocObserver &LocObserver) const {
1433 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1434 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1435 GISelChangeObserver &Observer = Helper.Observer;
1436 switch (MI.getOpcode()) {
1437 default:
1438 // No idea what to do.
1439 return false;
1440 case TargetOpcode::G_VAARG:
1441 return legalizeVaArg(MI, MRI, MIRBuilder);
1442 case TargetOpcode::G_LOAD:
1443 case TargetOpcode::G_STORE:
1444 return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
1445 case TargetOpcode::G_SHL:
1446 case TargetOpcode::G_ASHR:
1447 case TargetOpcode::G_LSHR:
1448 return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
1449 case TargetOpcode::G_GLOBAL_VALUE:
1450 return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
1451 case TargetOpcode::G_SBFX:
1452 case TargetOpcode::G_UBFX:
1453 return legalizeBitfieldExtract(MI, MRI, Helper);
1454 case TargetOpcode::G_FSHL:
1455 case TargetOpcode::G_FSHR:
1456 return legalizeFunnelShift(MI, MRI, MIRBuilder, Observer, Helper);
1457 case TargetOpcode::G_ROTR:
1458 return legalizeRotate(MI, MRI, Helper);
1459 case TargetOpcode::G_CTPOP:
1460 return legalizeCTPOP(MI, MRI, Helper);
1461 case TargetOpcode::G_ATOMIC_CMPXCHG:
1462 return legalizeAtomicCmpxchg128(MI, MRI, Helper);
1463 case TargetOpcode::G_CTTZ:
1464 return legalizeCTTZ(MI, Helper);
1465 case TargetOpcode::G_BZERO:
1466 case TargetOpcode::G_MEMCPY:
1467 case TargetOpcode::G_MEMMOVE:
1468 case TargetOpcode::G_MEMSET:
1469 return legalizeMemOps(MI, Helper);
1470 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1471 return legalizeExtractVectorElt(MI, MRI, Helper);
1472 case TargetOpcode::G_DYN_STACKALLOC:
1473 return legalizeDynStackAlloc(MI, Helper);
1474 case TargetOpcode::G_PREFETCH:
1475 return legalizePrefetch(MI, Helper);
1476 case TargetOpcode::G_ABS:
1477 return Helper.lowerAbsToCNeg(MI);
1478 case TargetOpcode::G_ICMP:
1479 return legalizeICMP(MI, MRI, MIRBuilder);
1480 case TargetOpcode::G_BITCAST:
1481 return legalizeBitcast(MI, Helper);
1482 }
1483
1484 llvm_unreachable("expected switch to return");
1485}
1486
1487bool AArch64LegalizerInfo::legalizeBitcast(MachineInstr &MI,
1488 LegalizerHelper &Helper) const {
1489 assert(MI.getOpcode() == TargetOpcode::G_BITCAST && "Unexpected opcode");
1490 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
1491 // Handle casts from i1 vectors to scalars by storing the vector to the
1492 // stack and reloading the result as a scalar.
1493 if (!DstTy.isScalar() || !SrcTy.isVector() ||
1494 SrcTy.getElementType() != LLT::scalar(1))
1495 return false;
1496
1497 Helper.createStackStoreLoad(DstReg, SrcReg);
1498 MI.eraseFromParent();
1499 return true;
1500}
1501
1502bool AArch64LegalizerInfo::legalizeFunnelShift(MachineInstr &MI,
1504 MachineIRBuilder &MIRBuilder,
1505 GISelChangeObserver &Observer,
1506 LegalizerHelper &Helper) const {
1507 assert(MI.getOpcode() == TargetOpcode::G_FSHL ||
1508 MI.getOpcode() == TargetOpcode::G_FSHR);
1509
1510 // Keep as G_FSHR if the shift amount is a G_CONSTANT; otherwise use the
1511 // generic lowering.
1512 Register ShiftNo = MI.getOperand(3).getReg();
1513 LLT ShiftTy = MRI.getType(ShiftNo);
1514 auto VRegAndVal = getIConstantVRegValWithLookThrough(ShiftNo, MRI);
1515
1516 // Adjust shift amount according to Opcode (FSHL/FSHR)
1517 // Convert FSHL to FSHR
1518 LLT OperationTy = MRI.getType(MI.getOperand(0).getReg());
1519 APInt BitWidth(ShiftTy.getSizeInBits(), OperationTy.getSizeInBits(), false);
1520
1521 // Lower non-constant shifts and leave zero shifts to the optimizer.
1522 if (!VRegAndVal || VRegAndVal->Value.urem(BitWidth) == 0)
1523 return (Helper.lowerFunnelShiftAsShifts(MI) ==
1524 LegalizerHelper::LegalizeResult::Legalized);
1525
1526 APInt Amount = VRegAndVal->Value.urem(BitWidth);
1527
1528 Amount = MI.getOpcode() == TargetOpcode::G_FSHL ? BitWidth - Amount : Amount;
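// fshl(a, b, n) == fshr(a, b, BitWidth - n) for non-zero n, so a constant
// G_FSHL amount is converted here and the instruction is rebuilt as G_FSHR
// below; zero amounts were already handed to the generic lowering above.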
1529
1530 // If the instruction is a G_FSHR with a 64-bit G_CONSTANT shift amount in
1531 // the range [0, BitWidth), it is already legal.
1532 if (ShiftTy.getSizeInBits() == 64 && MI.getOpcode() == TargetOpcode::G_FSHR &&
1533 VRegAndVal->Value.ult(BitWidth))
1534 return true;
1535
1536 // Materialize the adjusted shift amount as a 64-bit constant.
1537 auto Cast64 = MIRBuilder.buildConstant(LLT::scalar(64), Amount.zext(64));
1538
1539 if (MI.getOpcode() == TargetOpcode::G_FSHR) {
1540 Observer.changingInstr(MI);
1541 MI.getOperand(3).setReg(Cast64.getReg(0));
1542 Observer.changedInstr(MI);
1543 }
1544 // If Opcode is FSHL, remove the FSHL instruction and create a FSHR
1545 // instruction
1546 else if (MI.getOpcode() == TargetOpcode::G_FSHL) {
1547 MIRBuilder.buildInstr(TargetOpcode::G_FSHR, {MI.getOperand(0).getReg()},
1548 {MI.getOperand(1).getReg(), MI.getOperand(2).getReg(),
1549 Cast64.getReg(0)});
1550 MI.eraseFromParent();
1551 }
1552 return true;
1553}
1554
1555bool AArch64LegalizerInfo::legalizeICMP(MachineInstr &MI,
1557 MachineIRBuilder &MIRBuilder) const {
1558 Register DstReg = MI.getOperand(0).getReg();
1559 Register SrcReg1 = MI.getOperand(2).getReg();
1560 Register SrcReg2 = MI.getOperand(3).getReg();
1561 LLT DstTy = MRI.getType(DstReg);
1562 LLT SrcTy = MRI.getType(SrcReg1);
1563
1564 // Check the vector types are legal
1565 if (DstTy.getScalarSizeInBits() != SrcTy.getScalarSizeInBits() ||
1566 DstTy.getNumElements() != SrcTy.getNumElements() ||
1567 (DstTy.getSizeInBits() != 64 && DstTy.getSizeInBits() != 128))
1568 return false;
1569
1570 // Lower G_ICMP NE to G_ICMP EQ followed by a NOT, to allow better pattern
1571 // matching in later passes.
1572 CmpInst::Predicate Pred = (CmpInst::Predicate)MI.getOperand(1).getPredicate();
1573 if (Pred != CmpInst::ICMP_NE)
1574 return true;
1575 Register CmpReg =
1576 MIRBuilder
1577 .buildICmp(CmpInst::ICMP_EQ, MRI.getType(DstReg), SrcReg1, SrcReg2)
1578 .getReg(0);
1579 MIRBuilder.buildNot(DstReg, CmpReg);
1580
1581 MI.eraseFromParent();
1582 return true;
1583}
1584
1585bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,
1587 LegalizerHelper &Helper) const {
1588 // To allow for imported patterns to match, we ensure that the rotate amount
1589 // is 64b with an extension.
1590 Register AmtReg = MI.getOperand(2).getReg();
1591 LLT AmtTy = MRI.getType(AmtReg);
1592 (void)AmtTy;
1593 assert(AmtTy.isScalar() && "Expected a scalar rotate");
1594 assert(AmtTy.getSizeInBits() < 64 && "Expected this rotate to be legal");
1595 auto NewAmt = Helper.MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);
1596 Helper.Observer.changingInstr(MI);
1597 MI.getOperand(2).setReg(NewAmt.getReg(0));
1598 Helper.Observer.changedInstr(MI);
1599 return true;
1600}
1601
1602bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
1604 GISelChangeObserver &Observer) const {
1605 assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
1606 // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP +
1607 // G_ADD_LOW instructions.
1608 // By splitting this here, we can optimize accesses in the small code model by
1609 // folding the G_ADD_LOW into the load/store offset.
1610 auto &GlobalOp = MI.getOperand(1);
1611 // Don't modify an intrinsic call.
1612 if (GlobalOp.isSymbol())
1613 return true;
1614 const auto *GV = GlobalOp.getGlobal();
1615 if (GV->isThreadLocal())
1616 return true; // Don't want to modify TLS vars.
1617
1618 auto &TM = ST->getTargetLowering()->getTargetMachine();
1619 unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);
1620
1621 if (OpFlags & AArch64II::MO_GOT)
1622 return true;
1623
1624 auto Offset = GlobalOp.getOffset();
1625 Register DstReg = MI.getOperand(0).getReg();
1626 auto ADRP = MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {})
1627 .addGlobalAddress(GV, Offset, OpFlags | AArch64II::MO_PAGE);
1628 // Set the regclass on the dest reg too.
1629 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1630
1631 // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so
1632 // by creating a MOVK that sets bits 48-63 of the register to (global address
1633 // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to
1634 // prevent an incorrect tag being generated during relocation when the
1635 // global appears before the code section. Without the offset, a global at
1636 // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced
1637 // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 =
1638 // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe`
1639 // instead of `0xf`.
1640 // This assumes that we're in the small code model so we can assume a binary
1641 // size of <= 4GB, which makes the untagged PC relative offset positive. The
1642 // binary must also be loaded into address range [0, 2^48). Both of these
1643 // properties need to be ensured at runtime when using tagged addresses.
1644 if (OpFlags & AArch64II::MO_TAGGED) {
1645 assert(!Offset &&
1646 "Should not have folded in an offset for a tagged global!");
1647 ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP})
1648 .addGlobalAddress(GV, 0x100000000,
1649 AArch64II::MO_PREL | AArch64II::MO_G3)
1650 .addImm(48);
1651 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1652 }
1653
1654 MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})
1655 .addGlobalAddress(GV, Offset,
1656 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1657 MI.eraseFromParent();
1658 return true;
1659}
1660
1662 MachineInstr &MI) const {
1663 MachineIRBuilder &MIB = Helper.MIRBuilder;
1664 MachineRegisterInfo &MRI = *MIB.getMRI();
1665
1666 auto LowerUnaryOp = [&MI, &MIB](unsigned Opcode) {
1667 MIB.buildInstr(Opcode, {MI.getOperand(0)}, {MI.getOperand(2)});
1668 MI.eraseFromParent();
1669 return true;
1670 };
1671 auto LowerBinOp = [&MI, &MIB](unsigned Opcode) {
1672 MIB.buildInstr(Opcode, {MI.getOperand(0)},
1673 {MI.getOperand(2), MI.getOperand(3)});
1674 MI.eraseFromParent();
1675 return true;
1676 };
1677 auto LowerTriOp = [&MI, &MIB](unsigned Opcode) {
1678 MIB.buildInstr(Opcode, {MI.getOperand(0)},
1679 {MI.getOperand(2), MI.getOperand(3), MI.getOperand(4)});
1680 MI.eraseFromParent();
1681 return true;
1682 };
1683
1684 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
1685 switch (IntrinsicID) {
1686 case Intrinsic::vacopy: {
1687 unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;
1688 unsigned VaListSize =
1689 (ST->isTargetDarwin() || ST->isTargetWindows())
1690 ? PtrSize
1691 : ST->isTargetILP32() ? 20 : 32;
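// Darwin and Windows define va_list as a single pointer, while standard
// AAPCS64 uses a 32-byte structure (20 bytes under ILP32); vacopy simply
// copies the whole block.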
1692
1693 MachineFunction &MF = *MI.getMF();
1694 auto Val = MF.getRegInfo().createGenericVirtualRegister(
1695 LLT::scalar(VaListSize * 8));
1696 MIB.buildLoad(Val, MI.getOperand(2),
1697 *MF.getMachineMemOperand(MachinePointerInfo(),
1698 MachineMemOperand::MOLoad,
1699 VaListSize, Align(PtrSize)));
1700 MIB.buildStore(Val, MI.getOperand(1),
1701 *MF.getMachineMemOperand(MachinePointerInfo(),
1702 MachineMemOperand::MOStore,
1703 VaListSize, Align(PtrSize)));
1704 MI.eraseFromParent();
1705 return true;
1706 }
1707 case Intrinsic::get_dynamic_area_offset: {
1708 MIB.buildConstant(MI.getOperand(0).getReg(), 0);
1709 MI.eraseFromParent();
1710 return true;
1711 }
1712 case Intrinsic::aarch64_mops_memset_tag: {
1713 assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1714 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
1715 // the instruction).
1716 auto &Value = MI.getOperand(3);
1717 Register ExtValueReg = MIB.buildAnyExt(LLT::scalar(64), Value).getReg(0);
1718 Value.setReg(ExtValueReg);
1719 return true;
1720 }
1721 case Intrinsic::aarch64_prefetch: {
1722 auto &AddrVal = MI.getOperand(1);
1723
1724 int64_t IsWrite = MI.getOperand(2).getImm();
1725 int64_t Target = MI.getOperand(3).getImm();
1726 int64_t IsStream = MI.getOperand(4).getImm();
1727 int64_t IsData = MI.getOperand(5).getImm();
1728
1729 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
1730 (!IsData << 3) | // IsDataCache bit
1731 (Target << 1) | // Cache level bits
1732 (unsigned)IsStream; // Stream bit
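// For example, IsWrite=0, Target=1 (L2), IsStream=0, IsData=1 gives
// PrfOp = 0b00010, i.e. a PLDL2KEEP prefetch.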
1733
1734 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
1735 MI.eraseFromParent();
1736 return true;
1737 }
1738 case Intrinsic::aarch64_neon_uaddv:
1739 case Intrinsic::aarch64_neon_saddv:
1740 case Intrinsic::aarch64_neon_umaxv:
1741 case Intrinsic::aarch64_neon_smaxv:
1742 case Intrinsic::aarch64_neon_uminv:
1743 case Intrinsic::aarch64_neon_sminv: {
1744 bool IsSigned = IntrinsicID == Intrinsic::aarch64_neon_saddv ||
1745 IntrinsicID == Intrinsic::aarch64_neon_smaxv ||
1746 IntrinsicID == Intrinsic::aarch64_neon_sminv;
1747
1748 auto OldDst = MI.getOperand(0).getReg();
1749 auto OldDstTy = MRI.getType(OldDst);
1750 LLT NewDstTy = MRI.getType(MI.getOperand(2).getReg()).getElementType();
1751 if (OldDstTy == NewDstTy)
1752 return true;
1753
1754 auto NewDst = MRI.createGenericVirtualRegister(NewDstTy);
1755
1756 Helper.Observer.changingInstr(MI);
1757 MI.getOperand(0).setReg(NewDst);
1758 Helper.Observer.changedInstr(MI);
1759
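// The intrinsic now defines a scalar of the vector's element type; insert an
// extension (or copy) after it to widen the result back to the width the
// original destination expected.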
1760 MIB.setInsertPt(MIB.getMBB(), ++MIB.getInsertPt());
1761 MIB.buildExtOrTrunc(IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT,
1762 OldDst, NewDst);
1763
1764 return true;
1765 }
1766 case Intrinsic::aarch64_neon_uaddlp:
1767 case Intrinsic::aarch64_neon_saddlp: {
1768 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlp
1769 ? AArch64::G_UADDLP
1770 : AArch64::G_SADDLP;
1771 MIB.buildInstr(Opc, {MI.getOperand(0)}, {MI.getOperand(2)});
1772 MI.eraseFromParent();
1773
1774 return true;
1775 }
1776 case Intrinsic::aarch64_neon_uaddlv:
1777 case Intrinsic::aarch64_neon_saddlv: {
1778 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlv
1779 ? AArch64::G_UADDLV
1780 : AArch64::G_SADDLV;
1781 Register DstReg = MI.getOperand(0).getReg();
1782 Register SrcReg = MI.getOperand(2).getReg();
1783 LLT DstTy = MRI.getType(DstReg);
1784
1785 LLT MidTy, ExtTy;
1786 if (DstTy.isScalar() && DstTy.getScalarSizeInBits() <= 32) {
1787 MidTy = LLT::fixed_vector(4, 32);
1788 ExtTy = LLT::scalar(32);
1789 } else {
1790 MidTy = LLT::fixed_vector(2, 64);
1791 ExtTy = LLT::scalar(64);
1792 }
1793
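// The across-vector instructions leave their result in lane 0 of a vector
// register, so the sum is modelled as a vector def of MidTy and the scalar
// is then recovered with a lane-0 extract (plus a trunc for sub-32-bit
// destinations).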
1794 Register MidReg =
1795 MIB.buildInstr(Opc, {MidTy}, {SrcReg})->getOperand(0).getReg();
1796 Register ZeroReg =
1797 MIB.buildConstant(LLT::scalar(64), 0)->getOperand(0).getReg();
1798 Register ExtReg = MIB.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT, {ExtTy},
1799 {MidReg, ZeroReg})
1800 .getReg(0);
1801
1802 if (DstTy.getScalarSizeInBits() < 32)
1803 MIB.buildTrunc(DstReg, ExtReg);
1804 else
1805 MIB.buildCopy(DstReg, ExtReg);
1806
1807 MI.eraseFromParent();
1808
1809 return true;
1810 }
1811 case Intrinsic::aarch64_neon_smax:
1812 return LowerBinOp(TargetOpcode::G_SMAX);
1813 case Intrinsic::aarch64_neon_smin:
1814 return LowerBinOp(TargetOpcode::G_SMIN);
1815 case Intrinsic::aarch64_neon_umax:
1816 return LowerBinOp(TargetOpcode::G_UMAX);
1817 case Intrinsic::aarch64_neon_umin:
1818 return LowerBinOp(TargetOpcode::G_UMIN);
1819 case Intrinsic::aarch64_neon_fmax:
1820 return LowerBinOp(TargetOpcode::G_FMAXIMUM);
1821 case Intrinsic::aarch64_neon_fmin:
1822 return LowerBinOp(TargetOpcode::G_FMINIMUM);
1823 case Intrinsic::aarch64_neon_fmaxnm:
1824 return LowerBinOp(TargetOpcode::G_FMAXNUM);
1825 case Intrinsic::aarch64_neon_fminnm:
1826 return LowerBinOp(TargetOpcode::G_FMINNUM);
1827 case Intrinsic::aarch64_neon_pmull:
1828 case Intrinsic::aarch64_neon_pmull64:
1829 return LowerBinOp(AArch64::G_PMULL);
1830 case Intrinsic::aarch64_neon_smull:
1831 return LowerBinOp(AArch64::G_SMULL);
1832 case Intrinsic::aarch64_neon_umull:
1833 return LowerBinOp(AArch64::G_UMULL);
1834 case Intrinsic::aarch64_neon_sabd:
1835 return LowerBinOp(TargetOpcode::G_ABDS);
1836 case Intrinsic::aarch64_neon_uabd:
1837 return LowerBinOp(TargetOpcode::G_ABDU);
1838 case Intrinsic::aarch64_neon_uhadd:
1839 return LowerBinOp(TargetOpcode::G_UAVGFLOOR);
1840 case Intrinsic::aarch64_neon_urhadd:
1841 return LowerBinOp(TargetOpcode::G_UAVGCEIL);
1842 case Intrinsic::aarch64_neon_shadd:
1843 return LowerBinOp(TargetOpcode::G_SAVGFLOOR);
1844 case Intrinsic::aarch64_neon_srhadd:
1845 return LowerBinOp(TargetOpcode::G_SAVGCEIL);
1846 case Intrinsic::aarch64_neon_abs: {
1847 // Lower the intrinsic to G_ABS.
1848 MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, {MI.getOperand(2)});
1849 MI.eraseFromParent();
1850 return true;
1851 }
1852 case Intrinsic::aarch64_neon_sqadd: {
1853 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
1854 return LowerBinOp(TargetOpcode::G_SADDSAT);
1855 break;
1856 }
1857 case Intrinsic::aarch64_neon_sqsub: {
1858 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
1859 return LowerBinOp(TargetOpcode::G_SSUBSAT);
1860 break;
1861 }
1862 case Intrinsic::aarch64_neon_uqadd: {
1863 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
1864 return LowerBinOp(TargetOpcode::G_UADDSAT);
1865 break;
1866 }
1867 case Intrinsic::aarch64_neon_uqsub: {
1868 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
1869 return LowerBinOp(TargetOpcode::G_USUBSAT);
1870 break;
1871 }
1872 case Intrinsic::aarch64_neon_udot:
1873 return LowerTriOp(AArch64::G_UDOT);
1874 case Intrinsic::aarch64_neon_sdot:
1875 return LowerTriOp(AArch64::G_SDOT);
1876 case Intrinsic::aarch64_neon_usdot:
1877 return LowerTriOp(AArch64::G_USDOT);
1878 case Intrinsic::aarch64_neon_sqxtn:
1879 return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_S);
1880 case Intrinsic::aarch64_neon_sqxtun:
1881 return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_U);
1882 case Intrinsic::aarch64_neon_uqxtn:
1883 return LowerUnaryOp(TargetOpcode::G_TRUNC_USAT_U);
1884
1885 case Intrinsic::vector_reverse:
1886 // TODO: Add support for vector_reverse
1887 return false;
1888 }
1889
1890 return true;
1891}
1892
1893bool AArch64LegalizerInfo::legalizeShlAshrLshr(
1894 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1895 GISelChangeObserver &Observer) const {
1896 assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
1897 MI.getOpcode() == TargetOpcode::G_LSHR ||
1898 MI.getOpcode() == TargetOpcode::G_SHL);
1899 // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
1900 // imported patterns can select it later. Either way, it will be legal.
1901 Register AmtReg = MI.getOperand(2).getReg();
1902 auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI);
1903 if (!VRegAndVal)
1904 return true;
1905 // Check the shift amount is in range for an immediate form.
1906 int64_t Amount = VRegAndVal->Value.getSExtValue();
1907 if (Amount > 31)
1908 return true; // This will have to remain a register variant.
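// e.g. a constant amount of 3 is rebuilt below as
// %amt:_(s64) = G_CONSTANT i64 3, the 64-bit immediate form the imported
// patterns expect.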
1909 auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount);
1910 Observer.changingInstr(MI);
1911 MI.getOperand(2).setReg(ExtCst.getReg(0));
1912 Observer.changedInstr(MI);
1913 return true;
1914}
1915
1916 static void matchLDPSTPAddrMode(Register Root, Register &Base, int &Offset,
1917 MachineRegisterInfo &MRI) {
1918 Base = Root;
1919 Offset = 0;
1920
1921 Register NewBase;
1922 int64_t NewOffset;
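// Fold a G_PTR_ADD with a constant offset into the LDP/STP addressing mode
// when the offset fits the scaled imm7 field, i.e. is a multiple of 8 in
// [-512, 504]; that is what isShiftedInt<7, 3> checks below.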
1923 if (mi_match(Root, MRI, m_GPtrAdd(m_Reg(NewBase), m_ICst(NewOffset))) &&
1924 isShiftedInt<7, 3>(NewOffset)) {
1925 Base = NewBase;
1926 Offset = NewOffset;
1927 }
1928}
1929
1930// FIXME: This should be removed and replaced with the generic bitcast legalize
1931// action.
1932bool AArch64LegalizerInfo::legalizeLoadStore(
1933 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1934 GISelChangeObserver &Observer) const {
1935 assert(MI.getOpcode() == TargetOpcode::G_STORE ||
1936 MI.getOpcode() == TargetOpcode::G_LOAD);
1937 // Here we just try to handle vector loads/stores where our value type might
1938 // have pointer elements, which the SelectionDAG importer can't handle. To
1939 // allow the existing patterns for s64 to fire for p0, we just try to bitcast
1940 // the value to use s64 types.
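// e.g. a G_LOAD producing <2 x p0> becomes a G_LOAD of <2 x s64> followed by
// a G_BITCAST back to <2 x p0> (and the mirror image for G_STORE).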
1941
1942 // Custom legalization requires that the instruction, if not deleted, be
1943 // fully legalized. To allow further legalization of the instruction, we
1944 // create a new instruction and erase the existing one.
1945
1946 Register ValReg = MI.getOperand(0).getReg();
1947 const LLT ValTy = MRI.getType(ValReg);
1948
1949 if (ValTy == LLT::scalar(128)) {
1950
1951 AtomicOrdering Ordering = (*MI.memoperands_begin())->getSuccessOrdering();
1952 bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
1953 bool IsLoadAcquire = IsLoad && Ordering == AtomicOrdering::Acquire;
1954 bool IsStoreRelease = !IsLoad && Ordering == AtomicOrdering::Release;
1955 bool IsRcpC3 =
1956 ST->hasLSE2() && ST->hasRCPC3() && (IsLoadAcquire || IsStoreRelease);
1957
1958 LLT s64 = LLT::scalar(64);
1959
1960 unsigned Opcode;
1961 if (IsRcpC3) {
1962 Opcode = IsLoad ? AArch64::LDIAPPX : AArch64::STILPX;
1963 } else {
1964 // For LSE2, loads/stores should have been converted to monotonic and had
1965 // a fence inserted after them.
1966 assert(Ordering == AtomicOrdering::Monotonic ||
1967 Ordering == AtomicOrdering::Unordered);
1968 assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");
1969
1970 Opcode = IsLoad ? AArch64::LDPXi : AArch64::STPXi;
1971 }
1972
1973 MachineInstrBuilder NewI;
1974 if (IsLoad) {
1975 NewI = MIRBuilder.buildInstr(Opcode, {s64, s64}, {});
1976 MIRBuilder.buildMergeLikeInstr(
1977 ValReg, {NewI->getOperand(0), NewI->getOperand(1)});
1978 } else {
1979 auto Split = MIRBuilder.buildUnmerge(s64, MI.getOperand(0));
1980 NewI = MIRBuilder.buildInstr(
1981 Opcode, {}, {Split->getOperand(0), Split->getOperand(1)});
1982 }
1983
1984 if (IsRcpC3) {
1985 NewI.addUse(MI.getOperand(1).getReg());
1986 } else {
1987 Register Base;
1988 int Offset;
1989 matchLDPSTPAddrMode(MI.getOperand(1).getReg(), Base, Offset, MRI);
1990 NewI.addUse(Base);
1991 NewI.addImm(Offset / 8);
1992 }
1993
1994 NewI.cloneMemRefs(MI);
1995 constrainSelectedInstRegOperands(*NewI, *ST->getInstrInfo(),
1996 *MRI.getTargetRegisterInfo(),
1997 *ST->getRegBankInfo());
1998 MI.eraseFromParent();
1999 return true;
2000 }
2001
2002 if (!ValTy.isPointerVector() ||
2003 ValTy.getElementType().getAddressSpace() != 0) {
2004 LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
2005 return false;
2006 }
2007
2008 unsigned PtrSize = ValTy.getElementType().getSizeInBits();
2009 const LLT NewTy = LLT::vector(ValTy.getElementCount(), PtrSize);
2010 auto &MMO = **MI.memoperands_begin();
2011 MMO.setType(NewTy);
2012
2013 if (MI.getOpcode() == TargetOpcode::G_STORE) {
2014 auto Bitcast = MIRBuilder.buildBitcast(NewTy, ValReg);
2015 MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1), MMO);
2016 } else {
2017 auto NewLoad = MIRBuilder.buildLoad(NewTy, MI.getOperand(1), MMO);
2018 MIRBuilder.buildBitcast(ValReg, NewLoad);
2019 }
2020 MI.eraseFromParent();
2021 return true;
2022}
2023
2024bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
2025 MachineRegisterInfo &MRI,
2026 MachineIRBuilder &MIRBuilder) const {
2027 MachineFunction &MF = MIRBuilder.getMF();
2028 Align Alignment(MI.getOperand(2).getImm());
2029 Register Dst = MI.getOperand(0).getReg();
2030 Register ListPtr = MI.getOperand(1).getReg();
2031
2032 LLT PtrTy = MRI.getType(ListPtr);
2033 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
2034
2035 const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
2036 const Align PtrAlign = Align(PtrSize);
2037 auto List = MIRBuilder.buildLoad(
2038 PtrTy, ListPtr,
2039 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
2040 PtrTy, PtrAlign));
2041
2042 MachineInstrBuilder DstPtr;
2043 if (Alignment > PtrAlign) {
2044 // Realign the list to the actual required alignment.
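// e.g. for a 16-byte aligned type this computes (List + 15) & ~15, done as a
// G_PTR_ADD of Alignment - 1 followed by a G_PTRMASK that clears the low
// Log2(Alignment) bits.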
2045 auto AlignMinus1 =
2046 MIRBuilder.buildConstant(IntPtrTy, Alignment.value() - 1);
2047 auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0));
2048 DstPtr = MIRBuilder.buildMaskLowPtrBits(PtrTy, ListTmp, Log2(Alignment));
2049 } else
2050 DstPtr = List;
2051
2052 LLT ValTy = MRI.getType(Dst);
2053 uint64_t ValSize = ValTy.getSizeInBits() / 8;
2054 MIRBuilder.buildLoad(
2055 Dst, DstPtr,
2056 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
2057 ValTy, std::max(Alignment, PtrAlign)));
2058
2059 auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrAlign));
2060
2061 auto NewList = MIRBuilder.buildPtrAdd(PtrTy, DstPtr, Size.getReg(0));
2062
2063 MIRBuilder.buildStore(NewList, ListPtr,
2064 *MF.getMachineMemOperand(MachinePointerInfo(),
2065 MachineMemOperand::MOStore,
2066 PtrTy, PtrAlign));
2067
2068 MI.eraseFromParent();
2069 return true;
2070}
2071
2072bool AArch64LegalizerInfo::legalizeBitfieldExtract(
2073 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2074 // Only legal if we can select immediate forms.
2075 // TODO: Lower this otherwise.
2076 return getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI) &&
2077 getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
2078}
2079
2080bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
2081 MachineRegisterInfo &MRI,
2082 LegalizerHelper &Helper) const {
2083 // When there is no integer popcount instruction (FEAT_CSSC isn't available),
2084 // it can be more efficiently lowered to the following sequence that uses
2085 // AdvSIMD registers/instructions as long as the copies to/from the AdvSIMD
2086 // registers are cheap.
2087 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
2088 // CNT V0.8B, V0.8B // 8xbyte pop-counts
2089 // ADDV B0, V0.8B // sum 8xbyte pop-counts
2090 // UMOV X0, V0.B[0] // copy byte result back to integer reg
2091 //
2092 // For 128 bit vector popcounts, we lower to the following sequence:
2093 // cnt.16b v0, v0 // v8s16, v4s32, v2s64
2094 // uaddlp.8h v0, v0 // v8s16, v4s32, v2s64
2095 // uaddlp.4s v0, v0 // v4s32, v2s64
2096 // uaddlp.2d v0, v0 // v2s64
2097 //
2098 // For 64 bit vector popcounts, we lower to the following sequence:
2099 // cnt.8b v0, v0 // v4s16, v2s32
2100 // uaddlp.4h v0, v0 // v4s16, v2s32
2101 // uaddlp.2s v0, v0 // v2s32
2102
2103 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2104 Register Dst = MI.getOperand(0).getReg();
2105 Register Val = MI.getOperand(1).getReg();
2106 LLT Ty = MRI.getType(Val);
2107 unsigned Size = Ty.getSizeInBits();
2108
2109 assert(Ty == MRI.getType(Dst) &&
2110 "Expected src and dst to have the same type!");
2111
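// With FEAT_CSSC the scalar CNT instruction handles 32/64-bit popcounts
// directly, so only the 128-bit scalar case needs this split into two
// 64-bit G_CTPOPs plus an add.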
2112 if (ST->hasCSSC() && Ty.isScalar() && Size == 128) {
2113 LLT s64 = LLT::scalar(64);
2114
2115 auto Split = MIRBuilder.buildUnmerge(s64, Val);
2116 auto CTPOP1 = MIRBuilder.buildCTPOP(s64, Split->getOperand(0));
2117 auto CTPOP2 = MIRBuilder.buildCTPOP(s64, Split->getOperand(1));
2118 auto Add = MIRBuilder.buildAdd(s64, CTPOP1, CTPOP2);
2119
2120 MIRBuilder.buildZExt(Dst, Add);
2121 MI.eraseFromParent();
2122 return true;
2123 }
2124
2125 if (!ST->hasNEON() ||
2126 MI.getMF()->getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) {
2127 // Use generic lowering when custom lowering is not possible.
2128 return Ty.isScalar() && (Size == 32 || Size == 64) &&
2129 Helper.lowerBitCount(MI) ==
2130 LegalizerHelper::LegalizeResult::Legalized;
2131 }
2132
2133 // Pre-conditioning: widen Val up to the nearest vector type.
2134 // s32,s64,v4s16,v2s32 -> v8i8
2135 // v8s16,v4s32,v2s64 -> v16i8
2136 LLT VTy = Size == 128 ? LLT::fixed_vector(16, 8) : LLT::fixed_vector(8, 8);
2137 if (Ty.isScalar()) {
2138 assert((Size == 32 || Size == 64 || Size == 128) && "Expected only 32, 64, or 128 bit scalars!");
2139 if (Size == 32) {
2140 Val = MIRBuilder.buildZExt(LLT::scalar(64), Val).getReg(0);
2141 }
2142 }
2143 Val = MIRBuilder.buildBitcast(VTy, Val).getReg(0);
2144
2145 // Count bits in each byte-sized lane.
2146 auto CTPOP = MIRBuilder.buildCTPOP(VTy, Val);
2147
2148 // Sum across lanes.
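// With the dot product extension, UDOT of the byte counts against an
// all-ones vector sums each group of four bytes into a 32-bit lane; v2s64
// results then take one extra UADDLP to pair the 32-bit lanes.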
2149
2150 if (ST->hasDotProd() && Ty.isVector() && Ty.getNumElements() >= 2 &&
2151 Ty.getScalarSizeInBits() != 16) {
2152 LLT Dt = Ty == LLT::fixed_vector(2, 64) ? LLT::fixed_vector(4, 32) : Ty;
2153 auto Zeros = MIRBuilder.buildConstant(Dt, 0);
2154 auto Ones = MIRBuilder.buildConstant(VTy, 1);
2155 MachineInstrBuilder Sum;
2156
2157 if (Ty == LLT::fixed_vector(2, 64)) {
2158 auto UDOT =
2159 MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2160 Sum = MIRBuilder.buildInstr(AArch64::G_UADDLP, {Ty}, {UDOT});
2161 } else if (Ty == LLT::fixed_vector(4, 32)) {
2162 Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2163 } else if (Ty == LLT::fixed_vector(2, 32)) {
2164 Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2165 } else {
2166 llvm_unreachable("unexpected vector shape");
2167 }
2168
2169 Sum->getOperand(0).setReg(Dst);
2170 MI.eraseFromParent();
2171 return true;
2172 }
2173
2174 Register HSum = CTPOP.getReg(0);
2175 unsigned Opc;
2176 SmallVector<LLT> HAddTys;
2177 if (Ty.isScalar()) {
2178 Opc = Intrinsic::aarch64_neon_uaddlv;
2179 HAddTys.push_back(LLT::scalar(32));
2180 } else if (Ty == LLT::fixed_vector(8, 16)) {
2181 Opc = Intrinsic::aarch64_neon_uaddlp;
2182 HAddTys.push_back(LLT::fixed_vector(8, 16));
2183 } else if (Ty == LLT::fixed_vector(4, 32)) {
2184 Opc = Intrinsic::aarch64_neon_uaddlp;
2185 HAddTys.push_back(LLT::fixed_vector(8, 16));
2186 HAddTys.push_back(LLT::fixed_vector(4, 32));
2187 } else if (Ty == LLT::fixed_vector(2, 64)) {
2188 Opc = Intrinsic::aarch64_neon_uaddlp;
2189 HAddTys.push_back(LLT::fixed_vector(8, 16));
2190 HAddTys.push_back(LLT::fixed_vector(4, 32));
2191 HAddTys.push_back(LLT::fixed_vector(2, 64));
2192 } else if (Ty == LLT::fixed_vector(4, 16)) {
2193 Opc = Intrinsic::aarch64_neon_uaddlp;
2194 HAddTys.push_back(LLT::fixed_vector(4, 16));
2195 } else if (Ty == LLT::fixed_vector(2, 32)) {
2196 Opc = Intrinsic::aarch64_neon_uaddlp;
2197 HAddTys.push_back(LLT::fixed_vector(4, 16));
2198 HAddTys.push_back(LLT::fixed_vector(2, 32));
2199 } else
2200 llvm_unreachable("unexpected vector shape");
2201 MachineInstrBuilder UADD;
2202 for (LLT HTy : HAddTys) {
2203 UADD = MIRBuilder.buildIntrinsic(Opc, {HTy}).addUse(HSum);
2204 HSum = UADD.getReg(0);
2205 }
2206
2207 // Post-conditioning.
2208 if (Ty.isScalar() && (Size == 64 || Size == 128))
2209 MIRBuilder.buildZExt(Dst, UADD);
2210 else
2211 UADD->getOperand(0).setReg(Dst);
2212 MI.eraseFromParent();
2213 return true;
2214}
2215
2216bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128(
2217 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2218 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2219 LLT s64 = LLT::scalar(64);
2220 auto Addr = MI.getOperand(1).getReg();
2221 auto DesiredI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(2));
2222 auto NewI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(3));
2223 auto DstLo = MRI.createGenericVirtualRegister(s64);
2224 auto DstHi = MRI.createGenericVirtualRegister(s64);
2225
2226 MachineInstrBuilder CAS;
2227 if (ST->hasLSE()) {
2228 // We have 128-bit CASP instructions taking XSeqPair registers, which are
2229 // s128. We need the merge/unmerge to bracket the expansion and pair up with
2230 // the rest of the MIR so we must reassemble the extracted registers into a
2231 // 128-bit known-regclass one with code like this:
2232 //
2233 // %in1 = REG_SEQUENCE Lo, Hi ; One for each input
2234 // %out = CASP %in1, ...
2235 // %OldLo = G_EXTRACT %out, 0
2236 // %OldHi = G_EXTRACT %out, 64
2237 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2238 unsigned Opcode;
2239 switch (Ordering) {
2240 case AtomicOrdering::Acquire:
2241 Opcode = AArch64::CASPAX;
2242 break;
2243 case AtomicOrdering::Release:
2244 Opcode = AArch64::CASPLX;
2245 break;
2246 case AtomicOrdering::AcquireRelease:
2247 case AtomicOrdering::SequentiallyConsistent:
2248 Opcode = AArch64::CASPALX;
2249 break;
2250 default:
2251 Opcode = AArch64::CASPX;
2252 break;
2253 }
2254
2255 LLT s128 = LLT::scalar(128);
2256 auto CASDst = MRI.createGenericVirtualRegister(s128);
2257 auto CASDesired = MRI.createGenericVirtualRegister(s128);
2258 auto CASNew = MRI.createGenericVirtualRegister(s128);
2259 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASDesired}, {})
2260 .addUse(DesiredI->getOperand(0).getReg())
2261 .addImm(AArch64::sube64)
2262 .addUse(DesiredI->getOperand(1).getReg())
2263 .addImm(AArch64::subo64);
2264 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASNew}, {})
2265 .addUse(NewI->getOperand(0).getReg())
2266 .addImm(AArch64::sube64)
2267 .addUse(NewI->getOperand(1).getReg())
2268 .addImm(AArch64::subo64);
2269
2270 CAS = MIRBuilder.buildInstr(Opcode, {CASDst}, {CASDesired, CASNew, Addr});
2271
2272 MIRBuilder.buildExtract({DstLo}, {CASDst}, 0);
2273 MIRBuilder.buildExtract({DstHi}, {CASDst}, 64);
2274 } else {
2275 // The -O0 CMP_SWAP_128 is friendlier to generate code for because LDXP/STXP
2276 // can take arbitrary registers so it just has the normal GPR64 operands the
2277 // rest of AArch64 is expecting.
2278 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2279 unsigned Opcode;
2280 switch (Ordering) {
2281 case AtomicOrdering::Acquire:
2282 Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
2283 break;
2284 case AtomicOrdering::Release:
2285 Opcode = AArch64::CMP_SWAP_128_RELEASE;
2286 break;
2287 case AtomicOrdering::AcquireRelease:
2288 case AtomicOrdering::SequentiallyConsistent:
2289 Opcode = AArch64::CMP_SWAP_128;
2290 break;
2291 default:
2292 Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
2293 break;
2294 }
2295
2296 auto Scratch = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2297 CAS = MIRBuilder.buildInstr(Opcode, {DstLo, DstHi, Scratch},
2298 {Addr, DesiredI->getOperand(0),
2299 DesiredI->getOperand(1), NewI->getOperand(0),
2300 NewI->getOperand(1)});
2301 }
2302
2303 CAS.cloneMemRefs(MI);
2304 constrainSelectedInstRegOperands(*CAS, *ST->getInstrInfo(),
2305 *MRI.getTargetRegisterInfo(),
2306 *ST->getRegBankInfo());
2307
2308 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {DstLo, DstHi});
2309 MI.eraseFromParent();
2310 return true;
2311}
2312
2313bool AArch64LegalizerInfo::legalizeCTTZ(MachineInstr &MI,
2314 LegalizerHelper &Helper) const {
2315 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2316 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2317 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
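// Without FEAT_CSSC there is no native count-trailing-zeros, so use the
// RBIT + CLZ identity: cttz(x) == ctlz(bitreverse(x)).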
2318 auto BitReverse = MIRBuilder.buildBitReverse(Ty, MI.getOperand(1));
2319 MIRBuilder.buildCTLZ(MI.getOperand(0).getReg(), BitReverse);
2320 MI.eraseFromParent();
2321 return true;
2322}
2323
2324bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
2325 LegalizerHelper &Helper) const {
2326 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2327
2328 // Tagged version MOPSMemorySetTagged is legalised in legalizeIntrinsic
2329 if (MI.getOpcode() == TargetOpcode::G_MEMSET) {
2330 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
2331 // the instruction).
2332 auto &Value = MI.getOperand(1);
2333 Register ExtValueReg =
2334 MIRBuilder.buildAnyExt(LLT::scalar(64), Value).getReg(0);
2335 Value.setReg(ExtValueReg);
2336 return true;
2337 }
2338
2339 return false;
2340}
2341
2342bool AArch64LegalizerInfo::legalizeExtractVectorElt(
2343 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2344 const GExtractVectorElement *Element = cast<GExtractVectorElement>(&MI);
2345 auto VRegAndVal =
2346 getIConstantVRegValWithLookThrough(Element->getIndexReg(), MRI);
2347 if (VRegAndVal)
2348 return true;
2349 LLT VecTy = MRI.getType(Element->getVectorReg());
2350 if (VecTy.isScalableVector())
2351 return true;
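// Otherwise the index is variable on a fixed-length vector: lower it through
// a stack temporary (spill the vector, reload the addressed element) via the
// generic helper below.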
2352 return Helper.lowerExtractInsertVectorElt(MI) !=
2353 LegalizerHelper::LegalizeResult::UnableToLegalize;
2354}
2355
2356bool AArch64LegalizerInfo::legalizeDynStackAlloc(
2357 MachineInstr &MI, LegalizerHelper &Helper) const {
2358 MachineFunction &MF = *MI.getParent()->getParent();
2359 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2360 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2361
2362 // If stack probing is not enabled for this function, use the default
2363 // lowering.
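// ("probe-stack"="inline-asm" is what front ends typically emit when
// stack-clash style probing is requested.)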
2364 if (!MF.getFunction().hasFnAttribute("probe-stack") ||
2365 MF.getFunction().getFnAttribute("probe-stack").getValueAsString() !=
2366 "inline-asm") {
2367 Helper.lowerDynStackAlloc(MI);
2368 return true;
2369 }
2370
2371 Register Dst = MI.getOperand(0).getReg();
2372 Register AllocSize = MI.getOperand(1).getReg();
2373 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
2374
2375 assert(MRI.getType(Dst) == LLT::pointer(0, 64) &&
2376 "Unexpected type for dynamic alloca");
2377 assert(MRI.getType(AllocSize) == LLT::scalar(64) &&
2378 "Unexpected type for dynamic alloca");
2379
2380 LLT PtrTy = MRI.getType(Dst);
2381 Register SPReg =
2382 Helper.getTargetLowering().getStackPointerRegisterToSaveRestore();
2383 Register SPTmp =
2384 Helper.getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
2385 auto NewMI =
2386 MIRBuilder.buildInstr(AArch64::PROBED_STACKALLOC_DYN, {}, {SPTmp});
2387 MRI.setRegClass(NewMI.getReg(0), &AArch64::GPR64commonRegClass);
2388 MIRBuilder.setInsertPt(*NewMI->getParent(), NewMI);
2389 MIRBuilder.buildCopy(Dst, SPTmp);
2390
2391 MI.eraseFromParent();
2392 return true;
2393}
2394
2395bool AArch64LegalizerInfo::legalizePrefetch(MachineInstr &MI,
2396 LegalizerHelper &Helper) const {
2397 MachineIRBuilder &MIB = Helper.MIRBuilder;
2398 auto &AddrVal = MI.getOperand(0);
2399
2400 int64_t IsWrite = MI.getOperand(1).getImm();
2401 int64_t Locality = MI.getOperand(2).getImm();
2402 int64_t IsData = MI.getOperand(3).getImm();
2403
2404 bool IsStream = Locality == 0;
2405 if (Locality != 0) {
2406 assert(Locality <= 3 && "Prefetch locality out-of-range");
2407 // The IR locality degree is the inverse of the cache level that PRFM
2408 // encodes, so flip the value here.
2409 // The PRFM target encoding starts at 0 for level 1 (L1).
2410 Locality = 3 - Locality;
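// e.g. an IR locality of 3 (maximally temporal) selects L1 (encoding 0),
// while a locality of 1 selects L3 (encoding 2).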
2411 }
2412
2413 unsigned PrfOp = (IsWrite << 4) | (!IsData << 3) | (Locality << 1) | IsStream;
2414
2415 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
2416 MI.eraseFromParent();
2417 return true;
2418}