AArch64LegalizerInfo.cpp
1//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the MachineLegalizer class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
15#include "AArch64Subtarget.h"
16#include "llvm/ADT/STLExtras.h"
27#include "llvm/IR/Intrinsics.h"
28#include "llvm/IR/IntrinsicsAArch64.h"
29#include "llvm/IR/Type.h"
31#include <initializer_list>
32
33#define DEBUG_TYPE "aarch64-legalinfo"
34
35using namespace llvm;
36using namespace LegalizeActions;
37using namespace LegalizeMutations;
38using namespace LegalityPredicates;
39using namespace MIPatternMatch;
40
42 : ST(&ST) {
43 using namespace TargetOpcode;
44 const LLT p0 = LLT::pointer(0, 64);
45 const LLT s8 = LLT::scalar(8);
46 const LLT s16 = LLT::scalar(16);
47 const LLT s32 = LLT::scalar(32);
48 const LLT s64 = LLT::scalar(64);
49 const LLT s128 = LLT::scalar(128);
50 const LLT v16s8 = LLT::fixed_vector(16, 8);
51 const LLT v8s8 = LLT::fixed_vector(8, 8);
52 const LLT v4s8 = LLT::fixed_vector(4, 8);
53 const LLT v2s8 = LLT::fixed_vector(2, 8);
54 const LLT v8s16 = LLT::fixed_vector(8, 16);
55 const LLT v4s16 = LLT::fixed_vector(4, 16);
56 const LLT v2s16 = LLT::fixed_vector(2, 16);
57 const LLT v2s32 = LLT::fixed_vector(2, 32);
58 const LLT v4s32 = LLT::fixed_vector(4, 32);
59 const LLT v2s64 = LLT::fixed_vector(2, 64);
60 const LLT v2p0 = LLT::fixed_vector(2, p0);
61
62 const LLT nxv16s8 = LLT::scalable_vector(16, s8);
63 const LLT nxv8s16 = LLT::scalable_vector(8, s16);
64 const LLT nxv4s32 = LLT::scalable_vector(4, s32);
65 const LLT nxv2s64 = LLT::scalable_vector(2, s64);
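  // Naming convention for the LLTs above: vNsM is a fixed-width vector of N
  // elements of M bits (e.g. v4s32 is <4 x s32>), nxvNsM is the scalable
  // <vscale x N x sM> equivalent, and p0 is a 64-bit pointer in address
  // space 0.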
66
67 std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
68 v16s8, v8s16, v4s32,
69 v2s64, v2p0,
70 /* End 128bit types */
71 /* Begin 64bit types */
72 v8s8, v4s16, v2s32};
73 std::initializer_list<LLT> ScalarAndPtrTypesList = {s8, s16, s32, s64, p0};
74 SmallVector<LLT, 8> PackedVectorAllTypesVec(PackedVectorAllTypeList);
75 SmallVector<LLT, 8> ScalarAndPtrTypesVec(ScalarAndPtrTypesList);
76
77 const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();
78
79 // FIXME: support subtargets which have neon/fp-armv8 disabled.
80 if (!ST.hasNEON() || !ST.hasFPARMv8()) {
82 return;
83 }
84
85 // Some instructions only support s16 if the subtarget has full 16-bit FP
86 // support.
87 const bool HasFP16 = ST.hasFullFP16();
88 const LLT &MinFPScalar = HasFP16 ? s16 : s32;
89
90 const bool HasCSSC = ST.hasCSSC();
91 const bool HasRCPC3 = ST.hasRCPC3();
92 const bool HasSVE = ST.hasSVE();
93
94  getActionDefinitionsBuilder(
95      {G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
96 .legalFor({p0, s8, s16, s32, s64})
97 .legalFor({v2s8, v4s8, v8s8, v16s8, v2s16, v4s16, v8s16, v2s32, v4s32,
98 v2s64, v2p0})
99 .widenScalarToNextPow2(0)
100 .clampScalar(0, s8, s64)
103 .clampNumElements(0, v8s8, v16s8)
104 .clampNumElements(0, v4s16, v8s16)
105 .clampNumElements(0, v2s32, v4s32)
106 .clampMaxNumElements(0, s64, 2)
107 .clampMaxNumElements(0, p0, 2)
109
111 .legalFor({p0, s16, s32, s64})
112 .legalFor(PackedVectorAllTypeList)
116 .clampScalar(0, s16, s64)
117 .clampNumElements(0, v8s8, v16s8)
118 .clampNumElements(0, v4s16, v8s16)
119 .clampNumElements(0, v2s32, v4s32)
120 .clampMaxNumElements(0, s64, 2)
121 .clampMaxNumElements(0, p0, 2);
122
124 .legalIf(all(typeInSet(0, {s32, s64, p0}), typeInSet(1, {s8, s16, s32}),
125 smallerThan(1, 0)))
126 .widenScalarToNextPow2(0)
127 .clampScalar(0, s32, s64)
129 .minScalar(1, s8)
130 .maxScalarIf(typeInSet(0, {s32}), 1, s16)
131 .maxScalarIf(typeInSet(0, {s64, p0}), 1, s32);
132
134 .legalIf(all(typeInSet(0, {s16, s32, s64, p0}),
135 typeInSet(1, {s32, s64, s128, p0}), smallerThan(0, 1)))
136 .widenScalarToNextPow2(1)
137 .clampScalar(1, s32, s128)
139 .minScalar(0, s16)
140 .maxScalarIf(typeInSet(1, {s32}), 0, s16)
141 .maxScalarIf(typeInSet(1, {s64, p0}), 0, s32)
142 .maxScalarIf(typeInSet(1, {s128}), 0, s64);
143
144 getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR})
145 .legalFor({s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64})
146 .legalFor(HasSVE, {nxv16s8, nxv8s16, nxv4s32, nxv2s64})
147 .widenScalarToNextPow2(0)
148 .clampScalar(0, s32, s64)
149 .clampMaxNumElements(0, s8, 16)
150 .clampMaxNumElements(0, s16, 8)
151 .clampNumElements(0, v2s32, v4s32)
152 .clampNumElements(0, v2s64, v2s64)
153      .minScalarOrEltIf(
154          [=](const LegalityQuery &Query) {
155 return Query.Types[0].getNumElements() <= 2;
156 },
157 0, s32)
158 .minScalarOrEltIf(
159 [=](const LegalityQuery &Query) {
160 return Query.Types[0].getNumElements() <= 4;
161 },
162 0, s16)
163 .minScalarOrEltIf(
164 [=](const LegalityQuery &Query) {
165 return Query.Types[0].getNumElements() <= 16;
166 },
167 0, s8)
168 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
170
172 .legalFor({s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64})
173 .widenScalarToNextPow2(0)
174 .clampScalar(0, s32, s64)
175 .clampMaxNumElements(0, s8, 16)
176 .clampMaxNumElements(0, s16, 8)
177 .clampNumElements(0, v2s32, v4s32)
178 .clampNumElements(0, v2s64, v2s64)
179      .minScalarOrEltIf(
180          [=](const LegalityQuery &Query) {
181 return Query.Types[0].getNumElements() <= 2;
182 },
183 0, s32)
184 .minScalarOrEltIf(
185 [=](const LegalityQuery &Query) {
186 return Query.Types[0].getNumElements() <= 4;
187 },
188 0, s16)
189 .minScalarOrEltIf(
190 [=](const LegalityQuery &Query) {
191 return Query.Types[0].getNumElements() <= 16;
192 },
193 0, s8)
194 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
196
197 getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
198 .customIf([=](const LegalityQuery &Query) {
199 const auto &SrcTy = Query.Types[0];
200 const auto &AmtTy = Query.Types[1];
201 return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
202 AmtTy.getSizeInBits() == 32;
203 })
204 .legalFor({
205 {s32, s32},
206 {s32, s64},
207 {s64, s64},
208 {v8s8, v8s8},
209 {v16s8, v16s8},
210 {v4s16, v4s16},
211 {v8s16, v8s16},
212 {v2s32, v2s32},
213 {v4s32, v4s32},
214 {v2s64, v2s64},
215 })
216 .widenScalarToNextPow2(0)
217 .clampScalar(1, s32, s64)
218 .clampScalar(0, s32, s64)
219 .clampNumElements(0, v8s8, v16s8)
220 .clampNumElements(0, v4s16, v8s16)
221 .clampNumElements(0, v2s32, v4s32)
222 .clampNumElements(0, v2s64, v2s64)
224 .minScalarSameAs(1, 0)
228
230 .legalFor({{p0, s64}, {v2p0, v2s64}})
231 .clampScalarOrElt(1, s64, s64)
232 .clampNumElements(0, v2p0, v2p0);
233
234 getActionDefinitionsBuilder(G_PTRMASK).legalFor({{p0, s64}});
235
236 getActionDefinitionsBuilder({G_SDIV, G_UDIV})
237 .legalFor({s32, s64})
238 .libcallFor({s128})
239 .clampScalar(0, s32, s64)
241 .scalarize(0);
242
243 getActionDefinitionsBuilder({G_SREM, G_UREM, G_SDIVREM, G_UDIVREM})
244 .lowerFor({s8, s16, s32, s64, v2s32, v4s32, v2s64})
245 .libcallFor({s128})
247 .minScalarOrElt(0, s32)
248 .clampNumElements(0, v2s32, v4s32)
249 .clampNumElements(0, v2s64, v2s64)
250 .scalarize(0);
251
252 getActionDefinitionsBuilder({G_SMULO, G_UMULO})
253 .widenScalarToNextPow2(0, /*Min = */ 32)
254 .clampScalar(0, s32, s64)
255 .lower();
256
257 getActionDefinitionsBuilder({G_SMULH, G_UMULH})
258 .legalFor({s64, v16s8, v8s16, v4s32})
259 .lower();
260
261 getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
262 .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
263 .legalFor(HasCSSC, {s32, s64})
264 .minScalar(HasCSSC, 0, s32)
265 .clampNumElements(0, v8s8, v16s8)
266 .clampNumElements(0, v4s16, v8s16)
267 .clampNumElements(0, v2s32, v4s32)
268 .lower();
269
270 // FIXME: Legal vector types are only legal with NEON.
272 .legalFor(HasCSSC, {s32, s64})
273 .legalFor(PackedVectorAllTypeList)
274 .customIf([=](const LegalityQuery &Q) {
275 // TODO: Fix suboptimal codegen for 128+ bit types.
276 LLT SrcTy = Q.Types[0];
277 return SrcTy.isScalar() && SrcTy.getSizeInBits() < 128;
278 })
279 .widenScalarIf(
280 [=](const LegalityQuery &Query) { return Query.Types[0] == v4s8; },
281 [=](const LegalityQuery &Query) { return std::make_pair(0, v4s16); })
282 .widenScalarIf(
283 [=](const LegalityQuery &Query) { return Query.Types[0] == v2s16; },
284 [=](const LegalityQuery &Query) { return std::make_pair(0, v2s32); })
285 .clampNumElements(0, v8s8, v16s8)
286 .clampNumElements(0, v4s16, v8s16)
287 .clampNumElements(0, v2s32, v4s32)
288 .clampNumElements(0, v2s64, v2s64)
290 .lower();
291
292 getActionDefinitionsBuilder({G_ABDS, G_ABDU})
293 .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
294 .lower();
295
296  getActionDefinitionsBuilder(
297      {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
298 .legalFor({{s32, s32}, {s64, s32}})
299 .clampScalar(0, s32, s64)
300 .clampScalar(1, s32, s64)
302
303 getActionDefinitionsBuilder({G_FSHL, G_FSHR})
304 .customFor({{s32, s32}, {s32, s64}, {s64, s64}})
305 .lower();
306
308 .legalFor({{s32, s64}, {s64, s64}})
309 .customIf([=](const LegalityQuery &Q) {
310 return Q.Types[0].isScalar() && Q.Types[1].getScalarSizeInBits() < 64;
311 })
312 .lower();
314
315 getActionDefinitionsBuilder({G_SBFX, G_UBFX})
316 .customFor({{s32, s32}, {s64, s64}});
317
318 auto always = [=](const LegalityQuery &Q) { return true; };
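  // Scalar G_CTPOP without CSSC, s128, and the vector types listed in the
  // customFor clauses below are custom legalized; they are dispatched to
  // legalizeCTPOP() via legalizeCustom() further down in this file.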
320 .legalFor(HasCSSC, {{s32, s32}, {s64, s64}})
321 .legalFor({{v8s8, v8s8}, {v16s8, v16s8}})
322 .customFor(!HasCSSC, {{s32, s32}, {s64, s64}})
323 .customFor({{s128, s128},
324 {v4s16, v4s16},
325 {v8s16, v8s16},
326 {v2s32, v2s32},
327 {v4s32, v4s32},
328 {v2s64, v2s64}})
329 .clampScalar(0, s32, s128)
331 .minScalarEltSameAsIf(always, 1, 0)
332 .maxScalarEltSameAsIf(always, 1, 0)
333 .clampNumElements(0, v8s8, v16s8)
334 .clampNumElements(0, v4s16, v8s16)
335 .clampNumElements(0, v2s32, v4s32)
336 .clampNumElements(0, v2s64, v2s64)
339
341 .legalFor({{s32, s32},
342 {s64, s64},
343 {v8s8, v8s8},
344 {v16s8, v16s8},
345 {v4s16, v4s16},
346 {v8s16, v8s16},
347 {v2s32, v2s32},
348 {v4s32, v4s32}})
349 .widenScalarToNextPow2(1, /*Min=*/32)
350 .clampScalar(1, s32, s64)
351 .clampNumElements(0, v8s8, v16s8)
352 .clampNumElements(0, v4s16, v8s16)
353 .clampNumElements(0, v2s32, v4s32)
356 .scalarSameSizeAs(0, 1);
357
358 getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).lower();
359
361 .lowerIf(isVector(0))
362 .widenScalarToNextPow2(1, /*Min=*/32)
363 .clampScalar(1, s32, s64)
364 .scalarSameSizeAs(0, 1)
365 .legalFor(HasCSSC, {s32, s64})
366 .customFor(!HasCSSC, {s32, s64});
367
368 getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF).lower();
369
370 getActionDefinitionsBuilder(G_BITREVERSE)
371 .legalFor({s32, s64, v8s8, v16s8})
372 .widenScalarToNextPow2(0, /*Min = */ 32)
374 .clampScalar(0, s32, s64)
375 .clampNumElements(0, v8s8, v16s8)
376 .clampNumElements(0, v4s16, v8s16)
377 .clampNumElements(0, v2s32, v4s32)
378 .clampNumElements(0, v2s64, v2s64)
381 .lower();
382
384 .legalFor({s32, s64, v4s16, v8s16, v2s32, v4s32, v2s64})
386 .clampScalar(0, s32, s64)
387 .clampNumElements(0, v4s16, v8s16)
388 .clampNumElements(0, v2s32, v4s32)
389 .clampNumElements(0, v2s64, v2s64)
391
392 getActionDefinitionsBuilder({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
393 .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64})
394 .legalFor(HasSVE, {nxv16s8, nxv8s16, nxv4s32, nxv2s64})
395 .clampNumElements(0, v8s8, v16s8)
396 .clampNumElements(0, v4s16, v8s16)
397 .clampNumElements(0, v2s32, v4s32)
398 .clampMaxNumElements(0, s64, 2)
401 .lower();
402
403  getActionDefinitionsBuilder(
404      {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FSQRT, G_FMAXNUM, G_FMINNUM,
405 G_FMAXIMUM, G_FMINIMUM, G_FCEIL, G_FFLOOR, G_FRINT, G_FNEARBYINT,
406 G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
407 .legalFor({s32, s64, v2s32, v4s32, v2s64})
408 .legalFor(HasFP16, {s16, v4s16, v8s16})
409 .libcallFor({s128})
410 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
411 .minScalarOrElt(0, MinFPScalar)
412 .clampNumElements(0, v4s16, v8s16)
413 .clampNumElements(0, v2s32, v4s32)
414 .clampNumElements(0, v2s64, v2s64)
416
417 getActionDefinitionsBuilder({G_FABS, G_FNEG})
418 .legalFor({s32, s64, v2s32, v4s32, v2s64})
419 .legalFor(HasFP16, {s16, v4s16, v8s16})
420 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
422 .clampNumElements(0, v4s16, v8s16)
423 .clampNumElements(0, v2s32, v4s32)
424 .clampNumElements(0, v2s64, v2s64)
426 .lowerFor({s16, v4s16, v8s16});
427
429 .libcallFor({s32, s64, s128})
430 .minScalar(0, s32)
431 .scalarize(0);
432
433 getActionDefinitionsBuilder({G_INTRINSIC_LRINT, G_INTRINSIC_LLRINT})
434 .legalFor({{s64, MinFPScalar}, {s64, s32}, {s64, s64}})
435 .libcallFor({{s64, s128}})
436 .minScalarOrElt(1, MinFPScalar);
437
438 getActionDefinitionsBuilder({G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2,
439 G_FLOG10, G_FTAN, G_FEXP, G_FEXP2, G_FEXP10,
440 G_FACOS, G_FASIN, G_FATAN, G_FATAN2, G_FCOSH,
441 G_FSINH, G_FTANH, G_FMODF})
442 // We need a call for these, so we always need to scalarize.
443 .scalarize(0)
444 // Regardless of FP16 support, widen 16-bit elements to 32-bits.
445 .minScalar(0, s32)
446 .libcallFor({s32, s64, s128});
447 getActionDefinitionsBuilder({G_FPOWI, G_FLDEXP})
448 .scalarize(0)
449 .minScalar(0, s32)
450 .libcallFor({{s32, s32}, {s64, s32}, {s128, s32}});
451
452 // TODO: Libcall support for s128.
453 // TODO: s16 should be legal with full FP16 support.
454 getActionDefinitionsBuilder({G_LROUND, G_LLROUND})
455 .legalFor({{s64, s32}, {s64, s64}});
456
457 // TODO: Custom legalization for mismatched types.
458 getActionDefinitionsBuilder(G_FCOPYSIGN)
460 [](const LegalityQuery &Query) { return Query.Types[0].isScalar(); },
461 [=](const LegalityQuery &Query) {
462 const LLT Ty = Query.Types[0];
463 return std::pair(0, LLT::fixed_vector(Ty == s16 ? 4 : 2, Ty));
464 })
465 .lower();
466
468
469 for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
470 auto &Actions = getActionDefinitionsBuilder(Op);
471
472 if (Op == G_SEXTLOAD)
474
475 // Atomics have zero extending behavior.
476 Actions
477 .legalForTypesWithMemDesc({{s32, p0, s8, 8},
478 {s32, p0, s16, 8},
479 {s32, p0, s32, 8},
480 {s64, p0, s8, 2},
481 {s64, p0, s16, 2},
482 {s64, p0, s32, 4},
483 {s64, p0, s64, 8},
484 {p0, p0, s64, 8},
485 {v2s32, p0, s64, 8}})
486 .widenScalarToNextPow2(0)
487 .clampScalar(0, s32, s64)
488 // TODO: We could support sum-of-pow2's but the lowering code doesn't know
489 // how to do that yet.
490 .unsupportedIfMemSizeNotPow2()
491 // Lower anything left over into G_*EXT and G_LOAD
492 .lower();
493 }
494
495 auto IsPtrVecPred = [=](const LegalityQuery &Query) {
496 const LLT &ValTy = Query.Types[0];
497 return ValTy.isPointerVector() && ValTy.getAddressSpace() == 0;
498 };
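  // IsPtrVecPred matches vectors of address-space-0 pointers; loads and stores
  // of those types are custom legalized (see the customIf(IsPtrVecPred)
  // clauses below).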
499
500  getActionDefinitionsBuilder(G_LOAD)
501      .customIf([=](const LegalityQuery &Query) {
502 return HasRCPC3 && Query.Types[0] == s128 &&
503 Query.MMODescrs[0].Ordering == AtomicOrdering::Acquire;
504 })
505 .customIf([=](const LegalityQuery &Query) {
506 return Query.Types[0] == s128 &&
507 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
508 })
509 .legalForTypesWithMemDesc({{s8, p0, s8, 8},
510 {s16, p0, s16, 8},
511 {s32, p0, s32, 8},
512 {s64, p0, s64, 8},
513 {p0, p0, s64, 8},
514 {s128, p0, s128, 8},
515 {v8s8, p0, s64, 8},
516 {v16s8, p0, s128, 8},
517 {v4s16, p0, s64, 8},
518 {v8s16, p0, s128, 8},
519 {v2s32, p0, s64, 8},
520 {v4s32, p0, s128, 8},
521 {v2s64, p0, s128, 8}})
522 // These extends are also legal
523 .legalForTypesWithMemDesc(
524 {{s32, p0, s8, 8}, {s32, p0, s16, 8}, {s64, p0, s32, 8}})
525 .legalForTypesWithMemDesc({
526 // SVE vscale x 128 bit base sizes
527 {nxv16s8, p0, nxv16s8, 8},
528 {nxv8s16, p0, nxv8s16, 8},
529 {nxv4s32, p0, nxv4s32, 8},
530 {nxv2s64, p0, nxv2s64, 8},
531 })
532 .widenScalarToNextPow2(0, /* MinSize = */ 8)
533 .clampMaxNumElements(0, s8, 16)
534 .clampMaxNumElements(0, s16, 8)
535 .clampMaxNumElements(0, s32, 4)
536 .clampMaxNumElements(0, s64, 2)
537 .clampMaxNumElements(0, p0, 2)
539 .clampScalar(0, s8, s64)
541 [=](const LegalityQuery &Query) {
542 // Clamp extending load results to 32-bits.
543 return Query.Types[0].isScalar() &&
544 Query.Types[0] != Query.MMODescrs[0].MemoryTy &&
545 Query.Types[0].getSizeInBits() > 32;
546 },
547 changeTo(0, s32))
548 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
549 .bitcastIf(typeInSet(0, {v4s8}),
550 [=](const LegalityQuery &Query) {
551 const LLT VecTy = Query.Types[0];
552 return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
553 })
554 .customIf(IsPtrVecPred)
555 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)
556 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0);
557
558  getActionDefinitionsBuilder(G_STORE)
559      .customIf([=](const LegalityQuery &Query) {
560 return HasRCPC3 && Query.Types[0] == s128 &&
561 Query.MMODescrs[0].Ordering == AtomicOrdering::Release;
562 })
563 .customIf([=](const LegalityQuery &Query) {
564 return Query.Types[0] == s128 &&
565 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
566 })
567 .legalForTypesWithMemDesc(
568 {{s8, p0, s8, 8}, {s16, p0, s8, 8}, // truncstorei8 from s16
569 {s32, p0, s8, 8}, // truncstorei8 from s32
570 {s64, p0, s8, 8}, // truncstorei8 from s64
571 {s16, p0, s16, 8}, {s32, p0, s16, 8}, // truncstorei16 from s32
572 {s64, p0, s16, 8}, // truncstorei16 from s64
573 {s32, p0, s8, 8}, {s32, p0, s16, 8}, {s32, p0, s32, 8},
574 {s64, p0, s64, 8}, {s64, p0, s32, 8}, // truncstorei32 from s64
575 {p0, p0, s64, 8}, {s128, p0, s128, 8}, {v16s8, p0, s128, 8},
576 {v8s8, p0, s64, 8}, {v4s16, p0, s64, 8}, {v8s16, p0, s128, 8},
577 {v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})
578 .legalForTypesWithMemDesc({
579 // SVE vscale x 128 bit base sizes
580 // TODO: Add nxv2p0. Consider bitcastIf.
581 // See #92130
582 // https://github.com/llvm/llvm-project/pull/92130#discussion_r1616888461
583 {nxv16s8, p0, nxv16s8, 8},
584 {nxv8s16, p0, nxv8s16, 8},
585 {nxv4s32, p0, nxv4s32, 8},
586 {nxv2s64, p0, nxv2s64, 8},
587 })
588 .clampScalar(0, s8, s64)
589 .minScalarOrElt(0, s8)
590 .lowerIf([=](const LegalityQuery &Query) {
591 return Query.Types[0].isScalar() &&
592 Query.Types[0] != Query.MMODescrs[0].MemoryTy;
593 })
594 // Maximum: sN * k = 128
595 .clampMaxNumElements(0, s8, 16)
596 .clampMaxNumElements(0, s16, 8)
597 .clampMaxNumElements(0, s32, 4)
598 .clampMaxNumElements(0, s64, 2)
599 .clampMaxNumElements(0, p0, 2)
601 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
602 .bitcastIf(all(typeInSet(0, {v4s8}),
603 LegalityPredicate([=](const LegalityQuery &Query) {
604 return Query.Types[0].getSizeInBits() ==
605 Query.MMODescrs[0].MemoryTy.getSizeInBits();
606 })),
607 [=](const LegalityQuery &Query) {
608 const LLT VecTy = Query.Types[0];
609 return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
610 })
611 .customIf(IsPtrVecPred)
612 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)
613 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
614 .lower();
615
616 getActionDefinitionsBuilder(G_INDEXED_STORE)
617 // Idx 0 == Ptr, Idx 1 == Val
618 // TODO: we can implement legalizations but as of now these are
619 // generated in a very specific way.
621 {p0, s8, s8, 8},
622 {p0, s16, s16, 8},
623 {p0, s32, s8, 8},
624 {p0, s32, s16, 8},
625 {p0, s32, s32, 8},
626 {p0, s64, s64, 8},
627 {p0, p0, p0, 8},
628 {p0, v8s8, v8s8, 8},
629 {p0, v16s8, v16s8, 8},
630 {p0, v4s16, v4s16, 8},
631 {p0, v8s16, v8s16, 8},
632 {p0, v2s32, v2s32, 8},
633 {p0, v4s32, v4s32, 8},
634 {p0, v2s64, v2s64, 8},
635 {p0, v2p0, v2p0, 8},
636 {p0, s128, s128, 8},
637 })
638 .unsupported();
639
640 auto IndexedLoadBasicPred = [=](const LegalityQuery &Query) {
641 LLT LdTy = Query.Types[0];
642 LLT PtrTy = Query.Types[1];
643 if (!llvm::is_contained(PackedVectorAllTypesVec, LdTy) &&
644 !llvm::is_contained(ScalarAndPtrTypesVec, LdTy) && LdTy != s128)
645 return false;
646 if (PtrTy != p0)
647 return false;
648 return true;
649 };
650 getActionDefinitionsBuilder(G_INDEXED_LOAD)
653 .legalIf(IndexedLoadBasicPred)
654 .unsupported();
655 getActionDefinitionsBuilder({G_INDEXED_SEXTLOAD, G_INDEXED_ZEXTLOAD})
656 .unsupportedIf(
658 .legalIf(all(typeInSet(0, {s16, s32, s64}),
659 LegalityPredicate([=](const LegalityQuery &Q) {
660 LLT LdTy = Q.Types[0];
661 LLT PtrTy = Q.Types[1];
662 LLT MemTy = Q.MMODescrs[0].MemoryTy;
663 if (PtrTy != p0)
664 return false;
665 if (LdTy == s16)
666 return MemTy == s8;
667 if (LdTy == s32)
668 return MemTy == s8 || MemTy == s16;
669 if (LdTy == s64)
670 return MemTy == s8 || MemTy == s16 || MemTy == s32;
671 return false;
672 })))
673 .unsupported();
674
675 // Constants
676  getActionDefinitionsBuilder(G_CONSTANT)
677      .legalFor({p0, s8, s16, s32, s64})
678 .widenScalarToNextPow2(0)
679 .clampScalar(0, s8, s64);
680 getActionDefinitionsBuilder(G_FCONSTANT)
681 // Always legalize s16 to prevent G_FCONSTANT being widened to G_CONSTANT
682 .legalFor({s16, s32, s64, s128})
683 .clampScalar(0, MinFPScalar, s128);
684
685 // FIXME: fix moreElementsToNextPow2
687 .legalFor({{s32, s32}, {s32, s64}, {s32, p0}})
689 .clampScalar(1, s32, s64)
690 .clampScalar(0, s32, s32)
693 [=](const LegalityQuery &Query) {
694 const LLT &Ty = Query.Types[0];
695 const LLT &SrcTy = Query.Types[1];
696 return Ty.isVector() && !SrcTy.isPointerVector() &&
697 Ty.getElementType() != SrcTy.getElementType();
698 },
699 0, 1)
700 .minScalarOrEltIf(
701 [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
702 1, s32)
703 .minScalarOrEltIf(
704 [=](const LegalityQuery &Query) {
705 return Query.Types[1].isPointerVector();
706 },
707 0, s64)
709 .clampNumElements(1, v8s8, v16s8)
710 .clampNumElements(1, v4s16, v8s16)
711 .clampNumElements(1, v2s32, v4s32)
712 .clampNumElements(1, v2s64, v2s64)
713 .clampNumElements(1, v2p0, v2p0)
714 .customIf(isVector(0));
715
717 .legalFor({{s32, s32},
718 {s32, s64},
719 {v4s32, v4s32},
720 {v2s32, v2s32},
721 {v2s64, v2s64}})
722 .legalFor(HasFP16, {{s32, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
724 .clampScalar(0, s32, s32)
725 .minScalarOrElt(1, MinFPScalar)
728 [=](const LegalityQuery &Query) {
729 const LLT &Ty = Query.Types[0];
730 const LLT &SrcTy = Query.Types[1];
731 return Ty.isVector() && !SrcTy.isPointerVector() &&
732 Ty.getElementType() != SrcTy.getElementType();
733 },
734 0, 1)
735 .clampNumElements(1, v4s16, v8s16)
736 .clampNumElements(1, v2s32, v4s32)
737 .clampMaxNumElements(1, s64, 2)
739 .libcallFor({{s32, s128}});
740
741 // Extensions
742 auto ExtLegalFunc = [=](const LegalityQuery &Query) {
743 unsigned DstSize = Query.Types[0].getSizeInBits();
744
745 // Handle legal vectors using legalFor
746 if (Query.Types[0].isVector())
747 return false;
748
749 if (DstSize < 8 || DstSize >= 128 || !isPowerOf2_32(DstSize))
750 return false; // Extending to a scalar s128 needs narrowing.
751
752 const LLT &SrcTy = Query.Types[1];
753
754 // Make sure we fit in a register otherwise. Don't bother checking that
755 // the source type is below 128 bits. We shouldn't be allowing anything
756 // through which is wider than the destination in the first place.
757 unsigned SrcSize = SrcTy.getSizeInBits();
758 if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
759 return false;
760
761 return true;
762 };
763 getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
764 .legalIf(ExtLegalFunc)
765 .legalFor({{v8s16, v8s8}, {v4s32, v4s16}, {v2s64, v2s32}})
766 .clampScalar(0, s64, s64) // Just for s128, others are handled above.
768 .clampMaxNumElements(1, s8, 8)
769 .clampMaxNumElements(1, s16, 4)
770 .clampMaxNumElements(1, s32, 2)
771 // Tries to convert a large EXTEND into two smaller EXTENDs
772 .lowerIf([=](const LegalityQuery &Query) {
773 return (Query.Types[0].getScalarSizeInBits() >
774 Query.Types[1].getScalarSizeInBits() * 2) &&
775 Query.Types[0].isVector() &&
776 (Query.Types[1].getScalarSizeInBits() == 8 ||
777 Query.Types[1].getScalarSizeInBits() == 16);
778 })
779 .clampMinNumElements(1, s8, 8)
780 .clampMinNumElements(1, s16, 4)
782
784 .legalFor({{v8s8, v8s16}, {v4s16, v4s32}, {v2s32, v2s64}})
786 .clampMaxNumElements(0, s8, 8)
787 .clampMaxNumElements(0, s16, 4)
788 .clampMaxNumElements(0, s32, 2)
790 [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
791 0, s8)
792 .lowerIf([=](const LegalityQuery &Query) {
793 LLT DstTy = Query.Types[0];
794 LLT SrcTy = Query.Types[1];
795 return DstTy.isVector() && SrcTy.getSizeInBits() > 128 &&
796 DstTy.getScalarSizeInBits() * 2 <= SrcTy.getScalarSizeInBits();
797 })
798 .clampMinNumElements(0, s8, 8)
799 .clampMinNumElements(0, s16, 4)
800 .alwaysLegal();
801
802 getActionDefinitionsBuilder({G_TRUNC_SSAT_S, G_TRUNC_SSAT_U, G_TRUNC_USAT_U})
803 .legalFor({{v8s8, v8s16}, {v4s16, v4s32}, {v2s32, v2s64}});
804
805 getActionDefinitionsBuilder(G_SEXT_INREG)
806 .legalFor({s32, s64})
807 .legalFor(PackedVectorAllTypeList)
808 .maxScalar(0, s64)
809 .clampNumElements(0, v8s8, v16s8)
810 .clampNumElements(0, v4s16, v8s16)
811 .clampNumElements(0, v2s32, v4s32)
812 .clampMaxNumElements(0, s64, 2)
813 .lower();
814
815 // FP conversions
817 .legalFor(
818 {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
819 .libcallFor({{s16, s128}, {s32, s128}, {s64, s128}})
820 .clampNumElements(0, v4s16, v4s16)
821 .clampNumElements(0, v2s32, v2s32)
822 .scalarize(0);
823
825 .legalFor(
826 {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
827 .libcallFor({{s128, s64}, {s128, s32}, {s128, s16}})
828 .clampNumElements(0, v4s32, v4s32)
829 .clampNumElements(0, v2s64, v2s64)
830 .scalarize(0);
831
832 // Conversions
833 getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
834 .legalFor({{s32, s32},
835 {s64, s32},
836 {s32, s64},
837 {s64, s64},
838 {v2s32, v2s32},
839 {v4s32, v4s32},
840 {v2s64, v2s64}})
841 .legalFor(HasFP16,
842 {{s32, s16}, {s64, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
843 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
845 // The range of a fp16 value fits into an i17, so we can lower the width
846 // to i64.
848 [=](const LegalityQuery &Query) {
849 return Query.Types[1] == s16 && Query.Types[0].getSizeInBits() > 64;
850 },
851 changeTo(0, s64))
854 .minScalar(0, s32)
855 .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
857 [=](const LegalityQuery &Query) {
858 return Query.Types[0].getScalarSizeInBits() <= 64 &&
859 Query.Types[0].getScalarSizeInBits() >
860 Query.Types[1].getScalarSizeInBits();
861 },
863 .widenScalarIf(
864 [=](const LegalityQuery &Query) {
865 return Query.Types[1].getScalarSizeInBits() <= 64 &&
866 Query.Types[0].getScalarSizeInBits() <
867 Query.Types[1].getScalarSizeInBits();
868 },
870 .clampNumElements(0, v4s16, v8s16)
871 .clampNumElements(0, v2s32, v4s32)
872 .clampMaxNumElements(0, s64, 2)
873 .libcallFor(
874 {{s32, s128}, {s64, s128}, {s128, s128}, {s128, s32}, {s128, s64}});
875
876 getActionDefinitionsBuilder({G_FPTOSI_SAT, G_FPTOUI_SAT})
877 .legalFor({{s32, s32},
878 {s64, s32},
879 {s32, s64},
880 {s64, s64},
881 {v2s32, v2s32},
882 {v4s32, v4s32},
883 {v2s64, v2s64}})
884 .legalFor(
885 HasFP16,
886 {{s16, s16}, {s32, s16}, {s64, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
887 // Handle types larger than i64 by scalarizing/lowering.
888 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
890 // The range of a fp16 value fits into an i17, so we can lower the width
891 // to i64.
893 [=](const LegalityQuery &Query) {
894 return Query.Types[1] == s16 && Query.Types[0].getSizeInBits() > 64;
895 },
896 changeTo(0, s64))
897 .lowerIf(::any(scalarWiderThan(0, 64), scalarWiderThan(1, 64)), 0)
899 .widenScalarToNextPow2(0, /*MinSize=*/32)
900 .minScalar(0, s32)
901 .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
903 [=](const LegalityQuery &Query) {
904 unsigned ITySize = Query.Types[0].getScalarSizeInBits();
905 return (ITySize == 16 || ITySize == 32 || ITySize == 64) &&
906 ITySize > Query.Types[1].getScalarSizeInBits();
907 },
909 .widenScalarIf(
910 [=](const LegalityQuery &Query) {
911 unsigned FTySize = Query.Types[1].getScalarSizeInBits();
912 return (FTySize == 16 || FTySize == 32 || FTySize == 64) &&
913 Query.Types[0].getScalarSizeInBits() < FTySize;
914 },
917 .clampNumElements(0, v4s16, v8s16)
918 .clampNumElements(0, v2s32, v4s32)
919 .clampMaxNumElements(0, s64, 2);
920
921 getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
922 .legalFor({{s32, s32},
923 {s64, s32},
924 {s32, s64},
925 {s64, s64},
926 {v2s32, v2s32},
927 {v4s32, v4s32},
928 {v2s64, v2s64}})
929 .legalFor(HasFP16,
930 {{s16, s32}, {s16, s64}, {v4s16, v4s16}, {v8s16, v8s16}})
931 .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
935 .minScalar(1, s32)
936 .lowerIf([](const LegalityQuery &Query) {
937 return Query.Types[1].isVector() &&
938 Query.Types[1].getScalarSizeInBits() == 64 &&
939 Query.Types[0].getScalarSizeInBits() == 16;
940 })
941 .widenScalarOrEltToNextPow2OrMinSize(0, /*MinSize=*/HasFP16 ? 16 : 32)
943 // v2i64->v2f32 needs to scalarize to avoid double-rounding issues.
944 [](const LegalityQuery &Query) {
945 return Query.Types[0].getScalarSizeInBits() == 32 &&
946 Query.Types[1].getScalarSizeInBits() == 64;
947 },
948 0)
949 .widenScalarIf(
950 [](const LegalityQuery &Query) {
951 return Query.Types[1].getScalarSizeInBits() <= 64 &&
952 Query.Types[0].getScalarSizeInBits() <
953 Query.Types[1].getScalarSizeInBits();
954 },
956 .widenScalarIf(
957 [](const LegalityQuery &Query) {
958 return Query.Types[0].getScalarSizeInBits() <= 64 &&
959 Query.Types[0].getScalarSizeInBits() >
960 Query.Types[1].getScalarSizeInBits();
961 },
963 .clampNumElements(0, v4s16, v8s16)
964 .clampNumElements(0, v2s32, v4s32)
965 .clampMaxNumElements(0, s64, 2)
966 .libcallFor({{s16, s128},
967 {s32, s128},
968 {s64, s128},
969 {s128, s128},
970 {s128, s32},
971 {s128, s64}});
972
973 // Control-flow
976 .legalFor({s32})
977 .clampScalar(0, s32, s32);
978 getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
979
981 .legalFor({{s32, s32}, {s64, s32}, {p0, s32}})
982 .widenScalarToNextPow2(0)
983 .clampScalar(0, s32, s64)
984 .clampScalar(1, s32, s32)
987 .lowerIf(isVector(0));
988
989 // Pointer-handling
990 getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
991
992 if (TM.getCodeModel() == CodeModel::Small)
993 getActionDefinitionsBuilder(G_GLOBAL_VALUE).custom();
994 else
995 getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
996
997 getActionDefinitionsBuilder(G_PTRAUTH_GLOBAL_VALUE)
998 .legalIf(all(typeIs(0, p0), typeIs(1, p0)));
999
1000 getActionDefinitionsBuilder(G_PTRTOINT)
1001 .legalFor({{s64, p0}, {v2s64, v2p0}})
1002 .widenScalarToNextPow2(0, 64)
1003 .clampScalar(0, s64, s64)
1004 .clampMaxNumElements(0, s64, 2);
1005
1006 getActionDefinitionsBuilder(G_INTTOPTR)
1007 .unsupportedIf([&](const LegalityQuery &Query) {
1008 return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
1009 })
1010 .legalFor({{p0, s64}, {v2p0, v2s64}})
1011 .clampMaxNumElements(1, s64, 2);
1012
1013  // Casts for 32 and 64-bit width types are just copies.
1014  // Same for 128-bit width types, except they are on the FPR bank.
1016 // Keeping 32-bit instructions legal to prevent regression in some tests
1017 .legalForCartesianProduct({s32, v2s16, v4s8})
1018 .legalForCartesianProduct({s64, v8s8, v4s16, v2s32})
1019 .legalForCartesianProduct({s128, v16s8, v8s16, v4s32, v2s64, v2p0})
1020 .customIf([=](const LegalityQuery &Query) {
1021 // Handle casts from i1 vectors to scalars.
1022 LLT DstTy = Query.Types[0];
1023 LLT SrcTy = Query.Types[1];
1024 return DstTy.isScalar() && SrcTy.isVector() &&
1025 SrcTy.getScalarSizeInBits() == 1;
1026 })
1027 .lowerIf([=](const LegalityQuery &Query) {
1028 return Query.Types[0].isVector() != Query.Types[1].isVector();
1029 })
1031 .clampNumElements(0, v8s8, v16s8)
1032 .clampNumElements(0, v4s16, v8s16)
1033 .clampNumElements(0, v2s32, v4s32)
1034 .lower();
1035
1036 getActionDefinitionsBuilder(G_VASTART).legalFor({p0});
1037
1038 // va_list must be a pointer, but most sized types are pretty easy to handle
1039 // as the destination.
1041 .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
1042 .clampScalar(0, s8, s64)
1043 .widenScalarToNextPow2(0, /*Min*/ 8);
1044
1045 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
1046 .lowerIf(
1047 all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0)));
1048
1049 bool UseOutlineAtomics = ST.outlineAtomics() && !ST.hasLSE();
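  // When outline atomics are requested and LSE is unavailable, the
  // compare-exchange and read-modify-write operations below become libcalls
  // to the outline atomic helpers instead of inline expansions.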
1050
1051 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
1052 .legalFor(!UseOutlineAtomics, {{s32, p0}, {s64, p0}})
1053 .customFor(!UseOutlineAtomics, {{s128, p0}})
1054 .libcallFor(UseOutlineAtomics,
1055 {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}, {s128, p0}})
1056 .clampScalar(0, s32, s64);
1057
1058 getActionDefinitionsBuilder({G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD,
1059 G_ATOMICRMW_SUB, G_ATOMICRMW_AND, G_ATOMICRMW_OR,
1060 G_ATOMICRMW_XOR})
1061 .legalFor(!UseOutlineAtomics, {{s32, p0}, {s64, p0}})
1062 .libcallFor(UseOutlineAtomics,
1063 {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}})
1064 .clampScalar(0, s32, s64);
1065
1066  // Do not outline these atomic operations, as per the comment in
1067 // AArch64ISelLowering.cpp's shouldExpandAtomicRMWInIR().
1068  getActionDefinitionsBuilder(
1069      {G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
1070 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)))
1071 .clampScalar(0, s32, s64);
1072
1073 getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});
1074
1075 // Merge/Unmerge
1076 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
1077 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
1078 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
1080 .widenScalarToNextPow2(LitTyIdx, 8)
1081 .widenScalarToNextPow2(BigTyIdx, 32)
1082 .clampScalar(LitTyIdx, s8, s64)
1083 .clampScalar(BigTyIdx, s32, s128)
1084 .legalIf([=](const LegalityQuery &Q) {
1085 switch (Q.Types[BigTyIdx].getSizeInBits()) {
1086 case 32:
1087 case 64:
1088 case 128:
1089 break;
1090 default:
1091 return false;
1092 }
1093 switch (Q.Types[LitTyIdx].getSizeInBits()) {
1094 case 8:
1095 case 16:
1096 case 32:
1097 case 64:
1098 return true;
1099 default:
1100 return false;
1101 }
1102 });
1103 }
1104
1105 // TODO : nxv4s16, nxv2s16, nxv2s32
1106 getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
1107 .legalFor(HasSVE, {{s16, nxv16s8, s64},
1108 {s16, nxv8s16, s64},
1109 {s32, nxv4s32, s64},
1110 {s64, nxv2s64, s64}})
1111 .unsupportedIf([=](const LegalityQuery &Query) {
1112 const LLT &EltTy = Query.Types[1].getElementType();
1113 if (Query.Types[1].isScalableVector())
1114 return false;
1115 return Query.Types[0] != EltTy;
1116 })
1117 .minScalar(2, s64)
1118 .customIf([=](const LegalityQuery &Query) {
1119 const LLT &VecTy = Query.Types[1];
1120 return VecTy == v8s8 || VecTy == v16s8 || VecTy == v2s16 ||
1121 VecTy == v4s16 || VecTy == v8s16 || VecTy == v2s32 ||
1122 VecTy == v4s32 || VecTy == v2s64 || VecTy == v2p0;
1123 })
1124 .minScalarOrEltIf(
1125 [=](const LegalityQuery &Query) {
1126            // We want to promote <M x s1> to <M x s64> if that wouldn't
1127 // cause the total vec size to be > 128b.
1128 return Query.Types[1].isFixedVector() &&
1129 Query.Types[1].getNumElements() <= 2;
1130 },
1131 0, s64)
1132 .minScalarOrEltIf(
1133 [=](const LegalityQuery &Query) {
1134 return Query.Types[1].isFixedVector() &&
1135 Query.Types[1].getNumElements() <= 4;
1136 },
1137 0, s32)
1138 .minScalarOrEltIf(
1139 [=](const LegalityQuery &Query) {
1140 return Query.Types[1].isFixedVector() &&
1141 Query.Types[1].getNumElements() <= 8;
1142 },
1143 0, s16)
1144 .minScalarOrEltIf(
1145 [=](const LegalityQuery &Query) {
1146 return Query.Types[1].isFixedVector() &&
1147 Query.Types[1].getNumElements() <= 16;
1148 },
1149 0, s8)
1150 .minScalarOrElt(0, s8) // Worst case, we need at least s8.
1152 .clampMaxNumElements(1, s64, 2)
1153 .clampMaxNumElements(1, s32, 4)
1154 .clampMaxNumElements(1, s16, 8)
1155 .clampMaxNumElements(1, s8, 16)
1156 .clampMaxNumElements(1, p0, 2)
1158
1159 getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
1160 .legalIf(
1161 typeInSet(0, {v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64, v2p0}))
1162 .legalFor(HasSVE, {{nxv16s8, s32, s64},
1163 {nxv8s16, s32, s64},
1164 {nxv4s32, s32, s64},
1165 {nxv2s64, s64, s64}})
1168 .clampNumElements(0, v8s8, v16s8)
1169 .clampNumElements(0, v4s16, v8s16)
1170 .clampNumElements(0, v2s32, v4s32)
1171 .clampMaxNumElements(0, s64, 2)
1172 .clampMaxNumElements(0, p0, 2)
1174
1175 getActionDefinitionsBuilder(G_BUILD_VECTOR)
1176 .legalFor({{v8s8, s8},
1177 {v16s8, s8},
1178 {v4s16, s16},
1179 {v8s16, s16},
1180 {v2s32, s32},
1181 {v4s32, s32},
1182 {v2s64, s64},
1183 {v2p0, p0}})
1184 .clampNumElements(0, v4s32, v4s32)
1185 .clampNumElements(0, v2s64, v2s64)
1186 .minScalarOrElt(0, s8)
1189 .minScalarSameAs(1, 0);
1190
1191 getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).lower();
1192
1193 getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
1194 .legalIf([=](const LegalityQuery &Query) {
1195 const LLT &DstTy = Query.Types[0];
1196 const LLT &SrcTy = Query.Types[1];
1197 // For now just support the TBL2 variant which needs the source vectors
1198 // to be the same size as the dest.
1199 if (DstTy != SrcTy)
1200 return false;
1201 return llvm::is_contained(
1202 {v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64}, DstTy);
1203 })
1204 // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors) or scalar
1205      // destinations; we just want those lowered into G_BUILD_VECTOR or
1206 // G_EXTRACT_ELEMENT.
1207 .lowerIf([=](const LegalityQuery &Query) {
1208 return !Query.Types[0].isVector() || !Query.Types[1].isVector();
1209 })
1210 .moreElementsIf(
1211 [](const LegalityQuery &Query) {
1212 return Query.Types[0].isVector() && Query.Types[1].isVector() &&
1213 Query.Types[0].getNumElements() >
1214 Query.Types[1].getNumElements();
1215 },
1216 changeTo(1, 0))
1219 [](const LegalityQuery &Query) {
1220 return Query.Types[0].isVector() && Query.Types[1].isVector() &&
1221 Query.Types[0].getNumElements() <
1222 Query.Types[1].getNumElements();
1223 },
1224 changeTo(0, 1))
1225 .widenScalarOrEltToNextPow2OrMinSize(0, 8)
1226 .clampNumElements(0, v8s8, v16s8)
1227 .clampNumElements(0, v4s16, v8s16)
1228 .clampNumElements(0, v4s32, v4s32)
1229 .clampNumElements(0, v2s64, v2s64)
1231 .bitcastIf(isPointerVector(0), [=](const LegalityQuery &Query) {
1232        // Bitcast pointer vectors to vectors of s64.
1233 const LLT DstTy = Query.Types[0];
1234 return std::pair(0, LLT::vector(DstTy.getElementCount(), 64));
1235 });
1236
1237 getActionDefinitionsBuilder(G_CONCAT_VECTORS)
1238 .legalFor({{v16s8, v8s8}, {v8s16, v4s16}, {v4s32, v2s32}})
1239 .bitcastIf(
1240 [=](const LegalityQuery &Query) {
1241 return Query.Types[0].getSizeInBits() <= 128 &&
1242 Query.Types[1].getSizeInBits() <= 64;
1243 },
1244 [=](const LegalityQuery &Query) {
1245 const LLT DstTy = Query.Types[0];
1246 const LLT SrcTy = Query.Types[1];
1247 return std::pair(
1248 0, DstTy.changeElementSize(SrcTy.getSizeInBits())
1251 SrcTy.getNumElements())));
1252 });
1253
1254 getActionDefinitionsBuilder(G_EXTRACT_SUBVECTOR)
1255 .legalFor({{v8s8, v16s8}, {v4s16, v8s16}, {v2s32, v4s32}})
1257 .immIdx(0); // Inform verifier imm idx 0 is handled.
1258
1259 // TODO: {nxv16s8, s8}, {nxv8s16, s16}
1260 getActionDefinitionsBuilder(G_SPLAT_VECTOR)
1261 .legalFor(HasSVE, {{nxv4s32, s32}, {nxv2s64, s64}});
1262
1263 getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({p0});
1264
1265 getActionDefinitionsBuilder(G_BRJT).legalFor({{p0, s64}});
1266
1267 getActionDefinitionsBuilder({G_TRAP, G_DEBUGTRAP, G_UBSANTRAP}).alwaysLegal();
1268
1269 getActionDefinitionsBuilder(G_DYN_STACKALLOC).custom();
1270
1271 getActionDefinitionsBuilder({G_STACKSAVE, G_STACKRESTORE}).lower();
1272
1273 if (ST.hasMOPS()) {
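    // FEAT_MOPS supplies memory copy/set instructions, so these operations
    // can stay as (legal or custom) generic ops instead of becoming libcalls
    // as in the else branch below.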
1274 // G_BZERO is not supported. Currently it is only emitted by
1275 // PreLegalizerCombiner for G_MEMSET with zero constant.
1276    getActionDefinitionsBuilder(G_BZERO).unsupported();
1277
1278    getActionDefinitionsBuilder(G_MEMSET)
1279 .legalForCartesianProduct({p0}, {s64}, {s64})
1280 .customForCartesianProduct({p0}, {s8}, {s64})
1281 .immIdx(0); // Inform verifier imm idx 0 is handled.
1282
1283 getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE})
1284 .legalForCartesianProduct({p0}, {p0}, {s64})
1285 .immIdx(0); // Inform verifier imm idx 0 is handled.
1286
1287 // G_MEMCPY_INLINE does not have a tailcall immediate
1288 getActionDefinitionsBuilder(G_MEMCPY_INLINE)
1289 .legalForCartesianProduct({p0}, {p0}, {s64});
1290
1291 } else {
1292 getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
1293 .libcall();
1294 }
1295
1296 // For fadd reductions we have pairwise operations available. We treat the
1297 // usual legal types as legal and handle the lowering to pairwise instructions
1298 // later.
1299 getActionDefinitionsBuilder(G_VECREDUCE_FADD)
1300 .legalFor({{s32, v2s32}, {s32, v4s32}, {s64, v2s64}})
1301 .legalFor(HasFP16, {{s16, v4s16}, {s16, v8s16}})
1302 .minScalarOrElt(0, MinFPScalar)
1303 .clampMaxNumElements(1, s64, 2)
1304 .clampMaxNumElements(1, s32, 4)
1305 .clampMaxNumElements(1, s16, 8)
1307 .scalarize(1)
1308 .lower();
1309
1310 // For fmul reductions we need to split up into individual operations. We
1311 // clamp to 128 bit vectors then to 64bit vectors to produce a cascade of
1312 // smaller types, followed by scalarizing what remains.
1313 getActionDefinitionsBuilder(G_VECREDUCE_FMUL)
1314 .minScalarOrElt(0, MinFPScalar)
1315 .clampMaxNumElements(1, s64, 2)
1316 .clampMaxNumElements(1, s32, 4)
1317 .clampMaxNumElements(1, s16, 8)
1318 .clampMaxNumElements(1, s32, 2)
1319 .clampMaxNumElements(1, s16, 4)
1320 .scalarize(1)
1321 .lower();
1322
1323 getActionDefinitionsBuilder({G_VECREDUCE_SEQ_FADD, G_VECREDUCE_SEQ_FMUL})
1324 .scalarize(2)
1325 .lower();
1326
1327 getActionDefinitionsBuilder(G_VECREDUCE_ADD)
1328 .legalFor({{s8, v8s8},
1329 {s8, v16s8},
1330 {s16, v4s16},
1331 {s16, v8s16},
1332 {s32, v2s32},
1333 {s32, v4s32},
1334 {s64, v2s64}})
1336 .clampMaxNumElements(1, s64, 2)
1337 .clampMaxNumElements(1, s32, 4)
1338 .clampMaxNumElements(1, s16, 8)
1339 .clampMaxNumElements(1, s8, 16)
1341 .scalarize(1);
1342
1343 getActionDefinitionsBuilder({G_VECREDUCE_FMIN, G_VECREDUCE_FMAX,
1344 G_VECREDUCE_FMINIMUM, G_VECREDUCE_FMAXIMUM})
1345 .legalFor({{s32, v2s32}, {s32, v4s32}, {s64, v2s64}})
1346 .legalFor(HasFP16, {{s16, v4s16}, {s16, v8s16}})
1347 .minScalarOrElt(0, MinFPScalar)
1348 .clampMaxNumElements(1, s64, 2)
1349 .clampMaxNumElements(1, s32, 4)
1350 .clampMaxNumElements(1, s16, 8)
1351 .scalarize(1)
1352 .lower();
1353
1354 getActionDefinitionsBuilder(G_VECREDUCE_MUL)
1355 .clampMaxNumElements(1, s32, 2)
1356 .clampMaxNumElements(1, s16, 4)
1357 .clampMaxNumElements(1, s8, 8)
1358 .scalarize(1)
1359 .lower();
1360
1361  getActionDefinitionsBuilder(
1362      {G_VECREDUCE_SMIN, G_VECREDUCE_SMAX, G_VECREDUCE_UMIN, G_VECREDUCE_UMAX})
1363 .legalFor({{s8, v8s8},
1364 {s8, v16s8},
1365 {s16, v4s16},
1366 {s16, v8s16},
1367 {s32, v2s32},
1368 {s32, v4s32}})
1369 .moreElementsIf(
1370 [=](const LegalityQuery &Query) {
1371 return Query.Types[1].isVector() &&
1372 Query.Types[1].getElementType() != s8 &&
1373 Query.Types[1].getNumElements() & 1;
1374 },
1376 .clampMaxNumElements(1, s64, 2)
1377 .clampMaxNumElements(1, s32, 4)
1378 .clampMaxNumElements(1, s16, 8)
1379 .clampMaxNumElements(1, s8, 16)
1380 .scalarize(1)
1381 .lower();
1382
1383  getActionDefinitionsBuilder(
1384      {G_VECREDUCE_OR, G_VECREDUCE_AND, G_VECREDUCE_XOR})
1385 // Try to break down into smaller vectors as long as they're at least 64
1386 // bits. This lets us use vector operations for some parts of the
1387 // reduction.
1388 .fewerElementsIf(
1389 [=](const LegalityQuery &Q) {
1390 LLT SrcTy = Q.Types[1];
1391 if (SrcTy.isScalar())
1392 return false;
1393 if (!isPowerOf2_32(SrcTy.getNumElements()))
1394 return false;
1395 // We can usually perform 64b vector operations.
1396 return SrcTy.getSizeInBits() > 64;
1397 },
1398 [=](const LegalityQuery &Q) {
1399 LLT SrcTy = Q.Types[1];
1400 return std::make_pair(1, SrcTy.divide(2));
1401 })
1402 .scalarize(1)
1403 .lower();
1404
1405 // TODO: Update this to correct handling when adding AArch64/SVE support.
1406 getActionDefinitionsBuilder(G_VECTOR_COMPRESS).lower();
1407
1408 // Access to floating-point environment.
1409 getActionDefinitionsBuilder({G_GET_FPENV, G_SET_FPENV, G_RESET_FPENV,
1410 G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
1411 .libcall();
1412
1413 getActionDefinitionsBuilder(G_IS_FPCLASS).lower();
1414
1415 getActionDefinitionsBuilder(G_PREFETCH).custom();
1416
1417 getActionDefinitionsBuilder({G_SCMP, G_UCMP}).lower();
1418
1420 verify(*ST.getInstrInfo());
1421}
1422
1423bool AArch64LegalizerInfo::legalizeCustom(
1424    LegalizerHelper &Helper, MachineInstr &MI,
1425    LostDebugLocObserver &LocObserver) const {
1426 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1427 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1428 GISelChangeObserver &Observer = Helper.Observer;
1429 switch (MI.getOpcode()) {
1430 default:
1431 // No idea what to do.
1432 return false;
1433 case TargetOpcode::G_VAARG:
1434 return legalizeVaArg(MI, MRI, MIRBuilder);
1435 case TargetOpcode::G_LOAD:
1436 case TargetOpcode::G_STORE:
1437 return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
1438 case TargetOpcode::G_SHL:
1439 case TargetOpcode::G_ASHR:
1440 case TargetOpcode::G_LSHR:
1441 return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
1442 case TargetOpcode::G_GLOBAL_VALUE:
1443 return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
1444 case TargetOpcode::G_SBFX:
1445 case TargetOpcode::G_UBFX:
1446 return legalizeBitfieldExtract(MI, MRI, Helper);
1447 case TargetOpcode::G_FSHL:
1448 case TargetOpcode::G_FSHR:
1449 return legalizeFunnelShift(MI, MRI, MIRBuilder, Observer, Helper);
1450 case TargetOpcode::G_ROTR:
1451 return legalizeRotate(MI, MRI, Helper);
1452 case TargetOpcode::G_CTPOP:
1453 return legalizeCTPOP(MI, MRI, Helper);
1454 case TargetOpcode::G_ATOMIC_CMPXCHG:
1455 return legalizeAtomicCmpxchg128(MI, MRI, Helper);
1456 case TargetOpcode::G_CTTZ:
1457 return legalizeCTTZ(MI, Helper);
1458 case TargetOpcode::G_BZERO:
1459 case TargetOpcode::G_MEMCPY:
1460 case TargetOpcode::G_MEMMOVE:
1461 case TargetOpcode::G_MEMSET:
1462 return legalizeMemOps(MI, Helper);
1463 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1464 return legalizeExtractVectorElt(MI, MRI, Helper);
1465 case TargetOpcode::G_DYN_STACKALLOC:
1466 return legalizeDynStackAlloc(MI, Helper);
1467 case TargetOpcode::G_PREFETCH:
1468 return legalizePrefetch(MI, Helper);
1469 case TargetOpcode::G_ABS:
1470 return Helper.lowerAbsToCNeg(MI);
1471 case TargetOpcode::G_ICMP:
1472 return legalizeICMP(MI, MRI, MIRBuilder);
1473 case TargetOpcode::G_BITCAST:
1474 return legalizeBitcast(MI, Helper);
1475 }
1476
1477 llvm_unreachable("expected switch to return");
1478}
1479
1480bool AArch64LegalizerInfo::legalizeBitcast(MachineInstr &MI,
1481 LegalizerHelper &Helper) const {
1482 assert(MI.getOpcode() == TargetOpcode::G_BITCAST && "Unexpected opcode");
1483 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
1484  // Handle casts from i1 vectors to scalars by spilling the vector to the
1485  // stack and reloading it as a scalar.
1486 if (!DstTy.isScalar() || !SrcTy.isVector() ||
1487 SrcTy.getElementType() != LLT::scalar(1))
1488 return false;
1489
1490 Helper.createStackStoreLoad(DstReg, SrcReg);
1491 MI.eraseFromParent();
1492 return true;
1493}
1494
1495bool AArch64LegalizerInfo::legalizeFunnelShift(MachineInstr &MI,
1497 MachineIRBuilder &MIRBuilder,
1498 GISelChangeObserver &Observer,
1499 LegalizerHelper &Helper) const {
1500 assert(MI.getOpcode() == TargetOpcode::G_FSHL ||
1501 MI.getOpcode() == TargetOpcode::G_FSHR);
1502
1503 // Keep as G_FSHR if shift amount is a G_CONSTANT, else use generic
1504 // lowering
1505 Register ShiftNo = MI.getOperand(3).getReg();
1506 LLT ShiftTy = MRI.getType(ShiftNo);
1507 auto VRegAndVal = getIConstantVRegValWithLookThrough(ShiftNo, MRI);
1508
1509 // Adjust shift amount according to Opcode (FSHL/FSHR)
1510 // Convert FSHL to FSHR
1511 LLT OperationTy = MRI.getType(MI.getOperand(0).getReg());
1512 APInt BitWidth(ShiftTy.getSizeInBits(), OperationTy.getSizeInBits(), false);
1513
1514 // Lower non-constant shifts and leave zero shifts to the optimizer.
1515 if (!VRegAndVal || VRegAndVal->Value.urem(BitWidth) == 0)
1516 return (Helper.lowerFunnelShiftAsShifts(MI) ==
1517            LegalizerHelper::LegalizeResult::Legalized);
1518
1519 APInt Amount = VRegAndVal->Value.urem(BitWidth);
1520
1521 Amount = MI.getOpcode() == TargetOpcode::G_FSHL ? BitWidth - Amount : Amount;
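  // A minimal sketch of the constant-amount case for s32: a G_FSHL by 8 is
  // rewritten as a G_FSHR by (32 - 8), with the amount widened to s64:
  //   %d:_(s32) = G_FSHL %a, %b, %c(s32)   ; %c = G_CONSTANT i32 8
  // becomes
  //   %amt:_(s64) = G_CONSTANT i64 24
  //   %d:_(s32) = G_FSHR %a, %b, %amt(s64)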
1522
1523  // If the instruction is a G_FSHR with a 64-bit G_CONSTANT shift amount in
1524  // the range [0, BitWidth), it is already legal.
1525 if (ShiftTy.getSizeInBits() == 64 && MI.getOpcode() == TargetOpcode::G_FSHR &&
1526 VRegAndVal->Value.ult(BitWidth))
1527 return true;
1528
1529 // Cast the ShiftNumber to a 64-bit type
1530 auto Cast64 = MIRBuilder.buildConstant(LLT::scalar(64), Amount.zext(64));
1531
1532 if (MI.getOpcode() == TargetOpcode::G_FSHR) {
1533 Observer.changingInstr(MI);
1534 MI.getOperand(3).setReg(Cast64.getReg(0));
1535 Observer.changedInstr(MI);
1536 }
1537 // If Opcode is FSHL, remove the FSHL instruction and create a FSHR
1538 // instruction
1539 else if (MI.getOpcode() == TargetOpcode::G_FSHL) {
1540 MIRBuilder.buildInstr(TargetOpcode::G_FSHR, {MI.getOperand(0).getReg()},
1541 {MI.getOperand(1).getReg(), MI.getOperand(2).getReg(),
1542 Cast64.getReg(0)});
1543 MI.eraseFromParent();
1544 }
1545 return true;
1546}
1547
1548bool AArch64LegalizerInfo::legalizeICMP(MachineInstr &MI,
1550 MachineIRBuilder &MIRBuilder) const {
1551 Register DstReg = MI.getOperand(0).getReg();
1552 Register SrcReg1 = MI.getOperand(2).getReg();
1553 Register SrcReg2 = MI.getOperand(3).getReg();
1554 LLT DstTy = MRI.getType(DstReg);
1555 LLT SrcTy = MRI.getType(SrcReg1);
1556
1557 // Check the vector types are legal
1558 if (DstTy.getScalarSizeInBits() != SrcTy.getScalarSizeInBits() ||
1559 DstTy.getNumElements() != SrcTy.getNumElements() ||
1560 (DstTy.getSizeInBits() != 64 && DstTy.getSizeInBits() != 128))
1561 return false;
1562
1563  // Lower G_ICMP NE to G_ICMP EQ followed by a NOT, to allow better pattern
1564  // matching in the following passes.
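  // For example (sketch):
  //   %c:_(v4s32) = G_ICMP intpred(ne), %a, %b
  // becomes
  //   %e:_(v4s32) = G_ICMP intpred(eq), %a, %b
  //   %c:_(v4s32) = G_XOR %e, <all ones>   ; emitted via buildNot()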
1565 CmpInst::Predicate Pred = (CmpInst::Predicate)MI.getOperand(1).getPredicate();
1566 if (Pred != CmpInst::ICMP_NE)
1567 return true;
1568 Register CmpReg =
1569 MIRBuilder
1570 .buildICmp(CmpInst::ICMP_EQ, MRI.getType(DstReg), SrcReg1, SrcReg2)
1571 .getReg(0);
1572 MIRBuilder.buildNot(DstReg, CmpReg);
1573
1574 MI.eraseFromParent();
1575 return true;
1576}
1577
1578bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,
1580 LegalizerHelper &Helper) const {
1581 // To allow for imported patterns to match, we ensure that the rotate amount
1582 // is 64b with an extension.
1583 Register AmtReg = MI.getOperand(2).getReg();
1584 LLT AmtTy = MRI.getType(AmtReg);
1585 (void)AmtTy;
1586 assert(AmtTy.isScalar() && "Expected a scalar rotate");
1587 assert(AmtTy.getSizeInBits() < 64 && "Expected this rotate to be legal");
1588 auto NewAmt = Helper.MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);
1589 Helper.Observer.changingInstr(MI);
1590 MI.getOperand(2).setReg(NewAmt.getReg(0));
1591 Helper.Observer.changedInstr(MI);
1592 return true;
1593}
1594
1595bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
1597 GISelChangeObserver &Observer) const {
1598 assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
1599 // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP +
1600 // G_ADD_LOW instructions.
1601 // By splitting this here, we can optimize accesses in the small code model by
1602  // folding the G_ADD_LOW into the load/store offset.
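  // Roughly (sketch):
  //   %g:_(p0) = G_GLOBAL_VALUE @var
  // becomes
  //   %page:_(p0) = ADRP @var              ; page address (MO_PAGE)
  //   %g:_(p0)    = G_ADD_LOW %page, @var  ; adds the low 12 bits of the address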
1603 auto &GlobalOp = MI.getOperand(1);
1604 // Don't modify an intrinsic call.
1605 if (GlobalOp.isSymbol())
1606 return true;
1607  const auto *GV = GlobalOp.getGlobal();
1608 if (GV->isThreadLocal())
1609 return true; // Don't want to modify TLS vars.
1610
1611 auto &TM = ST->getTargetLowering()->getTargetMachine();
1612 unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);
1613
1614 if (OpFlags & AArch64II::MO_GOT)
1615 return true;
1616
1617 auto Offset = GlobalOp.getOffset();
1618 Register DstReg = MI.getOperand(0).getReg();
1619 auto ADRP = MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {})
1620 .addGlobalAddress(GV, Offset, OpFlags | AArch64II::MO_PAGE);
1621 // Set the regclass on the dest reg too.
1622 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1623
1624 // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so
1625 // by creating a MOVK that sets bits 48-63 of the register to (global address
1626 // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to
1627 // prevent an incorrect tag being generated during relocation when the
1628 // global appears before the code section. Without the offset, a global at
1629 // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced
1630 // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 =
1631 // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe`
1632 // instead of `0xf`.
1633 // This assumes that we're in the small code model so we can assume a binary
1634 // size of <= 4GB, which makes the untagged PC relative offset positive. The
1635 // binary must also be loaded into address range [0, 2^48). Both of these
1636 // properties need to be ensured at runtime when using tagged addresses.
1637 if (OpFlags & AArch64II::MO_TAGGED) {
1638 assert(!Offset &&
1639 "Should not have folded in an offset for a tagged global!");
1640 ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP})
1641 .addGlobalAddress(GV, 0x100000000,
1643 .addImm(48);
1644 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1645 }
1646
1647 MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})
1648 .addGlobalAddress(GV, Offset,
1650 MI.eraseFromParent();
1651 return true;
1652}
1653
1655 MachineInstr &MI) const {
1656 MachineIRBuilder &MIB = Helper.MIRBuilder;
1657 MachineRegisterInfo &MRI = *MIB.getMRI();
1658
1659 auto LowerUnaryOp = [&MI, &MIB](unsigned Opcode) {
1660 MIB.buildInstr(Opcode, {MI.getOperand(0)}, {MI.getOperand(2)});
1661 MI.eraseFromParent();
1662 return true;
1663 };
1664 auto LowerBinOp = [&MI, &MIB](unsigned Opcode) {
1665 MIB.buildInstr(Opcode, {MI.getOperand(0)},
1666 {MI.getOperand(2), MI.getOperand(3)});
1667 MI.eraseFromParent();
1668 return true;
1669 };
1670 auto LowerTriOp = [&MI, &MIB](unsigned Opcode) {
1671 MIB.buildInstr(Opcode, {MI.getOperand(0)},
1672 {MI.getOperand(2), MI.getOperand(3), MI.getOperand(4)});
1673 MI.eraseFromParent();
1674 return true;
1675 };
1676
1677 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
1678 switch (IntrinsicID) {
1679 case Intrinsic::vacopy: {
1680 unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;
1681 unsigned VaListSize =
1682 (ST->isTargetDarwin() || ST->isTargetWindows())
1683 ? PtrSize
1684 : ST->isTargetILP32() ? 20 : 32;
1685
1686 MachineFunction &MF = *MI.getMF();
1688 LLT::scalar(VaListSize * 8));
1689 MIB.buildLoad(Val, MI.getOperand(2),
1692 VaListSize, Align(PtrSize)));
1693 MIB.buildStore(Val, MI.getOperand(1),
1696 VaListSize, Align(PtrSize)));
1697 MI.eraseFromParent();
1698 return true;
1699 }
1700 case Intrinsic::get_dynamic_area_offset: {
1701 MIB.buildConstant(MI.getOperand(0).getReg(), 0);
1702 MI.eraseFromParent();
1703 return true;
1704 }
1705 case Intrinsic::aarch64_mops_memset_tag: {
1706 assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1707 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
1708 // the instruction).
1709 auto &Value = MI.getOperand(3);
1710 Register ExtValueReg = MIB.buildAnyExt(LLT::scalar(64), Value).getReg(0);
1711 Value.setReg(ExtValueReg);
1712 return true;
1713 }
1714 case Intrinsic::aarch64_prefetch: {
1715 auto &AddrVal = MI.getOperand(1);
1716
1717 int64_t IsWrite = MI.getOperand(2).getImm();
1718 int64_t Target = MI.getOperand(3).getImm();
1719 int64_t IsStream = MI.getOperand(4).getImm();
1720 int64_t IsData = MI.getOperand(5).getImm();
1721
1722 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
1723 (!IsData << 3) | // IsDataCache bit
1724 (Target << 1) | // Cache level bits
1725 (unsigned)IsStream; // Stream bit
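    // For example, a read of data into L1 with temporal locality
    // (IsWrite=0, Target=0, IsStream=0, IsData=1) encodes as PrfOp == 0,
    // i.e. PLDL1KEEP.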
1726
1727 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
1728 MI.eraseFromParent();
1729 return true;
1730 }
1731 case Intrinsic::aarch64_neon_uaddv:
1732 case Intrinsic::aarch64_neon_saddv:
1733 case Intrinsic::aarch64_neon_umaxv:
1734 case Intrinsic::aarch64_neon_smaxv:
1735 case Intrinsic::aarch64_neon_uminv:
1736 case Intrinsic::aarch64_neon_sminv: {
1737 bool IsSigned = IntrinsicID == Intrinsic::aarch64_neon_saddv ||
1738 IntrinsicID == Intrinsic::aarch64_neon_smaxv ||
1739 IntrinsicID == Intrinsic::aarch64_neon_sminv;
1740
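    // These across-lanes reductions produce the vector's element type; if the
    // intrinsic was declared with a wider scalar result, narrow the def here
    // and insert a sext/zext after the instruction to recover the original
    // type.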
1741 auto OldDst = MI.getOperand(0).getReg();
1742 auto OldDstTy = MRI.getType(OldDst);
1743 LLT NewDstTy = MRI.getType(MI.getOperand(2).getReg()).getElementType();
1744 if (OldDstTy == NewDstTy)
1745 return true;
1746
1747 auto NewDst = MRI.createGenericVirtualRegister(NewDstTy);
1748
1749 Helper.Observer.changingInstr(MI);
1750 MI.getOperand(0).setReg(NewDst);
1751 Helper.Observer.changedInstr(MI);
1752
1753 MIB.setInsertPt(MIB.getMBB(), ++MIB.getInsertPt());
1754 MIB.buildExtOrTrunc(IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT,
1755 OldDst, NewDst);
1756
1757 return true;
1758 }
1759 case Intrinsic::aarch64_neon_uaddlp:
1760 case Intrinsic::aarch64_neon_saddlp: {
1761 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlp
1762 ? AArch64::G_UADDLP
1763 : AArch64::G_SADDLP;
1764 MIB.buildInstr(Opc, {MI.getOperand(0)}, {MI.getOperand(2)});
1765 MI.eraseFromParent();
1766
1767 return true;
1768 }
1769 case Intrinsic::aarch64_neon_uaddlv:
1770 case Intrinsic::aarch64_neon_saddlv: {
1771 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlv
1772 ? AArch64::G_UADDLV
1773 : AArch64::G_SADDLV;
1774 Register DstReg = MI.getOperand(0).getReg();
1775 Register SrcReg = MI.getOperand(2).getReg();
1776 LLT DstTy = MRI.getType(DstReg);
1777
1778 LLT MidTy, ExtTy;
1779 if (DstTy.isScalar() && DstTy.getScalarSizeInBits() <= 32) {
1780 MidTy = LLT::fixed_vector(4, 32);
1781 ExtTy = LLT::scalar(32);
1782 } else {
1783 MidTy = LLT::fixed_vector(2, 64);
1784 ExtTy = LLT::scalar(64);
1785 }
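// [Added note, not in the upstream source] E.g. a v8s16 uaddlv producing an
// s32 result is widened to a v4s32 G_UADDLV followed by an s32 extract of
// lane 0; s8/s16 results additionally truncate the extracted value below.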
1786
1787 Register MidReg =
1788 MIB.buildInstr(Opc, {MidTy}, {SrcReg})->getOperand(0).getReg();
1789 Register ZeroReg =
1790 MIB.buildConstant(LLT::scalar(64), 0)->getOperand(0).getReg();
1791 Register ExtReg = MIB.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT, {ExtTy},
1792 {MidReg, ZeroReg})
1793 .getReg(0);
1794
1795 if (DstTy.getScalarSizeInBits() < 32)
1796 MIB.buildTrunc(DstReg, ExtReg);
1797 else
1798 MIB.buildCopy(DstReg, ExtReg);
1799
1800 MI.eraseFromParent();
1801
1802 return true;
1803 }
1804 case Intrinsic::aarch64_neon_smax:
1805 return LowerBinOp(TargetOpcode::G_SMAX);
1806 case Intrinsic::aarch64_neon_smin:
1807 return LowerBinOp(TargetOpcode::G_SMIN);
1808 case Intrinsic::aarch64_neon_umax:
1809 return LowerBinOp(TargetOpcode::G_UMAX);
1810 case Intrinsic::aarch64_neon_umin:
1811 return LowerBinOp(TargetOpcode::G_UMIN);
1812 case Intrinsic::aarch64_neon_fmax:
1813 return LowerBinOp(TargetOpcode::G_FMAXIMUM);
1814 case Intrinsic::aarch64_neon_fmin:
1815 return LowerBinOp(TargetOpcode::G_FMINIMUM);
1816 case Intrinsic::aarch64_neon_fmaxnm:
1817 return LowerBinOp(TargetOpcode::G_FMAXNUM);
1818 case Intrinsic::aarch64_neon_fminnm:
1819 return LowerBinOp(TargetOpcode::G_FMINNUM);
1820 case Intrinsic::aarch64_neon_smull:
1821 return LowerBinOp(AArch64::G_SMULL);
1822 case Intrinsic::aarch64_neon_umull:
1823 return LowerBinOp(AArch64::G_UMULL);
1824 case Intrinsic::aarch64_neon_sabd:
1825 return LowerBinOp(TargetOpcode::G_ABDS);
1826 case Intrinsic::aarch64_neon_uabd:
1827 return LowerBinOp(TargetOpcode::G_ABDU);
1828 case Intrinsic::aarch64_neon_abs: {
1829 // Lower the intrinsic to G_ABS.
1830 MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, {MI.getOperand(2)});
1831 MI.eraseFromParent();
1832 return true;
1833 }
1834 case Intrinsic::aarch64_neon_sqadd: {
1835 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
1836 return LowerBinOp(TargetOpcode::G_SADDSAT);
1837 break;
1838 }
1839 case Intrinsic::aarch64_neon_sqsub: {
1840 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
1841 return LowerBinOp(TargetOpcode::G_SSUBSAT);
1842 break;
1843 }
1844 case Intrinsic::aarch64_neon_uqadd: {
1845 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
1846 return LowerBinOp(TargetOpcode::G_UADDSAT);
1847 break;
1848 }
1849 case Intrinsic::aarch64_neon_uqsub: {
1850 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
1851 return LowerBinOp(TargetOpcode::G_USUBSAT);
1852 break;
1853 }
1854 case Intrinsic::aarch64_neon_udot:
1855 return LowerTriOp(AArch64::G_UDOT);
1856 case Intrinsic::aarch64_neon_sdot:
1857 return LowerTriOp(AArch64::G_SDOT);
1858 case Intrinsic::aarch64_neon_usdot:
1859 return LowerTriOp(AArch64::G_USDOT);
1860 case Intrinsic::aarch64_neon_sqxtn:
1861 return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_S);
1862 case Intrinsic::aarch64_neon_sqxtun:
1863 return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_U);
1864 case Intrinsic::aarch64_neon_uqxtn:
1865 return LowerUnaryOp(TargetOpcode::G_TRUNC_USAT_U);
1866
1867 case Intrinsic::vector_reverse:
1868 // TODO: Add support for vector_reverse
1869 return false;
1870 }
1871
1872 return true;
1873}
1874
1875bool AArch64LegalizerInfo::legalizeShlAshrLshr(
1876 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1877 GISelChangeObserver &Observer) const {
1878 assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
1879 MI.getOpcode() == TargetOpcode::G_LSHR ||
1880 MI.getOpcode() == TargetOpcode::G_SHL);
1881 // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
1882 // imported patterns can select it later. Either way, it will be legal.
1883 Register AmtReg = MI.getOperand(2).getReg();
1884 auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI);
1885 if (!VRegAndVal)
1886 return true;
1887 // Check the shift amount is in range for an immediate form.
1888 int64_t Amount = VRegAndVal->Value.getSExtValue();
1889 if (Amount > 31)
1890 return true; // This will have to remain a register variant.
1891 auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount);
1892 Observer.changingInstr(MI);
1893 MI.getOperand(2).setReg(ExtCst.getReg(0));
1894 Observer.changedInstr(MI);
1895 return true;
1896}
1897
1898static void matchLDPSTPAddrMode(Register Root, Register &Base, int &Offset,
1899 MachineRegisterInfo &MRI) {
1900 Base = Root;
1901 Offset = 0;
1902
1903 Register NewBase;
1904 int64_t NewOffset;
1905 if (mi_match(Root, MRI, m_GPtrAdd(m_Reg(NewBase), m_ICst(NewOffset))) &&
1906 isShiftedInt<7, 3>(NewOffset)) {
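// [Added note, not in the upstream source] isShiftedInt<7, 3> accepts signed
// multiples of 8 in [-512, 504], the LDP/STP scaled-imm7 range; e.g. an
// offset of 504 folds into the immediate, while 512 or 4 do not.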
1907 Base = NewBase;
1908 Offset = NewOffset;
1909 }
1910}
1911
1912// FIXME: This should be removed and replaced with the generic bitcast legalize
1913// action.
1914bool AArch64LegalizerInfo::legalizeLoadStore(
1915 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1916 GISelChangeObserver &Observer) const {
1917 assert(MI.getOpcode() == TargetOpcode::G_STORE ||
1918 MI.getOpcode() == TargetOpcode::G_LOAD);
1919 // Here we just try to handle vector loads/stores where our value type might
1920 // have pointer elements, which the SelectionDAG importer can't handle. To
1921 // allow the existing patterns for s64 to fire for p0, we just try to bitcast
1922 // the value to use s64 types.
1923
1924 // Custom legalization requires that the instruction, if not deleted, be
1925 // fully legalized. To allow further legalization of the instruction, we
1926 // create a new instruction and erase the existing one.
1927
1928 Register ValReg = MI.getOperand(0).getReg();
1929 const LLT ValTy = MRI.getType(ValReg);
1930
1931 if (ValTy == LLT::scalar(128)) {
1932
1933 AtomicOrdering Ordering = (*MI.memoperands_begin())->getSuccessOrdering();
1934 bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
1935 bool IsLoadAcquire = IsLoad && Ordering == AtomicOrdering::Acquire;
1936 bool IsStoreRelease = !IsLoad && Ordering == AtomicOrdering::Release;
1937 bool IsRcpC3 =
1938 ST->hasLSE2() && ST->hasRCPC3() && (IsLoadAcquire || IsStoreRelease);
1939
1940 LLT s64 = LLT::scalar(64);
1941
1942 unsigned Opcode;
1943 if (IsRcpC3) {
1944 Opcode = IsLoad ? AArch64::LDIAPPX : AArch64::STILPX;
1945 } else {
1946 // For LSE2, loads/stores should have been converted to monotonic and had
1947 // a fence inserted after them.
1948 assert(Ordering == AtomicOrdering::Monotonic ||
1949 Ordering == AtomicOrdering::Unordered);
1950 assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");
1951
1952 Opcode = IsLoad ? AArch64::LDPXi : AArch64::STPXi;
1953 }
1954
1955 MachineInstrBuilder NewI;
1956 if (IsLoad) {
1957 NewI = MIRBuilder.buildInstr(Opcode, {s64, s64}, {});
1958 MIRBuilder.buildMergeLikeInstr(
1959 ValReg, {NewI->getOperand(0), NewI->getOperand(1)});
1960 } else {
1961 auto Split = MIRBuilder.buildUnmerge(s64, MI.getOperand(0));
1962 NewI = MIRBuilder.buildInstr(
1963 Opcode, {}, {Split->getOperand(0), Split->getOperand(1)});
1964 }
1965
1966 if (IsRcpC3) {
1967 NewI.addUse(MI.getOperand(1).getReg());
1968 } else {
1969 Register Base;
1970 int Offset;
1971 matchLDPSTPAddrMode(MI.getOperand(1).getReg(), Base, Offset, MRI);
1972 NewI.addUse(Base);
1973 NewI.addImm(Offset / 8);
1974 }
1975
1976 NewI.cloneMemRefs(MI);
1977 constrainSelectedInstRegOperands(*NewI, *ST->getInstrInfo(),
1978 *MRI.getTargetRegisterInfo(),
1979 *ST->getRegBankInfo());
1980 MI.eraseFromParent();
1981 return true;
1982 }
1983
1984 if (!ValTy.isPointerVector() ||
1985 ValTy.getElementType().getAddressSpace() != 0) {
1986 LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
1987 return false;
1988 }
1989
1990 unsigned PtrSize = ValTy.getElementType().getSizeInBits();
1991 const LLT NewTy = LLT::vector(ValTy.getElementCount(), PtrSize);
1992 auto &MMO = **MI.memoperands_begin();
1993 MMO.setType(NewTy);
1994
1995 if (MI.getOpcode() == TargetOpcode::G_STORE) {
1996 auto Bitcast = MIRBuilder.buildBitcast(NewTy, ValReg);
1997 MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1), MMO);
1998 } else {
1999 auto NewLoad = MIRBuilder.buildLoad(NewTy, MI.getOperand(1), MMO);
2000 MIRBuilder.buildBitcast(ValReg, NewLoad);
2001 }
2002 MI.eraseFromParent();
2003 return true;
2004}
2005
2006bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
2007 MachineRegisterInfo &MRI,
2008 MachineIRBuilder &MIRBuilder) const {
2009 MachineFunction &MF = MIRBuilder.getMF();
2010 Align Alignment(MI.getOperand(2).getImm());
2011 Register Dst = MI.getOperand(0).getReg();
2012 Register ListPtr = MI.getOperand(1).getReg();
2013
2014 LLT PtrTy = MRI.getType(ListPtr);
2015 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
2016
2017 const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
2018 const Align PtrAlign = Align(PtrSize);
2019 auto List = MIRBuilder.buildLoad(
2020 PtrTy, ListPtr,
2021 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
2022 PtrTy, PtrAlign));
2023
2024 MachineInstrBuilder DstPtr;
2025 if (Alignment > PtrAlign) {
2026 // Realign the list to the actual required alignment.
2027 auto AlignMinus1 =
2028 MIRBuilder.buildConstant(IntPtrTy, Alignment.value() - 1);
2029 auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0));
2030 DstPtr = MIRBuilder.buildMaskLowPtrBits(PtrTy, ListTmp, Log2(Alignment));
2031 } else
2032 DstPtr = List;
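// [Added note, not in the upstream source] E.g. a 16-byte-aligned va_arg slot
// with 8-byte pointers bumps List by 15 and then clears the low 4 bits via
// G_PTRMASK to realign the pointer before the load below.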
2033
2034 LLT ValTy = MRI.getType(Dst);
2035 uint64_t ValSize = ValTy.getSizeInBits() / 8;
2036 MIRBuilder.buildLoad(
2037 Dst, DstPtr,
2038 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
2039 ValTy, std::max(Alignment, PtrAlign)));
2040
2041 auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrAlign));
2042
2043 auto NewList = MIRBuilder.buildPtrAdd(PtrTy, DstPtr, Size.getReg(0));
2044
2045 MIRBuilder.buildStore(NewList, ListPtr,
2046 *MF.getMachineMemOperand(MachinePointerInfo(),
2047 MachineMemOperand::MOStore,
2048 PtrTy, PtrAlign));
2049
2050 MI.eraseFromParent();
2051 return true;
2052}
2053
2054bool AArch64LegalizerInfo::legalizeBitfieldExtract(
2055 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2056 // Only legal if we can select immediate forms.
2057 // TODO: Lower this otherwise.
2058 return getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI) &&
2059 getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
2060}
2061
2062bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
2063 MachineRegisterInfo &MRI,
2064 LegalizerHelper &Helper) const {
2065 // When there is no integer popcount instruction (FEAT_CSSC isn't available),
2066 // it can be more efficiently lowered to the following sequence that uses
2067 // AdvSIMD registers/instructions as long as the copies to/from the AdvSIMD
2068 // registers are cheap.
2069 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
2070 // CNT V0.8B, V0.8B // 8xbyte pop-counts
2071 // ADDV B0, V0.8B // sum 8xbyte pop-counts
2072 // UMOV X0, V0.B[0] // copy byte result back to integer reg
2073 //
2074 // For 128 bit vector popcounts, we lower to the following sequence:
2075 // cnt.16b v0, v0 // v8s16, v4s32, v2s64
2076 // uaddlp.8h v0, v0 // v8s16, v4s32, v2s64
2077 // uaddlp.4s v0, v0 // v4s32, v2s64
2078 // uaddlp.2d v0, v0 // v2s64
2079 //
2080 // For 64 bit vector popcounts, we lower to the following sequence:
2081 // cnt.8b v0, v0 // v4s16, v2s32
2082 // uaddlp.4h v0, v0 // v4s16, v2s32
2083 // uaddlp.2s v0, v0 // v2s32
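// [Added note, not in the upstream source] Concretely, an s32 G_CTPOP below
// is zero-extended to s64, bitcast to v8s8, counted per byte, and then summed
// with the uaddlv intrinsic directly into the s32 destination.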
2084
2085 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2086 Register Dst = MI.getOperand(0).getReg();
2087 Register Val = MI.getOperand(1).getReg();
2088 LLT Ty = MRI.getType(Val);
2089 unsigned Size = Ty.getSizeInBits();
2090
2091 assert(Ty == MRI.getType(Dst) &&
2092 "Expected src and dst to have the same type!");
2093
2094 if (ST->hasCSSC() && Ty.isScalar() && Size == 128) {
2095 LLT s64 = LLT::scalar(64);
2096
2097 auto Split = MIRBuilder.buildUnmerge(s64, Val);
2098 auto CTPOP1 = MIRBuilder.buildCTPOP(s64, Split->getOperand(0));
2099 auto CTPOP2 = MIRBuilder.buildCTPOP(s64, Split->getOperand(1));
2100 auto Add = MIRBuilder.buildAdd(s64, CTPOP1, CTPOP2);
2101
2102 MIRBuilder.buildZExt(Dst, Add);
2103 MI.eraseFromParent();
2104 return true;
2105 }
2106
2107 if (!ST->hasNEON() ||
2108 MI.getMF()->getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) {
2109 // Use generic lowering when custom lowering is not possible.
2110 return Ty.isScalar() && (Size == 32 || Size == 64) &&
2111 Helper.lowerBitCount(MI) ==
2112 LegalizerHelper::LegalizeResult::Legalized;
2113 }
2114
2115 // Pre-conditioning: widen Val up to the nearest vector type.
2116 // s32,s64,v4s16,v2s32 -> v8i8
2117 // v8s16,v4s32,v2s64 -> v16i8
2118 LLT VTy = Size == 128 ? LLT::fixed_vector(16, 8) : LLT::fixed_vector(8, 8);
2119 if (Ty.isScalar()) {
2120 assert((Size == 32 || Size == 64 || Size == 128) && "Expected only 32, 64, or 128 bit scalars!");
2121 if (Size == 32) {
2122 Val = MIRBuilder.buildZExt(LLT::scalar(64), Val).getReg(0);
2123 }
2124 }
2125 Val = MIRBuilder.buildBitcast(VTy, Val).getReg(0);
2126
2127 // Count bits in each byte-sized lane.
2128 auto CTPOP = MIRBuilder.buildCTPOP(VTy, Val);
2129
2130 // Sum across lanes.
2131
2132 if (ST->hasDotProd() && Ty.isVector() && Ty.getNumElements() >= 2 &&
2133 Ty.getScalarSizeInBits() != 16) {
2134 LLT Dt = Ty == LLT::fixed_vector(2, 64) ? LLT::fixed_vector(4, 32) : Ty;
2135 auto Zeros = MIRBuilder.buildConstant(Dt, 0);
2136 auto Ones = MIRBuilder.buildConstant(VTy, 1);
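// [Added note, not in the upstream source] A dot product against an all-ones
// vector sums each group of four byte counts from the CTPOP into a 32-bit
// accumulator lane, so the byte-wise counts collapse in a single G_UDOT.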
2137 MachineInstrBuilder Sum;
2138
2139 if (Ty == LLT::fixed_vector(2, 64)) {
2140 auto UDOT =
2141 MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2142 Sum = MIRBuilder.buildInstr(AArch64::G_UADDLP, {Ty}, {UDOT});
2143 } else if (Ty == LLT::fixed_vector(4, 32)) {
2144 Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2145 } else if (Ty == LLT::fixed_vector(2, 32)) {
2146 Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2147 } else {
2148 llvm_unreachable("unexpected vector shape");
2149 }
2150
2151 Sum->getOperand(0).setReg(Dst);
2152 MI.eraseFromParent();
2153 return true;
2154 }
2155
2156 Register HSum = CTPOP.getReg(0);
2157 unsigned Opc;
2158 SmallVector<LLT> HAddTys;
2159 if (Ty.isScalar()) {
2160 Opc = Intrinsic::aarch64_neon_uaddlv;
2161 HAddTys.push_back(LLT::scalar(32));
2162 } else if (Ty == LLT::fixed_vector(8, 16)) {
2163 Opc = Intrinsic::aarch64_neon_uaddlp;
2164 HAddTys.push_back(LLT::fixed_vector(8, 16));
2165 } else if (Ty == LLT::fixed_vector(4, 32)) {
2166 Opc = Intrinsic::aarch64_neon_uaddlp;
2167 HAddTys.push_back(LLT::fixed_vector(8, 16));
2168 HAddTys.push_back(LLT::fixed_vector(4, 32));
2169 } else if (Ty == LLT::fixed_vector(2, 64)) {
2170 Opc = Intrinsic::aarch64_neon_uaddlp;
2171 HAddTys.push_back(LLT::fixed_vector(8, 16));
2172 HAddTys.push_back(LLT::fixed_vector(4, 32));
2173 HAddTys.push_back(LLT::fixed_vector(2, 64));
2174 } else if (Ty == LLT::fixed_vector(4, 16)) {
2175 Opc = Intrinsic::aarch64_neon_uaddlp;
2176 HAddTys.push_back(LLT::fixed_vector(4, 16));
2177 } else if (Ty == LLT::fixed_vector(2, 32)) {
2178 Opc = Intrinsic::aarch64_neon_uaddlp;
2179 HAddTys.push_back(LLT::fixed_vector(4, 16));
2180 HAddTys.push_back(LLT::fixed_vector(2, 32));
2181 } else
2182 llvm_unreachable("unexpected vector shape");
2183 MachineInstrBuilder UADD;
2184 for (LLT HTy : HAddTys) {
2185 UADD = MIRBuilder.buildIntrinsic(Opc, {HTy}).addUse(HSum);
2186 HSum = UADD.getReg(0);
2187 }
2188
2189 // Post-conditioning.
2190 if (Ty.isScalar() && (Size == 64 || Size == 128))
2191 MIRBuilder.buildZExt(Dst, UADD);
2192 else
2193 UADD->getOperand(0).setReg(Dst);
2194 MI.eraseFromParent();
2195 return true;
2196}
2197
2198bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128(
2199 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2200 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2201 LLT s64 = LLT::scalar(64);
2202 auto Addr = MI.getOperand(1).getReg();
2203 auto DesiredI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(2));
2204 auto NewI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(3));
2205 auto DstLo = MRI.createGenericVirtualRegister(s64);
2206 auto DstHi = MRI.createGenericVirtualRegister(s64);
2207
2208 MachineInstrBuilder CAS;
2209 if (ST->hasLSE()) {
2210 // We have 128-bit CASP instructions taking XSeqPair registers, which are
2211 // s128. We need the merge/unmerge to bracket the expansion and pair up with
2212 // the rest of the MIR so we must reassemble the extracted registers into a
2213 // 128-bit known-regclass one with code like this:
2214 //
2215 // %in1 = REG_SEQUENCE Lo, Hi ; One for each input
2216 // %out = CASP %in1, ...
2217 // %OldLo = G_EXTRACT %out, 0
2218 // %OldHi = G_EXTRACT %out, 64
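// [Added note, not in the upstream source] sube64/subo64 name the even/odd
// X-register halves of the XSeqPair class, so the unmerged low half feeds
// sube64 and the high half feeds subo64 in the REG_SEQUENCEs below.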
2219 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2220 unsigned Opcode;
2221 switch (Ordering) {
2222 case AtomicOrdering::Acquire:
2223 Opcode = AArch64::CASPAX;
2224 break;
2225 case AtomicOrdering::Release:
2226 Opcode = AArch64::CASPLX;
2227 break;
2228 case AtomicOrdering::AcquireRelease:
2229 case AtomicOrdering::SequentiallyConsistent:
2230 Opcode = AArch64::CASPALX;
2231 break;
2232 default:
2233 Opcode = AArch64::CASPX;
2234 break;
2235 }
2236
2237 LLT s128 = LLT::scalar(128);
2238 auto CASDst = MRI.createGenericVirtualRegister(s128);
2239 auto CASDesired = MRI.createGenericVirtualRegister(s128);
2240 auto CASNew = MRI.createGenericVirtualRegister(s128);
2241 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASDesired}, {})
2242 .addUse(DesiredI->getOperand(0).getReg())
2243 .addImm(AArch64::sube64)
2244 .addUse(DesiredI->getOperand(1).getReg())
2245 .addImm(AArch64::subo64);
2246 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASNew}, {})
2247 .addUse(NewI->getOperand(0).getReg())
2248 .addImm(AArch64::sube64)
2249 .addUse(NewI->getOperand(1).getReg())
2250 .addImm(AArch64::subo64);
2251
2252 CAS = MIRBuilder.buildInstr(Opcode, {CASDst}, {CASDesired, CASNew, Addr});
2253
2254 MIRBuilder.buildExtract({DstLo}, {CASDst}, 0);
2255 MIRBuilder.buildExtract({DstHi}, {CASDst}, 64);
2256 } else {
2257 // The -O0 CMP_SWAP_128 is friendlier to generate code for because LDXP/STXP
2258 // can take arbitrary registers so it just has the normal GPR64 operands the
2259 // rest of AArch64 is expecting.
2260 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2261 unsigned Opcode;
2262 switch (Ordering) {
2263 case AtomicOrdering::Acquire:
2264 Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
2265 break;
2266 case AtomicOrdering::Release:
2267 Opcode = AArch64::CMP_SWAP_128_RELEASE;
2268 break;
2269 case AtomicOrdering::AcquireRelease:
2270 case AtomicOrdering::SequentiallyConsistent:
2271 Opcode = AArch64::CMP_SWAP_128;
2272 break;
2273 default:
2274 Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
2275 break;
2276 }
2277
2278 auto Scratch = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2279 CAS = MIRBuilder.buildInstr(Opcode, {DstLo, DstHi, Scratch},
2280 {Addr, DesiredI->getOperand(0),
2281 DesiredI->getOperand(1), NewI->getOperand(0),
2282 NewI->getOperand(1)});
2283 }
2284
2285 CAS.cloneMemRefs(MI);
2286 constrainSelectedInstRegOperands(*CAS, *ST->getInstrInfo(),
2287 *MRI.getTargetRegisterInfo(),
2288 *ST->getRegBankInfo());
2289
2290 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {DstLo, DstHi});
2291 MI.eraseFromParent();
2292 return true;
2293}
2294
2295bool AArch64LegalizerInfo::legalizeCTTZ(MachineInstr &MI,
2296 LegalizerHelper &Helper) const {
2297 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2298 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2299 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
2300 auto BitReverse = MIRBuilder.buildBitReverse(Ty, MI.getOperand(1));
2301 MIRBuilder.buildCTLZ(MI.getOperand(0).getReg(), BitReverse);
2302 MI.eraseFromParent();
2303 return true;
2304}
2305
2306bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
2307 LegalizerHelper &Helper) const {
2308 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2309
2310 // Tagged version MOPSMemorySetTagged is legalised in legalizeIntrinsic
2311 if (MI.getOpcode() == TargetOpcode::G_MEMSET) {
2312 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
2313 // the instruction).
2314 auto &Value = MI.getOperand(1);
2315 Register ExtValueReg =
2316 MIRBuilder.buildAnyExt(LLT::scalar(64), Value).getReg(0);
2317 Value.setReg(ExtValueReg);
2318 return true;
2319 }
2320
2321 return false;
2322}
2323
2324bool AArch64LegalizerInfo::legalizeExtractVectorElt(
2325 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2326 const GExtractVectorElement *Element = cast<GExtractVectorElement>(&MI);
2327 auto VRegAndVal =
2328 getIConstantVRegValWithLookThrough(Element->getIndexReg(), MRI);
2329 if (VRegAndVal)
2330 return true;
2331 LLT VecTy = MRI.getType(Element->getVectorReg());
2332 if (VecTy.isScalableVector())
2333 return true;
2334 return Helper.lowerExtractInsertVectorElt(MI) !=
2335 LegalizerHelper::LegalizeResult::UnableToLegalize;
2336}
2337
2338bool AArch64LegalizerInfo::legalizeDynStackAlloc(
2339 MachineInstr &MI, LegalizerHelper &Helper) const {
2340 MachineFunction &MF = *MI.getParent()->getParent();
2341 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2342 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2343
2344 // If stack probing is not enabled for this function, use the default
2345 // lowering.
2346 if (!MF.getFunction().hasFnAttribute("probe-stack") ||
2347 MF.getFunction().getFnAttribute("probe-stack").getValueAsString() !=
2348 "inline-asm") {
2349 Helper.lowerDynStackAlloc(MI);
2350 return true;
2351 }
2352
2353 Register Dst = MI.getOperand(0).getReg();
2354 Register AllocSize = MI.getOperand(1).getReg();
2355 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
2356
2357 assert(MRI.getType(Dst) == LLT::pointer(0, 64) &&
2358 "Unexpected type for dynamic alloca");
2359 assert(MRI.getType(AllocSize) == LLT::scalar(64) &&
2360 "Unexpected type for dynamic alloca");
2361
2362 LLT PtrTy = MRI.getType(Dst);
2363 Register SPReg =
2364 Helper.getTargetLowering().getStackPointerRegisterToSaveRestore();
2365 Register SPTmp =
2366 Helper.getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
2367 auto NewMI =
2368 MIRBuilder.buildInstr(AArch64::PROBED_STACKALLOC_DYN, {}, {SPTmp});
2369 MRI.setRegClass(NewMI.getReg(0), &AArch64::GPR64commonRegClass);
2370 MIRBuilder.setInsertPt(*NewMI->getParent(), NewMI);
2371 MIRBuilder.buildCopy(Dst, SPTmp);
2372
2373 MI.eraseFromParent();
2374 return true;
2375}
2376
2377bool AArch64LegalizerInfo::legalizePrefetch(MachineInstr &MI,
2378 LegalizerHelper &Helper) const {
2379 MachineIRBuilder &MIB = Helper.MIRBuilder;
2380 auto &AddrVal = MI.getOperand(0);
2381
2382 int64_t IsWrite = MI.getOperand(1).getImm();
2383 int64_t Locality = MI.getOperand(2).getImm();
2384 int64_t IsData = MI.getOperand(3).getImm();
2385
2386 bool IsStream = Locality == 0;
2387 if (Locality != 0) {
2388 assert(Locality <= 3 && "Prefetch locality out-of-range");
2389 // The locality degree is the inverse of the target cache level, and the
2390 // encoding starts at 0 for L1, so flip the number: higher locality
2391 // (keep closer) selects a faster cache level.
2392 Locality = 3 - Locality;
2393 }
2394
2395 unsigned PrfOp = (IsWrite << 4) | (!IsData << 3) | (Locality << 1) | IsStream;
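// [Worked example, added; not in the upstream source] A read prefetch with
// locality 3 and IsData=1 has Locality flipped to 0 and IsStream=false, so
// PrfOp = 0, i.e. PLDL1KEEP.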
2396
2397 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
2398 MI.eraseFromParent();
2399 return true;
2400}