LLVM 23.0.0git
AArch64LegalizerInfo.cpp
Go to the documentation of this file.
1//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the MachineLegalizer class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
15#include "AArch64Subtarget.h"
16#include "llvm/ADT/STLExtras.h"
28#include "llvm/IR/Intrinsics.h"
29#include "llvm/IR/IntrinsicsAArch64.h"
30#include "llvm/IR/Type.h"
32#include <initializer_list>
33
34#define DEBUG_TYPE "aarch64-legalinfo"
35
36using namespace llvm;
37using namespace LegalizeActions;
38using namespace LegalizeMutations;
39using namespace LegalityPredicates;
40using namespace MIPatternMatch;
41
43 : ST(&ST) {
44 using namespace TargetOpcode;
45 const LLT p0 = LLT::pointer(0, 64);
46 const LLT s8 = LLT::scalar(8);
47 const LLT s16 = LLT::scalar(16);
48 const LLT s32 = LLT::scalar(32);
49 const LLT s64 = LLT::scalar(64);
50 const LLT s128 = LLT::scalar(128);
51 const LLT v16s8 = LLT::fixed_vector(16, 8);
52 const LLT v8s8 = LLT::fixed_vector(8, 8);
53 const LLT v4s8 = LLT::fixed_vector(4, 8);
54 const LLT v2s8 = LLT::fixed_vector(2, 8);
55 const LLT v8s16 = LLT::fixed_vector(8, 16);
56 const LLT v4s16 = LLT::fixed_vector(4, 16);
57 const LLT v2s16 = LLT::fixed_vector(2, 16);
58 const LLT v2s32 = LLT::fixed_vector(2, 32);
59 const LLT v4s32 = LLT::fixed_vector(4, 32);
60 const LLT v2s64 = LLT::fixed_vector(2, 64);
61 const LLT v2p0 = LLT::fixed_vector(2, p0);
62
63 const LLT nxv16s8 = LLT::scalable_vector(16, s8);
64 const LLT nxv8s16 = LLT::scalable_vector(8, s16);
65 const LLT nxv4s32 = LLT::scalable_vector(4, s32);
66 const LLT nxv2s64 = LLT::scalable_vector(2, s64);
67
68 const LLT bf16 = LLT::bfloat16();
69 const LLT v4bf16 = LLT::fixed_vector(4, bf16);
70
71 const LLT f16 = LLT::float16();
72 const LLT v4f16 = LLT::fixed_vector(4, f16);
73 const LLT v8f16 = LLT::fixed_vector(8, f16);
74
75 const LLT f32 = LLT::float32();
76 const LLT v2f32 = LLT::fixed_vector(2, f32);
77 const LLT v4f32 = LLT::fixed_vector(4, f32);
78
79 const LLT f64 = LLT::float64();
80 const LLT v2f64 = LLT::fixed_vector(2, f64);
81
82 const LLT f128 = LLT::float128();
83
84 const LLT i8 = LLT::integer(8);
85 const LLT v8i8 = LLT::fixed_vector(8, i8);
86 const LLT v16i8 = LLT::fixed_vector(16, i8);
87
88 const LLT i16 = LLT::integer(16);
89 const LLT v8i16 = LLT::fixed_vector(8, i16);
90 const LLT v4i16 = LLT::fixed_vector(4, i16);
91
92 const LLT i32 = LLT::integer(32);
93 const LLT v2i32 = LLT::fixed_vector(2, i32);
94 const LLT v4i32 = LLT::fixed_vector(4, i32);
95
96 const LLT i64 = LLT::integer(64);
97 const LLT v2i64 = LLT::fixed_vector(2, i64);
98
99 const LLT i128 = LLT::integer(128);
100
101 const LLT nxv16i8 = LLT::scalable_vector(16, i8);
102 const LLT nxv8i16 = LLT::scalable_vector(8, i16);
103 const LLT nxv4i32 = LLT::scalable_vector(4, i32);
104 const LLT nxv2i64 = LLT::scalable_vector(2, i64);
105
106 std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
107 v16s8, v8s16, v4s32,
108 v2s64, v2p0,
109 /* End 128bit types */
110 /* Begin 64bit types */
111 v8s8, v4s16, v2s32};
112 std::initializer_list<LLT> ScalarAndPtrTypesList = {s8, s16, s32, s64, p0};
113 SmallVector<LLT, 8> PackedVectorAllTypesVec(PackedVectorAllTypeList);
114 SmallVector<LLT, 8> ScalarAndPtrTypesVec(ScalarAndPtrTypesList);
115
116 const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();
117
118 // FIXME: support subtargets which have neon/fp-armv8 disabled.
119 if (!ST.hasNEON() || !ST.hasFPARMv8()) {
121 return;
122 }
123
124 // Some instructions only support s16 if the subtarget has full 16-bit FP
125 // support.
126 const bool HasFP16 = ST.hasFullFP16();
127 const LLT &MinFPScalar = HasFP16 ? f16 : f32;
128
129 const bool HasCSSC = ST.hasCSSC();
130 const bool HasRCPC3 = ST.hasRCPC3();
131 const bool HasSVE = ST.hasSVE();
132
134 {G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
135 .legalFor({p0, s8, s16, s32, s64})
136 .legalFor({v2s8, v4s8, v8s8, v16s8, v2s16, v4s16, v8s16, v2s32, v4s32,
137 v2s64, v2p0})
138 .widenScalarToNextPow2(0)
139 .clampScalar(0, s8, s64)
142 .clampNumElements(0, v8s8, v16s8)
143 .clampNumElements(0, v4s16, v8s16)
144 .clampNumElements(0, v2s32, v4s32)
145 .clampMaxNumElements(0, s64, 2)
146 .clampMaxNumElements(0, p0, 2)
148
150 .legalFor({p0, s16, s32, s64})
151 .legalFor(PackedVectorAllTypeList)
155 .clampScalar(0, s16, s64)
156 .clampNumElements(0, v8s8, v16s8)
157 .clampNumElements(0, v4s16, v8s16)
158 .clampNumElements(0, v2s32, v4s32)
159 .clampMaxNumElements(0, s64, 2)
160 .clampMaxNumElements(0, p0, 2);
161
163 .legalIf(all(typeInSet(0, {s32, s64, p0}), typeInSet(1, {s8, s16, s32}),
164 smallerThan(1, 0)))
165 .widenScalarToNextPow2(0)
166 .clampScalar(0, s32, s64)
168 .minScalar(1, s8)
169 .maxScalarIf(typeInSet(0, {s32}), 1, s16)
170 .maxScalarIf(typeInSet(0, {s64, p0}), 1, s32);
171
173 .legalIf(all(typeInSet(0, {s16, s32, s64, p0}),
174 typeInSet(1, {s32, s64, s128, p0}), smallerThan(0, 1)))
175 .widenScalarToNextPow2(1)
176 .clampScalar(1, s32, s128)
178 .minScalar(0, s16)
179 .maxScalarIf(typeInSet(1, {s32}), 0, s16)
180 .maxScalarIf(typeInSet(1, {s64, p0}), 0, s32)
181 .maxScalarIf(typeInSet(1, {s128}), 0, s64);
182
183 getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR})
184 .legalFor({i32, i64, v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64})
185 .legalFor(HasSVE, {nxv16i8, nxv8i16, nxv4i32, nxv2i64})
186 .widenScalarToNextPow2(0)
187 .clampScalar(0, s32, s64)
188 .clampMaxNumElements(0, s8, 16)
189 .clampMaxNumElements(0, s16, 8)
190 .clampNumElements(0, v2s32, v4s32)
191 .clampNumElements(0, v2s64, v2s64)
193 [=](const LegalityQuery &Query) {
194 return Query.Types[0].getNumElements() <= 2;
195 },
196 0, s32)
197 .minScalarOrEltIf(
198 [=](const LegalityQuery &Query) {
199 return Query.Types[0].getNumElements() <= 4;
200 },
201 0, s16)
202 .minScalarOrEltIf(
203 [=](const LegalityQuery &Query) {
204 return Query.Types[0].getNumElements() <= 16;
205 },
206 0, s8)
207 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
209
211 .legalFor({i32, i64, v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64})
212 .widenScalarToNextPow2(0)
213 .clampScalar(0, s32, s64)
214 .clampMaxNumElements(0, s8, 16)
215 .clampMaxNumElements(0, s16, 8)
216 .clampNumElements(0, v2s32, v4s32)
217 .clampNumElements(0, v2s64, v2s64)
219 [=](const LegalityQuery &Query) {
220 return Query.Types[0].getNumElements() <= 2;
221 },
222 0, s32)
223 .minScalarOrEltIf(
224 [=](const LegalityQuery &Query) {
225 return Query.Types[0].getNumElements() <= 4;
226 },
227 0, s16)
228 .minScalarOrEltIf(
229 [=](const LegalityQuery &Query) {
230 return Query.Types[0].getNumElements() <= 16;
231 },
232 0, s8)
233 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
235
236 getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
237 .customIf([=](const LegalityQuery &Query) {
238 const auto &SrcTy = Query.Types[0];
239 const auto &AmtTy = Query.Types[1];
240 return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
241 AmtTy.getSizeInBits() == 32;
242 })
243 .legalFor({
244 {i32, i32},
245 {i32, i64},
246 {i64, i64},
247 {v8i8, v8i8},
248 {v16i8, v16i8},
249 {v4i16, v4i16},
250 {v8i16, v8i16},
251 {v2i32, v2i32},
252 {v4i32, v4i32},
253 {v2i64, v2i64},
254 })
255 .widenScalarToNextPow2(0)
256 .clampScalar(1, s32, s64)
257 .clampScalar(0, s32, s64)
258 .clampNumElements(0, v8s8, v16s8)
259 .clampNumElements(0, v4s16, v8s16)
260 .clampNumElements(0, v2s32, v4s32)
261 .clampNumElements(0, v2s64, v2s64)
263 .minScalarSameAs(1, 0)
267
269 .legalFor({{p0, i64}, {v2p0, v2i64}})
270 .clampScalarOrElt(1, s64, s64)
271 .clampNumElements(0, v2p0, v2p0);
272
273 getActionDefinitionsBuilder(G_PTRMASK).legalFor({{p0, s64}});
274
275 getActionDefinitionsBuilder({G_SDIV, G_UDIV})
276 .legalFor({i32, i64})
277 .libcallFor({i128})
278 .clampScalar(0, s32, s64)
280 .scalarize(0);
281
282 getActionDefinitionsBuilder({G_SREM, G_UREM, G_SDIVREM, G_UDIVREM})
283 .lowerFor({i8, i16, i32, i64, v2i32, v4i32, v2i64})
284 .libcallFor({i128})
286 .minScalarOrElt(0, s32)
287 .clampNumElements(0, v2s32, v4s32)
288 .clampNumElements(0, v2s64, v2s64)
289 .scalarize(0);
290
291 getActionDefinitionsBuilder({G_SMULO, G_UMULO})
292 .widenScalarToNextPow2(0, /*Min = */ 32)
293 .clampScalar(0, s32, s64)
294 .lower();
295
296 getActionDefinitionsBuilder({G_SMULH, G_UMULH})
297 .legalFor({i64, v16i8, v8i16, v4i32})
298 .lower();
299
300 getActionDefinitionsBuilder({G_SMULFIX, G_UMULFIX}).lower();
301
302 getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
303 .legalFor({v8i8, v16i8, v4i16, v8i16, v2i32, v4i32})
304 .legalFor(HasCSSC, {i32, i64})
305 .minScalar(HasCSSC, 0, s32)
306 .clampNumElements(0, v8s8, v16s8)
307 .clampNumElements(0, v4s16, v8s16)
308 .clampNumElements(0, v2s32, v4s32)
309 .lower();
310
311 // FIXME: Legal vector types are only legal with NEON.
313 .legalFor(HasCSSC, {i32, i64})
314 .legalFor({v16i8, v8i16, v4i32, v2i64, v2p0, v8i8, v4i16, v2i32})
315 .customIf([=](const LegalityQuery &Q) {
316 // TODO: Fix suboptimal codegen for 128+ bit types.
317 LLT SrcTy = Q.Types[0];
318 return SrcTy.isScalar() && SrcTy.getSizeInBits() < 128;
319 })
320 .widenScalarIf(
321 [=](const LegalityQuery &Query) { return Query.Types[0] == v4s8; },
322 [=](const LegalityQuery &Query) { return std::make_pair(0, v4i16); })
323 .widenScalarIf(
324 [=](const LegalityQuery &Query) { return Query.Types[0] == v2s16; },
325 [=](const LegalityQuery &Query) { return std::make_pair(0, v2i32); })
326 .clampNumElements(0, v8s8, v16s8)
327 .clampNumElements(0, v4s16, v8s16)
328 .clampNumElements(0, v2s32, v4s32)
329 .clampNumElements(0, v2s64, v2s64)
331 .lower();
332
334 {G_ABDS, G_ABDU, G_UAVGFLOOR, G_UAVGCEIL, G_SAVGFLOOR, G_SAVGCEIL})
335 .legalFor({v8i8, v16i8, v4i16, v8i16, v2i32, v4i32})
336 .lower();
337
339 {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
340 .legalFor({{i32, i32}, {i64, i32}})
341 .clampScalar(0, s32, s64)
342 .clampScalar(1, s32, s64)
344
345 getActionDefinitionsBuilder({G_FSHL, G_FSHR})
346 .customFor({{i32, i32}, {i32, i64}, {i64, i64}})
347 .lower();
348
350 .legalFor({{i32, i64}, {i64, i64}})
351 .customIf([=](const LegalityQuery &Q) {
352 return Q.Types[0].isScalar() && Q.Types[1].getScalarSizeInBits() < 64;
353 })
354 .lower();
356
357 getActionDefinitionsBuilder({G_SBFX, G_UBFX})
358 .customFor({{s32, s32}, {s64, s64}});
359
360 auto always = [=](const LegalityQuery &Q) { return true; };
362 .legalFor(HasCSSC, {{i32, i32}, {i64, i64}})
363 .legalFor({{v8i8, v8i8}, {v16i8, v16i8}})
364 .customFor(!HasCSSC, {{s32, s32}, {s64, s64}})
365 .customFor({{s128, s128},
366 {v4s16, v4s16},
367 {v8s16, v8s16},
368 {v2s32, v2s32},
369 {v4s32, v4s32},
370 {v2s64, v2s64}})
371 .clampScalar(0, s32, s128)
374 .minScalarEltSameAsIf(always, 1, 0)
375 .maxScalarEltSameAsIf(always, 1, 0)
376 .clampNumElements(0, v8s8, v16s8)
377 .clampNumElements(0, v4s16, v8s16)
378 .clampNumElements(0, v2s32, v4s32)
379 .clampNumElements(0, v2s64, v2s64)
382
383 getActionDefinitionsBuilder({G_CTLZ, G_CTLS})
384 .legalFor({{i32, i32},
385 {i64, i64},
386 {v8i8, v8i8},
387 {v16i8, v16i8},
388 {v4i16, v4i16},
389 {v8i16, v8i16},
390 {v2i32, v2i32},
391 {v4i32, v4i32}})
392 .widenScalarToNextPow2(1, /*Min=*/32)
393 .clampScalar(1, s32, s64)
395 .clampNumElements(0, v8s8, v16s8)
396 .clampNumElements(0, v4s16, v8s16)
397 .clampNumElements(0, v2s32, v4s32)
400 .scalarSameSizeAs(0, 1);
401
402 getActionDefinitionsBuilder(G_CTLZ_ZERO_POISON).lower();
403
405 .lowerIf(isVector(0))
406 .widenScalarToNextPow2(1, /*Min=*/32)
407 .clampScalar(1, s32, s64)
408 .scalarSameSizeAs(0, 1)
409 .legalFor(HasCSSC, {s32, s64})
410 .customFor(!HasCSSC, {s32, s64});
411
412 getActionDefinitionsBuilder(G_CTTZ_ZERO_POISON).lower();
413
414 getActionDefinitionsBuilder(G_BITREVERSE)
415 .legalFor({i32, i64, v8i8, v16i8})
416 .widenScalarToNextPow2(0, /*Min = */ 32)
418 .clampScalar(0, s32, s64)
419 .clampNumElements(0, v8s8, v16s8)
420 .clampNumElements(0, v4s16, v8s16)
421 .clampNumElements(0, v2s32, v4s32)
422 .clampNumElements(0, v2s64, v2s64)
425 .lower();
426
428 .legalFor({i32, i64, v4i16, v8i16, v2i32, v4i32, v2i64})
430 .clampScalar(0, s32, s64)
431 .clampNumElements(0, v4s16, v8s16)
432 .clampNumElements(0, v2s32, v4s32)
433 .clampNumElements(0, v2s64, v2s64)
435
436 getActionDefinitionsBuilder({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
437 .legalFor({v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64})
438 .legalFor(HasSVE, {nxv16i8, nxv8i16, nxv4i32, nxv2i64})
439 .clampNumElements(0, v8s8, v16s8)
440 .clampNumElements(0, v4s16, v8s16)
441 .clampNumElements(0, v2s32, v4s32)
442 .clampMaxNumElements(0, s64, 2)
445 .lower();
446
448 {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FSQRT, G_FMAXNUM, G_FMINNUM,
449 G_FMAXIMUM, G_FMINIMUM, G_FCEIL, G_FFLOOR, G_FRINT, G_FNEARBYINT,
450 G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
451 .legalFor({f32, f64, v2f32, v4f32, v2f64})
452 .legalFor(HasFP16, {f16, v4f16, v8f16})
453 .libcallFor({f128})
454 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
456 [=](const LegalityQuery &Q) {
457 return (!HasFP16 && Q.Types[0].getScalarType().isFloat16()) ||
458 Q.Types[0].getScalarType().isBFloat16();
459 },
460 changeElementTo(0, f32))
461 .clampNumElements(0, v4s16, v8s16)
462 .clampNumElements(0, v2s32, v4s32)
463 .clampNumElements(0, v2s64, v2s64)
465
466 getActionDefinitionsBuilder({G_FABS, G_FNEG})
467 .legalFor({f32, f64, v2f32, v4f32, v2f64})
468 .legalFor(HasFP16, {f16, v4f16, v8f16})
469 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
471 .clampNumElements(0, v4s16, v8s16)
472 .clampNumElements(0, v2s32, v4s32)
473 .clampNumElements(0, v2s64, v2s64)
475 .lowerFor({f16, v4f16, v8f16});
476
478 .libcallFor({f32, f64, f128})
479 .minScalar(0, f32)
480 .scalarize(0);
481
482 getActionDefinitionsBuilder({G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2,
483 G_FLOG10, G_FTAN, G_FEXP, G_FEXP2, G_FEXP10,
484 G_FACOS, G_FASIN, G_FATAN, G_FATAN2, G_FCOSH,
485 G_FSINH, G_FTANH, G_FMODF})
486 // We need a call for these, so we always need to scalarize.
487 .scalarize(0)
488 // Regardless of FP16 support, widen 16-bit elements to 32-bits.
489 .minScalar(0, f32)
490 .libcallFor({f32, f64, f128});
491 getActionDefinitionsBuilder({G_FPOWI, G_FLDEXP})
492 .scalarize(0)
493 .minScalar(0, f32)
494 .libcallFor({{f32, i32}, {f64, i32}, {f128, i32}});
495
496 getActionDefinitionsBuilder({G_LROUND, G_INTRINSIC_LRINT})
497 .legalFor({{i32, f32}, {i32, f64}, {i64, f32}, {i64, f64}})
498 .legalFor(HasFP16, {{i32, f16}, {i64, f16}})
499 .minScalar(1, s32)
500 .libcallFor({{s64, s128}})
501 .lower();
502 getActionDefinitionsBuilder({G_LLROUND, G_INTRINSIC_LLRINT})
503 .legalFor({{i64, f32}, {i64, f64}})
504 .legalFor(HasFP16, {{i64, f16}})
505 .minScalar(0, s64)
506 .minScalar(1, s32)
507 .libcallFor({{s64, s128}})
508 .lower();
509
510 // TODO: Custom legalization for mismatched types.
511 getActionDefinitionsBuilder(G_FCOPYSIGN)
513 [](const LegalityQuery &Query) { return Query.Types[0].isScalar(); },
514 [=](const LegalityQuery &Query) {
515 const LLT Ty = Query.Types[0];
516 return std::pair(0, LLT::fixed_vector(Ty == s16 ? 4 : 2, Ty));
517 })
518 .lower();
519
521
522 for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
523 auto &Actions = getActionDefinitionsBuilder(Op);
524
525 if (Op == G_SEXTLOAD)
527
528 // Atomics have zero extending behavior.
529 Actions
530 .legalForTypesWithMemDesc({{s32, p0, s8, 8},
531 {s32, p0, s16, 8},
532 {s32, p0, s32, 8},
533 {s64, p0, s8, 2},
534 {s64, p0, s16, 2},
535 {s64, p0, s32, 4},
536 {s64, p0, s64, 8},
537 {p0, p0, s64, 8},
538 {v2s32, p0, s64, 8}})
539 .widenScalarToNextPow2(0)
540 .clampScalar(0, s32, s64)
541 // TODO: We could support sum-of-pow2's but the lowering code doesn't know
542 // how to do that yet.
543 .unsupportedIfMemSizeNotPow2()
544 // Lower anything left over into G_*EXT and G_LOAD
545 .lower();
546 }
547
548 auto IsPtrVecPred = [=](const LegalityQuery &Query) {
  // Legality predicate: returns true when the type at index 0 is a vector of
  // pointers whose address space is 0. It is handed to customIf() in two rule
  // chains later in this function.
549 const LLT &ValTy = Query.Types[0];
550 return ValTy.isPointerVector() && ValTy.getAddressSpace() == 0;
551 };
552
554 .customIf([=](const LegalityQuery &Query) {
555 return HasRCPC3 && Query.Types[0] == s128 &&
556 Query.MMODescrs[0].Ordering == AtomicOrdering::Acquire;
557 })
558 .customIf([=](const LegalityQuery &Query) {
559 return Query.Types[0] == s128 &&
560 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
561 })
562 .legalForTypesWithMemDesc({{s8, p0, s8, 8},
563 {s16, p0, s16, 8},
564 {s32, p0, s32, 8},
565 {s64, p0, s64, 8},
566 {p0, p0, s64, 8},
567 {s128, p0, s128, 8},
568 {v8s8, p0, s64, 8},
569 {v16s8, p0, s128, 8},
570 {v4s16, p0, s64, 8},
571 {v8s16, p0, s128, 8},
572 {v2s32, p0, s64, 8},
573 {v4s32, p0, s128, 8},
574 {v2s64, p0, s128, 8}})
575 // These extends are also legal
576 .legalForTypesWithMemDesc(
577 {{s32, p0, s8, 8}, {s32, p0, s16, 8}, {s64, p0, s32, 8}})
578 .legalForTypesWithMemDesc({
579 // SVE vscale x 128 bit base sizes
580 {nxv16s8, p0, nxv16s8, 8},
581 {nxv8s16, p0, nxv8s16, 8},
582 {nxv4s32, p0, nxv4s32, 8},
583 {nxv2s64, p0, nxv2s64, 8},
584 })
585 .widenScalarToNextPow2(0, /* MinSize = */ 8)
586 .clampMaxNumElements(0, s8, 16)
587 .clampMaxNumElements(0, s16, 8)
588 .clampMaxNumElements(0, s32, 4)
589 .clampMaxNumElements(0, s64, 2)
590 .clampMaxNumElements(0, p0, 2)
592 .clampScalar(0, s8, s64)
594 [=](const LegalityQuery &Query) {
595 // Clamp extending load results to 32-bits.
596 return Query.Types[0].isScalar() &&
597 Query.Types[0] != Query.MMODescrs[0].MemoryTy &&
598 Query.Types[0].getSizeInBits() > 32;
599 },
600 changeTo(0, s32))
601 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
602 .bitcastIf(typeInSet(0, {v4s8}),
603 [=](const LegalityQuery &Query) {
604 const LLT VecTy = Query.Types[0];
605 return std::pair(0, LLT::integer(VecTy.getSizeInBits()));
606 })
607 .customIf(IsPtrVecPred)
608 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)
609 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0);
610
612 .customIf([=](const LegalityQuery &Query) {
613 return HasRCPC3 && Query.Types[0] == s128 &&
614 Query.MMODescrs[0].Ordering == AtomicOrdering::Release;
615 })
616 .customIf([=](const LegalityQuery &Query) {
617 return Query.Types[0] == s128 &&
618 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
619 })
620 .widenScalarIf(
621 all(scalarNarrowerThan(0, 32),
623 changeTo(0, s32))
625 {{s8, p0, s8, 8}, {s16, p0, s8, 8}, // truncstorei8 from s16
626 {s32, p0, s8, 8}, // truncstorei8 from s32
627 {s64, p0, s8, 8}, // truncstorei8 from s64
628 {s16, p0, s16, 8}, {s32, p0, s16, 8}, // truncstorei16 from s32
629 {s64, p0, s16, 8}, // truncstorei16 from s64
630 {s32, p0, s8, 8}, {s32, p0, s16, 8}, {s32, p0, s32, 8},
631 {s64, p0, s64, 8}, {s64, p0, s32, 8}, // truncstorei32 from s64
632 {p0, p0, s64, 8}, {s128, p0, s128, 8}, {v16s8, p0, s128, 8},
633 {v8s8, p0, s64, 8}, {v4s16, p0, s64, 8}, {v8s16, p0, s128, 8},
634 {v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})
635 .legalForTypesWithMemDesc({
636 // SVE vscale x 128 bit base sizes
637 // TODO: Add nxv2p0. Consider bitcastIf.
638 // See #92130
639 // https://github.com/llvm/llvm-project/pull/92130#discussion_r1616888461
640 {nxv16s8, p0, nxv16s8, 8},
641 {nxv8s16, p0, nxv8s16, 8},
642 {nxv4s32, p0, nxv4s32, 8},
643 {nxv2s64, p0, nxv2s64, 8},
644 })
645 .clampScalar(0, s8, s64)
646 .minScalarOrElt(0, s8)
647 .lowerIf([=](const LegalityQuery &Query) {
648 return Query.Types[0].isScalar() &&
649 Query.Types[0] != Query.MMODescrs[0].MemoryTy;
650 })
651 // Maximum: sN * k = 128
652 .clampMaxNumElements(0, s8, 16)
653 .clampMaxNumElements(0, s16, 8)
654 .clampMaxNumElements(0, s32, 4)
655 .clampMaxNumElements(0, s64, 2)
656 .clampMaxNumElements(0, p0, 2)
658 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
659 .bitcastIf(all(typeInSet(0, {v4s8}),
660 LegalityPredicate([=](const LegalityQuery &Query) {
661 return Query.Types[0].getSizeInBits() ==
662 Query.MMODescrs[0].MemoryTy.getSizeInBits();
663 })),
664 [=](const LegalityQuery &Query) {
665 const LLT VecTy = Query.Types[0];
666 return std::pair(0, LLT::integer(VecTy.getSizeInBits()));
667 })
668 .customIf(IsPtrVecPred)
669 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)
670 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
671 .lower();
672
673 getActionDefinitionsBuilder(G_INDEXED_STORE)
674 // Idx 0 == Ptr, Idx 1 == Val
675 // TODO: we can implement legalizations but as of now these are
676 // generated in a very specific way.
678 {p0, s8, s8, 8},
679 {p0, s16, s16, 8},
680 {p0, s32, s8, 8},
681 {p0, s32, s16, 8},
682 {p0, s32, s32, 8},
683 {p0, s64, s64, 8},
684 {p0, p0, p0, 8},
685 {p0, v8s8, v8s8, 8},
686 {p0, v16s8, v16s8, 8},
687 {p0, v4s16, v4s16, 8},
688 {p0, v8s16, v8s16, 8},
689 {p0, v2s32, v2s32, 8},
690 {p0, v4s32, v4s32, 8},
691 {p0, v2s64, v2s64, 8},
692 {p0, v2p0, v2p0, 8},
693 {p0, s128, s128, 8},
694 })
695 .unsupported();
696
697 auto IndexedLoadBasicPred = [=](const LegalityQuery &Query) {
  // Legality predicate used by the G_INDEXED_LOAD rules below.
  // Types[0] is the loaded value type and Types[1] is the pointer type.
698 LLT LdTy = Query.Types[0];
699 LLT PtrTy = Query.Types[1];
  // Reject any loaded type that is not one of the packed vector types, not
  // one of the scalar/pointer types, and not s128.
700 if (!llvm::is_contained(PackedVectorAllTypesVec, LdTy) &&
701 !llvm::is_contained(ScalarAndPtrTypesVec, LdTy) && LdTy != s128)
702 return false;
  // The pointer operand must be exactly p0.
703 if (PtrTy != p0)
704 return false;
705 return true;
706 };
707 getActionDefinitionsBuilder(G_INDEXED_LOAD)
710 .legalIf(IndexedLoadBasicPred)
711 .unsupported();
712 getActionDefinitionsBuilder({G_INDEXED_SEXTLOAD, G_INDEXED_ZEXTLOAD})
713 .unsupportedIf(
715 .legalIf(all(typeInSet(0, {s16, s32, s64}),
716 LegalityPredicate([=](const LegalityQuery &Q) {
717 LLT LdTy = Q.Types[0];
718 LLT PtrTy = Q.Types[1];
719 LLT MemTy = Q.MMODescrs[0].MemoryTy;
720 if (PtrTy != p0)
721 return false;
722 if (LdTy == s16)
723 return MemTy == s8;
724 if (LdTy == s32)
725 return MemTy == s8 || MemTy == s16;
726 if (LdTy == s64)
727 return MemTy == s8 || MemTy == s16 || MemTy == s32;
728 return false;
729 })))
730 .unsupported();
731
732 // Constants
734 .legalFor({p0, s8, s16, s32, s64})
735 .widenScalarToNextPow2(0)
736 .clampScalar(0, s8, s64);
737 getActionDefinitionsBuilder(G_FCONSTANT)
738 .legalFor({s16, s32, s64, s128});
739
740 // FIXME: fix moreElementsToNextPow2
742 .legalFor({{i32, i32}, {i32, i64}, {i32, p0}})
744 .minScalarOrElt(1, s8)
745 .clampScalar(1, s32, s64)
746 .clampScalar(0, s32, s32)
749 [=](const LegalityQuery &Query) {
750 const LLT &Ty = Query.Types[0];
751 const LLT &SrcTy = Query.Types[1];
752 return Ty.isVector() && !SrcTy.isPointerVector() &&
753 Ty.getElementType() != SrcTy.getElementType();
754 },
755 0, 1)
756 .minScalarOrEltIf(
757 [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
758 1, s32)
759 .minScalarOrEltIf(
760 [=](const LegalityQuery &Query) {
761 return Query.Types[1].isPointerVector();
762 },
763 0, s64)
765 .clampNumElements(1, v8s8, v16s8)
766 .clampNumElements(1, v4s16, v8s16)
767 .clampNumElements(1, v2s32, v4s32)
768 .clampNumElements(1, v2s64, v2s64)
769 .clampNumElements(1, v2p0, v2p0)
770 .customIf(isVector(0));
771
773 .legalFor({{i32, f32},
774 {i32, f64},
775 {v4i32, v4f32},
776 {v2i32, v2f32},
777 {v2i64, v2f64}})
778 .legalFor(HasFP16, {{i32, f16}, {v4i16, v4f16}, {v8i16, v8f16}})
780 .clampScalar(0, s32, s32)
782 [=](const LegalityQuery &Q) {
783 return (!HasFP16 && Q.Types[1].getScalarType().isFloat16()) ||
784 Q.Types[1].getScalarType().isBFloat16();
785 },
786 changeElementTo(1, f32))
787 .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
789 [=](const LegalityQuery &Query) {
790 const LLT &Ty = Query.Types[0];
791 const LLT &SrcTy = Query.Types[1];
792 return Ty.isVector() && !SrcTy.isPointerVector() &&
793 Ty.getElementType() != SrcTy.getElementType();
794 },
795 0, 1)
796 .clampNumElements(1, v4s16, v8s16)
797 .clampNumElements(1, v2s32, v4s32)
798 .clampMaxNumElements(1, s64, 2)
800 .libcallFor({{s32, s128}});
801
802 // Extensions
803 auto ExtLegalFunc = [=](const LegalityQuery &Query) {
  // Legality predicate for the scalar forms of G_ZEXT/G_SEXT/G_ANYEXT.
  // Returns true only when the destination (Types[0]) is a non-vector type
  // whose size is a power of two with 8 <= size < 128 bits, and the source
  // (Types[1]) has a power-of-two size of at least 8 bits.
804 unsigned DstSize = Query.Types[0].getSizeInBits();
805
806 // Handle legal vectors using legalFor
807 if (Query.Types[0].isVector())
808 return false;
809
810 if (DstSize < 8 || DstSize >= 128 || !isPowerOf2_32(DstSize))
811 return false; // Extending to a scalar s128 needs narrowing.
812
813 const LLT &SrcTy = Query.Types[1];
814
815 // Make sure we fit in a register otherwise. Don't bother checking that
816 // the source type is below 128 bits. We shouldn't be allowing anything
817 // through which is wider than the destination in the first place.
818 unsigned SrcSize = SrcTy.getSizeInBits();
819 if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
820 return false;
821
822 return true;
823 };
824 getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
825 .legalIf(ExtLegalFunc)
826 .legalFor({{v8s16, v8s8}, {v4s32, v4s16}, {v2s64, v2s32}})
827 .clampScalar(0, s64, s64) // Just for s128, others are handled above.
829 .clampMaxNumElements(1, s8, 8)
830 .clampMaxNumElements(1, s16, 4)
831 .clampMaxNumElements(1, s32, 2)
832 // Tries to convert a large EXTEND into two smaller EXTENDs
833 .lowerIf([=](const LegalityQuery &Query) {
834 return (Query.Types[0].getScalarSizeInBits() >
835 Query.Types[1].getScalarSizeInBits() * 2) &&
836 Query.Types[0].isVector() &&
837 (Query.Types[1].getScalarSizeInBits() == 8 ||
838 Query.Types[1].getScalarSizeInBits() == 16);
839 })
840 .clampMinNumElements(1, s8, 8)
841 .clampMinNumElements(1, s16, 4)
843
845 .legalFor({{v8s8, v8s16}, {v4s16, v4s32}, {v2s32, v2s64}})
847 .clampMaxNumElements(0, s8, 8)
848 .clampMaxNumElements(0, s16, 4)
849 .clampMaxNumElements(0, s32, 2)
851 [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
852 0, s8)
853 .lowerIf([=](const LegalityQuery &Query) {
854 LLT DstTy = Query.Types[0];
855 LLT SrcTy = Query.Types[1];
856 return DstTy.isVector() && SrcTy.getSizeInBits() > 128 &&
857 DstTy.getScalarSizeInBits() * 2 <= SrcTy.getScalarSizeInBits();
858 })
859 .clampMinNumElements(0, s8, 8)
860 .clampMinNumElements(0, s16, 4)
861 .alwaysLegal();
862
863 getActionDefinitionsBuilder({G_TRUNC_SSAT_S, G_TRUNC_SSAT_U, G_TRUNC_USAT_U})
864 .legalFor({{v8i8, v8i16}, {v4i16, v4i32}, {v2i32, v2i64}})
865 .clampNumElements(0, v2s32, v2s32);
866
867 getActionDefinitionsBuilder(G_SEXT_INREG)
868 .legalFor({i32, i64, v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64})
869 .maxScalar(0, s64)
870 .clampNumElements(0, v8s8, v16s8)
871 .clampNumElements(0, v4s16, v8s16)
872 .clampNumElements(0, v2s32, v4s32)
873 .clampMaxNumElements(0, s64, 2)
874 .lower();
875
876 // FP conversions
878 .legalFor(
879 {{f16, f32}, {f16, f64}, {f32, f64}, {v4f16, v4f32}, {v2f32, v2f64}})
880 .legalFor(ST.hasBF16(), {{bf16, f32}, {v4bf16, v4f32}})
881 .libcallFor({{f16, f128}, {f32, f128}, {f64, f128}})
883 .customIf([](const LegalityQuery &Q) {
884 LLT DstTy = Q.Types[0];
885 LLT SrcTy = Q.Types[1];
886 return SrcTy.getScalarSizeInBits() == 64 &&
887 DstTy.getScalarSizeInBits() == 16;
888 })
889 .lowerFor({{bf16, f32}, {v4bf16, v4f32}})
890 // Clamp based on input
891 .clampNumElements(1, v4s32, v4s32)
892 .clampNumElements(1, v2s64, v2s64)
893 .scalarize(0);
894
895 getActionDefinitionsBuilder(G_FPEXT)
896 .legalFor({{f32, f16},
897 {f64, f16},
898 {f32, bf16},
899 {f64, f32},
900 {v4f32, v4f16},
901 {v4f32, v4bf16},
902 {v2f64, v2f32}})
903 .libcallFor({{f128, f64}, {f128, f32}, {f128, f16}})
906 [](const LegalityQuery &Q) {
907 LLT DstTy = Q.Types[0];
908 LLT SrcTy = Q.Types[1];
909 return SrcTy.isVector() && DstTy.isVector() &&
910 SrcTy.getScalarSizeInBits() == 16 &&
911 DstTy.getScalarSizeInBits() == 64;
912 },
913 changeElementTo(1, f32))
914 .clampNumElements(0, v4s32, v4s32)
915 .clampNumElements(0, v2s64, v2s64)
916 .scalarize(0);
917
918 // Conversions
919 getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
920 .legalFor({{i32, f32},
921 {i64, f32},
922 {i32, f64},
923 {i64, f64},
924 {v2i32, v2f32},
925 {v4i32, v4f32},
926 {v2i64, v2f64}})
927 .legalFor(HasFP16,
928 {{i32, f16}, {i64, f16}, {v4i16, v4f16}, {v8i16, v8f16}})
929 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
931 // The range of a fp16 value fits into an i17, so we can lower the width
932 // to i64.
934 [=](const LegalityQuery &Query) {
935 return Query.Types[1] == f16 && Query.Types[0].getSizeInBits() > 64;
936 },
937 changeTo(0, i64))
940 .minScalar(0, s32)
941 .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
943 [=](const LegalityQuery &Query) {
944 return Query.Types[0].getScalarSizeInBits() <= 64 &&
945 Query.Types[0].getScalarSizeInBits() >
946 Query.Types[1].getScalarSizeInBits();
947 },
949 .widenScalarIf(
950 [=](const LegalityQuery &Query) {
951 return Query.Types[1].getScalarSizeInBits() <= 64 &&
952 Query.Types[0].getScalarSizeInBits() <
953 Query.Types[1].getScalarSizeInBits();
954 },
956 .clampNumElements(0, v4s16, v8s16)
957 .clampNumElements(0, v2s32, v4s32)
958 .clampMaxNumElements(0, s64, 2)
959 .libcallFor(
960 {{i32, f128}, {i64, f128}, {i128, f128}, {i128, f32}, {i128, f64}});
961
962 getActionDefinitionsBuilder({G_FPTOSI_SAT, G_FPTOUI_SAT})
963 .legalFor({{i32, f32},
964 {i64, f32},
965 {i32, f64},
966 {i64, f64},
967 {v2i32, v2f32},
968 {v4i32, v4f32},
969 {v2i64, v2f64}})
970 .legalFor(
971 HasFP16,
972 {{i16, f16}, {i32, f16}, {i64, f16}, {v4i16, v4f16}, {v8i16, v8f16}})
973 // Handle types larger than i64 by scalarizing/lowering.
974 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
976 // The range of a fp16 value fits into an i17, so we can lower the width
977 // to i64.
979 [=](const LegalityQuery &Query) {
980 return Query.Types[1] == f16 && Query.Types[0].getSizeInBits() > 64;
981 },
982 changeTo(0, i64))
983 .lowerIf(::any(scalarWiderThan(0, 64), scalarWiderThan(1, 64)), 0)
985 .widenScalarToNextPow2(0, /*MinSize=*/32)
986 .minScalar(0, s32)
987 .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
989 [=](const LegalityQuery &Query) {
990 unsigned ITySize = Query.Types[0].getScalarSizeInBits();
991 return (ITySize == 16 || ITySize == 32 || ITySize == 64) &&
992 ITySize > Query.Types[1].getScalarSizeInBits();
993 },
995 .widenScalarIf(
996 [=](const LegalityQuery &Query) {
997 unsigned FTySize = Query.Types[1].getScalarSizeInBits();
998 return (FTySize == 16 || FTySize == 32 || FTySize == 64) &&
999 Query.Types[0].getScalarSizeInBits() < FTySize;
1000 },
1003 .clampNumElements(0, v4s16, v8s16)
1004 .clampNumElements(0, v2s32, v4s32)
1005 .clampMaxNumElements(0, s64, 2);
1006
1007 getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
1008 .legalFor({{f32, i32},
1009 {f64, i32},
1010 {f32, i64},
1011 {f64, i64},
1012 {v2f32, v2i32},
1013 {v4f32, v4i32},
1014 {v2f64, v2i64}})
1015 .legalFor(HasFP16,
1016 {{f16, i32}, {f16, i64}, {v4f16, v4i16}, {v8f16, v8i16}})
1017 .unsupportedIf([&](const LegalityQuery &Query) {
1018 return Query.Types[0].getScalarType().isBFloat16();
1019 })
1020 .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
1024 .minScalar(1, f32)
1025 .lowerIf([](const LegalityQuery &Query) {
1026 return Query.Types[1].isVector() &&
1027 Query.Types[1].getScalarSizeInBits() == 64 &&
1028 Query.Types[0].getScalarSizeInBits() == 16;
1029 })
1030 .widenScalarOrEltToNextPow2OrMinSize(0, /*MinSize=*/HasFP16 ? 16 : 32)
1031 .scalarizeIf(
1032 // v2i64->v2f32 needs to scalarize to avoid double-rounding issues.
1033 [](const LegalityQuery &Query) {
1034 return Query.Types[0].getScalarSizeInBits() == 32 &&
1035 Query.Types[1].getScalarSizeInBits() == 64;
1036 },
1037 0)
1038 .widenScalarIf(
1039 [](const LegalityQuery &Query) {
1040 return Query.Types[1].getScalarSizeInBits() <= 64 &&
1041 Query.Types[0].getScalarSizeInBits() <
1042 Query.Types[1].getScalarSizeInBits();
1043 },
1045 .widenScalarIf(
1046 [](const LegalityQuery &Query) {
1047 return Query.Types[0].getScalarSizeInBits() <= 64 &&
1048 Query.Types[0].getScalarSizeInBits() >
1049 Query.Types[1].getScalarSizeInBits();
1050 },
1052 .clampNumElements(0, v4s16, v8s16)
1053 .clampNumElements(0, v2s32, v4s32)
1054 .clampMaxNumElements(0, s64, 2)
1055 .libcallFor({{f16, i128},
1056 {f32, i128},
1057 {f64, i128},
1058 {f128, i128},
1059 {f128, i32},
1060 {f128, i64}});
1061
1062 // Control-flow
1063 getActionDefinitionsBuilder(G_BR).alwaysLegal();
1064 getActionDefinitionsBuilder(G_BRCOND)
1065 .legalFor({s32})
1066 .clampScalar(0, s32, s32);
1067 getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
1068
1069 getActionDefinitionsBuilder(G_SELECT)
1070 .legalFor({{s32, s32}, {s64, s32}, {p0, s32}})
1071 .widenScalarToNextPow2(0)
1072 .clampScalar(0, s32, s64)
1073 .clampScalar(1, s32, s32)
1076 .lowerIf(isVector(0));
1077
1078 // Pointer-handling
1079 getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
1080
1081 if (TM.getCodeModel() == CodeModel::Small)
1082 getActionDefinitionsBuilder(G_GLOBAL_VALUE).custom();
1083 else
1084 getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
1085
1086 getActionDefinitionsBuilder(G_PTRAUTH_GLOBAL_VALUE)
1087 .legalIf(all(typeIs(0, p0), typeIs(1, p0)));
1088
1089 getActionDefinitionsBuilder(G_PTRTOINT)
1090 .legalFor({{i64, p0}, {v2i64, v2p0}})
1091 .widenScalarToNextPow2(0, 64)
1092 .clampScalar(0, s64, s64)
1093 .clampMaxNumElements(0, s64, 2);
1094
1095 getActionDefinitionsBuilder(G_INTTOPTR)
1096 .unsupportedIf([&](const LegalityQuery &Query) {
1097 return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
1098 })
1099 .legalFor({{p0, i64}, {v2p0, v2i64}})
1100 .clampMaxNumElements(1, s64, 2);
1101
1102 // Casts for 32 and 64-bit width type are just copies.
1103 // Same for 128-bit width type, except they are on the FPR bank.
1104 getActionDefinitionsBuilder(G_BITCAST)
1106 // Keeping 32-bit instructions legal to prevent regression in some tests
1107 .legalForCartesianProduct({s32, v2s16, v4s8})
1108 .legalForCartesianProduct({s64, v8s8, v4s16, v2s32})
1109 .legalForCartesianProduct({s128, v16s8, v8s16, v4s32, v2s64, v2p0})
1110 .customIf([=](const LegalityQuery &Query) {
1111 // Handle casts from i1 vectors to scalars.
1112 LLT DstTy = Query.Types[0];
1113 LLT SrcTy = Query.Types[1];
1114 return DstTy.isScalar() && SrcTy.isVector() &&
1115 SrcTy.getScalarSizeInBits() == 1;
1116 })
1117 .lowerIf([=](const LegalityQuery &Query) {
1118 return Query.Types[0].isVector() != Query.Types[1].isVector();
1119 })
1121 .clampNumElements(0, v8s8, v16s8)
1122 .clampNumElements(0, v4s16, v8s16)
1123 .clampNumElements(0, v2s32, v4s32)
1124 .clampMaxNumElements(0, s64, 2)
1125 .lower();
1126
1127 getActionDefinitionsBuilder(G_VASTART).legalFor({p0});
1128
1129 // va_list must be a pointer, but most sized types are pretty easy to handle
1130 // as the destination.
1131 getActionDefinitionsBuilder(G_VAARG)
1132 .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
1133 .clampScalar(0, s8, s64)
1134 .widenScalarToNextPow2(0, /*Min*/ 8);
1135
1136 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
1137 .lowerIf(
1138 all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0)));
1139
1140 bool UseOutlineAtomics = ST.outlineAtomics() && !ST.hasLSE();
1141
1142 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
1143 .legalFor(!UseOutlineAtomics, {{s32, p0}, {s64, p0}})
1144 .customFor(!UseOutlineAtomics, {{s128, p0}})
1145 .libcallFor(UseOutlineAtomics,
1146 {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}, {s128, p0}})
1147 .clampScalar(0, s32, s64);
1148
1149 getActionDefinitionsBuilder({G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD,
1150 G_ATOMICRMW_SUB, G_ATOMICRMW_AND, G_ATOMICRMW_OR,
1151 G_ATOMICRMW_XOR})
1152 .legalFor(!UseOutlineAtomics, {{s32, p0}, {s64, p0}})
1153 .libcallFor(UseOutlineAtomics,
1154 {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}})
1155 .clampScalar(0, s32, s64);
1156
1157 // Do not outline these atomics operations, as per comment in
1158 // AArch64ISelLowering.cpp's shouldExpandAtomicRMWInIR().
1159 getActionDefinitionsBuilder(
1160 {G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
1161 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)))
1162 .clampScalar(0, s32, s64);
1163
1164 getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});
1165
1166 // Merge/Unmerge
1167 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
1168 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
1169 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
1170 getActionDefinitionsBuilder(Op)
1171 .widenScalarToNextPow2(LitTyIdx, 8)
1172 .widenScalarToNextPow2(BigTyIdx, 32)
1173 .clampScalar(LitTyIdx, s8, s64)
1174 .clampScalar(BigTyIdx, s32, s128)
1175 .legalIf([=](const LegalityQuery &Q) {
1176 switch (Q.Types[BigTyIdx].getSizeInBits()) {
1177 case 32:
1178 case 64:
1179 case 128:
1180 break;
1181 default:
1182 return false;
1183 }
1184 switch (Q.Types[LitTyIdx].getSizeInBits()) {
1185 case 8:
1186 case 16:
1187 case 32:
1188 case 64:
1189 return true;
1190 default:
1191 return false;
1192 }
1193 });
1194 }
1195
1196 // TODO : nxv4s16, nxv2s16, nxv2s32
1197 getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
1198 .legalFor(HasSVE, {{s16, nxv16s8, s64},
1199 {s16, nxv8s16, s64},
1200 {s32, nxv4s32, s64},
1201 {s64, nxv2s64, s64}})
1202 .unsupportedIf([=](const LegalityQuery &Query) {
1203 const LLT &EltTy = Query.Types[1].getElementType();
1204 if (Query.Types[1].isScalableVector())
1205 return false;
1206 return Query.Types[0] != EltTy;
1207 })
1208 .minScalar(2, s64)
1209 .customIf([=](const LegalityQuery &Query) {
1210 const LLT &VecTy = Query.Types[1];
1211 return VecTy == v8s8 || VecTy == v16s8 || VecTy == v2s16 ||
1212 VecTy == v4s16 || VecTy == v8s16 || VecTy == v2s32 ||
1213 VecTy == v4s32 || VecTy == v2s64 || VecTy == v2p0;
1214 })
1215 .minScalarOrEltIf(
1216 [=](const LegalityQuery &Query) {
1217 // We want to promote <M x s1> to <M x s64> if that wouldn't
1218 // cause the total vec size to be > 128b.
1219 return Query.Types[1].isFixedVector() &&
1220 Query.Types[1].getNumElements() <= 2;
1221 },
1222 0, s64)
1223 .minScalarOrEltIf(
1224 [=](const LegalityQuery &Query) {
1225 return Query.Types[1].isFixedVector() &&
1226 Query.Types[1].getNumElements() <= 4;
1227 },
1228 0, s32)
1229 .minScalarOrEltIf(
1230 [=](const LegalityQuery &Query) {
1231 return Query.Types[1].isFixedVector() &&
1232 Query.Types[1].getNumElements() <= 8;
1233 },
1234 0, s16)
1235 .minScalarOrEltIf(
1236 [=](const LegalityQuery &Query) {
1237 return Query.Types[1].isFixedVector() &&
1238 Query.Types[1].getNumElements() <= 16;
1239 },
1240 0, s8)
1241 .minScalarOrElt(0, s8) // Worst case, we need at least s8.
1242 .moreElementsToNextPow2(1)
1243 .clampMaxNumElements(1, s64, 2)
1244 .clampMaxNumElements(1, s32, 4)
1245 .clampMaxNumElements(1, s16, 8)
1246 .clampMaxNumElements(1, s8, 16)
1247 .clampMaxNumElements(1, p0, 2)
1248 .scalarizeIf(scalarOrEltWiderThan(1, 64), 1);
1249
1250 getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
1251 .legalIf(
1252 typeInSet(0, {v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64, v2p0}))
1253 .legalFor(HasSVE, {{nxv16s8, s32, s64},
1254 {nxv8s16, s32, s64},
1255 {nxv4s32, s32, s64},
1256 {nxv2s64, s64, s64}})
1258 .widenVectorEltsToVectorMinSize(0, 64)
1259 .clampNumElements(0, v8s8, v16s8)
1260 .clampNumElements(0, v4s16, v8s16)
1261 .clampNumElements(0, v2s32, v4s32)
1262 .clampMaxNumElements(0, s64, 2)
1263 .clampMaxNumElements(0, p0, 2)
1264 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0);
1265
1266 getActionDefinitionsBuilder(G_BUILD_VECTOR)
1267 .legalFor({{v8s8, s8},
1268 {v16s8, s8},
1269 {v4s16, s16},
1270 {v8s16, s16},
1271 {v2s32, s32},
1272 {v4s32, s32},
1273 {v2s64, s64},
1274 {v2p0, p0}})
1275 .clampNumElements(0, v4s32, v4s32)
1276 .clampNumElements(0, v2s64, v2s64)
1277 .minScalarOrElt(0, s8)
1278 .widenVectorEltsToVectorMinSize(0, 64)
1279 .widenScalarOrEltToNextPow2(0)
1280 .minScalarSameAs(1, 0);
1281
1282 getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).lower();
1283
1284 getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
1285 .legalIf([=](const LegalityQuery &Query) {
1286 const LLT &DstTy = Query.Types[0];
1287 const LLT &SrcTy = Query.Types[1];
1288 // For now just support the TBL2 variant which needs the source vectors
1289 // to be the same size as the dest.
1290 if (DstTy != SrcTy)
1291 return false;
1292 return llvm::is_contained(
1293 {v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64}, DstTy);
1294 })
1295 .moreElementsIf(
1296 [](const LegalityQuery &Query) {
1297 return Query.Types[0].getNumElements() >
1298 Query.Types[1].getNumElements();
1299 },
1300 changeTo(1, 0))
1302 .moreElementsIf(
1303 [](const LegalityQuery &Query) {
1304 return Query.Types[0].getNumElements() <
1305 Query.Types[1].getNumElements();
1306 },
1307 changeTo(0, 1))
1308 .widenScalarOrEltToNextPow2OrMinSize(0, 8)
1309 .clampNumElements(0, v8s8, v16s8)
1310 .clampNumElements(0, v4s16, v8s16)
1311 .clampNumElements(0, v4s32, v4s32)
1312 .clampNumElements(0, v2s64, v2s64)
1313 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
1314 .bitcastIf(isPointerVector(0), [=](const LegalityQuery &Query) {
1315 // Bitcast pointers vector to i64.
1316 const LLT DstTy = Query.Types[0];
1317 return std::pair(
1318 0, LLT::vector(DstTy.getElementCount(), LLT::integer(64)));
1319 });
1320
1321 getActionDefinitionsBuilder(G_CONCAT_VECTORS)
1322 .legalFor({{v16s8, v8s8}, {v8s16, v4s16}, {v4s32, v2s32}})
1323 .customIf([=](const LegalityQuery &Query) {
1324 return Query.Types[0].isFixedVector() &&
1325 Query.Types[0].getScalarSizeInBits() < 8;
1326 })
1327 .bitcastIf(
1328 [=](const LegalityQuery &Query) {
1329 return Query.Types[0].isFixedVector() &&
1330 Query.Types[1].isFixedVector() &&
1331 Query.Types[0].getScalarSizeInBits() >= 8 &&
1332 isPowerOf2_64(Query.Types[0].getScalarSizeInBits()) &&
1333 Query.Types[0].getSizeInBits() <= 128 &&
1334 Query.Types[1].getSizeInBits() <= 64;
1335 },
1336 [=](const LegalityQuery &Query) {
1337 const LLT DstTy = Query.Types[0];
1338 const LLT SrcTy = Query.Types[1];
1339 return std::pair(
1340 0, DstTy.changeElementSize(SrcTy.getSizeInBits())
1343 SrcTy.getNumElements())));
1344 });
1345
1346 getActionDefinitionsBuilder(G_EXTRACT_SUBVECTOR)
1347 .legalFor({{v8s8, v16s8}, {v4s16, v8s16}, {v2s32, v4s32}})
1349 .immIdx(0); // Inform verifier imm idx 0 is handled.
1350
1351 // TODO: {nxv16s8, s8}, {nxv8s16, s16}
1352 getActionDefinitionsBuilder(G_SPLAT_VECTOR)
1353 .legalFor(HasSVE, {{nxv4s32, s32}, {nxv2s64, s64}});
1354
1355 getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({p0});
1356
1357 getActionDefinitionsBuilder(G_BRJT).legalFor({{p0, s64}});
1358
1359 getActionDefinitionsBuilder({G_TRAP, G_DEBUGTRAP, G_UBSANTRAP}).alwaysLegal();
1360
1361 getActionDefinitionsBuilder(G_DYN_STACKALLOC).custom();
1362
1363 getActionDefinitionsBuilder({G_STACKSAVE, G_STACKRESTORE}).lower();
1364
1365 if (ST.hasMOPS()) {
1366 // G_BZERO is not supported. Currently it is only emitted by
1367 // PreLegalizerCombiner for G_MEMSET with zero constant.
1368 getActionDefinitionsBuilder(G_BZERO).unsupported();
1369
1370 getActionDefinitionsBuilder(G_MEMSET)
1371 .legalForCartesianProduct({p0}, {s64}, {s64})
1372 .customForCartesianProduct({p0}, {s8}, {s64})
1373 .immIdx(0); // Inform verifier imm idx 0 is handled.
1374
1375 getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE})
1376 .legalForCartesianProduct({p0}, {p0}, {s64})
1377 .immIdx(0); // Inform verifier imm idx 0 is handled.
1378
1379 // G_MEMCPY_INLINE does not have a tailcall immediate
1380 getActionDefinitionsBuilder(G_MEMCPY_INLINE)
1381 .legalForCartesianProduct({p0}, {p0}, {s64});
1382
1383 } else {
1384 getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
1385 .libcall();
1386 }
1387
1388 // For fadd reductions we have pairwise operations available. We treat the
1389 // usual legal types as legal and handle the lowering to pairwise instructions
1390 // later.
1391 getActionDefinitionsBuilder(G_VECREDUCE_FADD)
1392 .legalFor({{f32, v2f32}, {f32, v4f32}, {f64, v2f64}})
1393 .legalFor(HasFP16, {{f16, v4f16}, {f16, v8f16}})
1394 .minScalarOrElt(0, MinFPScalar)
1395 .clampMaxNumElements(1, s64, 2)
1396 .clampMaxNumElements(1, s32, 4)
1397 .clampMaxNumElements(1, s16, 8)
1398 .moreElementsToNextPow2(1)
1399 .scalarize(1)
1400 .lower();
1401
1402 // For fmul reductions we need to split up into individual operations. We
1403 // clamp to 128 bit vectors then to 64bit vectors to produce a cascade of
1404 // smaller types, followed by scalarizing what remains.
1405 getActionDefinitionsBuilder(G_VECREDUCE_FMUL)
1406 .minScalarOrElt(0, MinFPScalar)
1407 .clampMaxNumElements(1, s64, 2)
1408 .clampMaxNumElements(1, s32, 4)
1409 .clampMaxNumElements(1, s16, 8)
1410 .clampMaxNumElements(1, s32, 2)
1411 .clampMaxNumElements(1, s16, 4)
1412 .scalarize(1)
1413 .lower();
1414
1415 getActionDefinitionsBuilder({G_VECREDUCE_SEQ_FADD, G_VECREDUCE_SEQ_FMUL})
1416 .scalarize(2)
1417 .lower();
1418
1419 getActionDefinitionsBuilder(G_VECREDUCE_ADD)
1420 .legalFor({{i8, v8i8},
1421 {i8, v16i8},
1422 {i16, v4i16},
1423 {i16, v8i16},
1424 {i32, v2i32},
1425 {i32, v4i32},
1426 {i64, v2i64}})
1428 .clampMaxNumElements(1, s64, 2)
1429 .clampMaxNumElements(1, s32, 4)
1430 .clampMaxNumElements(1, s16, 8)
1431 .clampMaxNumElements(1, s8, 16)
1432 .widenVectorEltsToVectorMinSize(1, 64)
1433 .scalarize(1);
1434
1435 getActionDefinitionsBuilder({G_VECREDUCE_FMIN, G_VECREDUCE_FMAX,
1436 G_VECREDUCE_FMINIMUM, G_VECREDUCE_FMAXIMUM})
1437 .legalFor({{f32, v2f32}, {f32, v4f32}, {f64, v2f64}})
1438 .legalFor(HasFP16, {{f16, v4f16}, {f16, v8f16}})
1439 .minScalarOrElt(0, MinFPScalar)
1440 .clampMaxNumElements(1, s64, 2)
1441 .clampMaxNumElements(1, s32, 4)
1442 .clampMaxNumElements(1, s16, 8)
1443 .scalarize(1)
1444 .lower();
1445
1446 getActionDefinitionsBuilder(G_VECREDUCE_MUL)
1447 .clampMaxNumElements(1, s32, 2)
1448 .clampMaxNumElements(1, s16, 4)
1449 .clampMaxNumElements(1, s8, 8)
1450 .scalarize(1)
1451 .lower();
1452
1453 getActionDefinitionsBuilder(
1454 {G_VECREDUCE_SMIN, G_VECREDUCE_SMAX, G_VECREDUCE_UMIN, G_VECREDUCE_UMAX})
1455 .legalFor({{i8, v8i8},
1456 {i8, v16i8},
1457 {i16, v4i16},
1458 {i16, v8i16},
1459 {i32, v2i32},
1460 {i32, v4i32}})
1461 .moreElementsIf(
1462 [=](const LegalityQuery &Query) {
1463 return Query.Types[1].isVector() &&
1464 Query.Types[1].getElementType() != s8 &&
1465 Query.Types[1].getNumElements() & 1;
1466 },
1468 .clampMaxNumElements(1, s64, 2)
1469 .clampMaxNumElements(1, s32, 4)
1470 .clampMaxNumElements(1, s16, 8)
1471 .clampMaxNumElements(1, s8, 16)
1472 .scalarize(1)
1473 .lower();
1474
1475 getActionDefinitionsBuilder(
1476 {G_VECREDUCE_OR, G_VECREDUCE_AND, G_VECREDUCE_XOR})
1477 // Try to break down into smaller vectors as long as they're at least 64
1478 // bits. This lets us use vector operations for some parts of the
1479 // reduction.
1480 .fewerElementsIf(
1481 [=](const LegalityQuery &Q) {
1482 LLT SrcTy = Q.Types[1];
1483 if (SrcTy.isScalar())
1484 return false;
1485 if (!isPowerOf2_32(SrcTy.getNumElements()))
1486 return false;
1487 // We can usually perform 64b vector operations.
1488 return SrcTy.getSizeInBits() > 64;
1489 },
1490 [=](const LegalityQuery &Q) {
1491 LLT SrcTy = Q.Types[1];
1492 return std::make_pair(1, SrcTy.divide(2));
1493 })
1494 .scalarize(1)
1495 .lower();
1496
1497 // TODO: Update this to correct handling when adding AArch64/SVE support.
1498 getActionDefinitionsBuilder(G_VECTOR_COMPRESS).lower();
1499
1500 // Access to floating-point environment.
1501 getActionDefinitionsBuilder({G_GET_FPENV, G_SET_FPENV, G_RESET_FPENV,
1502 G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
1503 .libcall();
1504
1505 getActionDefinitionsBuilder(G_IS_FPCLASS).lower();
1506
1507 getActionDefinitionsBuilder(G_PREFETCH).custom();
1508
1509 getActionDefinitionsBuilder({G_SCMP, G_UCMP}).lower();
1510
1511 getActionDefinitionsBuilder({G_INTRINSIC, G_INTRINSIC_W_SIDE_EFFECTS})
1512 .alwaysLegal();
1513 getActionDefinitionsBuilder(G_FENCE).alwaysLegal();
1514 getActionDefinitionsBuilder(G_INVOKE_REGION_START).alwaysLegal();
1515
1516 getLegacyLegalizerInfo().computeTables();
1517 verify(*ST.getInstrInfo());
1518}
1519
1522 LostDebugLocObserver &LocObserver) const {
     // Entry point for custom legalization: dispatch on the opcode of MI to the
     // matching legalize* routine and return that routine's result. Opcodes with
     // no case below fall into `default` and return false. LocObserver is not
     // referenced anywhere in this body.
1523 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1524 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1525 GISelChangeObserver &Observer = Helper.Observer;
1526 switch (MI.getOpcode()) {
1527 default:
1528 // No idea what to do.
1529 return false;
1530 case TargetOpcode::G_VAARG:
1531 return legalizeVaArg(MI, MRI, MIRBuilder);
1532 case TargetOpcode::G_LOAD:
1533 case TargetOpcode::G_STORE:
1534 return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
1535 case TargetOpcode::G_SHL:
1536 case TargetOpcode::G_ASHR:
1537 case TargetOpcode::G_LSHR:
1538 return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
1539 case TargetOpcode::G_GLOBAL_VALUE:
1540 return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
1541 case TargetOpcode::G_SBFX:
1542 case TargetOpcode::G_UBFX:
1543 return legalizeBitfieldExtract(MI, MRI, Helper);
1544 case TargetOpcode::G_FSHL:
1545 case TargetOpcode::G_FSHR:
1546 return legalizeFunnelShift(MI, MRI, MIRBuilder, Observer, Helper);
1547 case TargetOpcode::G_ROTR:
1548 return legalizeRotate(MI, MRI, Helper);
1549 case TargetOpcode::G_CTPOP:
1550 return legalizeCTPOP(MI, MRI, Helper);
1551 case TargetOpcode::G_ATOMIC_CMPXCHG:
1552 return legalizeAtomicCmpxchg128(MI, MRI, Helper);
1553 case TargetOpcode::G_CTTZ:
1554 return legalizeCTTZ(MI, Helper);
     // All four memory operations share a single handler.
1555 case TargetOpcode::G_BZERO:
1556 case TargetOpcode::G_MEMCPY:
1557 case TargetOpcode::G_MEMMOVE:
1558 case TargetOpcode::G_MEMSET:
1559 return legalizeMemOps(MI, Helper);
1560 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1561 return legalizeExtractVectorElt(MI, MRI, Helper);
1562 case TargetOpcode::G_DYN_STACKALLOC:
1563 return legalizeDynStackAlloc(MI, Helper);
1564 case TargetOpcode::G_PREFETCH:
1565 return legalizePrefetch(MI, Helper);
     // G_ABS has no AArch64-specific routine; it goes straight to the generic
     // helper.
1566 case TargetOpcode::G_ABS:
1567 return Helper.lowerAbsToCNeg(MI);
1568 case TargetOpcode::G_ICMP:
1569 return legalizeICMP(MI, MRI, MIRBuilder);
1570 case TargetOpcode::G_BITCAST:
1571 return legalizeBitcast(MI, Helper);
1572 case TargetOpcode::G_CONCAT_VECTORS:
1573 return legalizeConcatVectors(MI, MRI, MIRBuilder);
1574 case TargetOpcode::G_FPTRUNC:
1575 // In order to truncate f64 to f16 properly, we need to use f32 as an
1576 // intermediary.
1577 return legalizeFptrunc(MI, MIRBuilder, MRI);
1578 }
1579
     // Every case above returns, so control must never reach this point.
1580 llvm_unreachable("expected switch to return");
1581}
1582
1583bool AArch64LegalizerInfo::legalizeBitcast(MachineInstr &MI,
1584 LegalizerHelper &Helper) const {
1585 assert(MI.getOpcode() == TargetOpcode::G_BITCAST && "Unexpected opcode");
1586 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
1587 // We're trying to handle casts from i1 vectors to scalars but reloading from
1588 // stack.
1589 if (!DstTy.isScalar() || !SrcTy.isVector() ||
1590 SrcTy.getElementType() != LLT::scalar(1))
1591 return false;
1592
1593 Helper.createStackStoreLoad(DstReg, SrcReg);
1594 MI.eraseFromParent();
1595 return true;
1596}
1597
// Custom legalization for G_FSHL / G_FSHR.
//
// The shift amount is operand 3. When it is not a constant, or is a multiple
// of the operation's bit width, the result depends on
// Helper.lowerFunnelShiftAsShifts. A G_FSHR whose amount is already a 64-bit
// constant smaller than the bit width is reported as legal without changes.
// Otherwise the amount is reduced modulo the bit width and materialized as a
// 64-bit constant: a G_FSHR has its amount operand rewritten in place, and a
// G_FSHL is erased and replaced by a G_FSHR using BitWidth - Amount.
1598bool AArch64LegalizerInfo::legalizeFunnelShift(MachineInstr &MI,
1600 MachineIRBuilder &MIRBuilder,
1601 GISelChangeObserver &Observer,
1602 LegalizerHelper &Helper) const {
1603 assert(MI.getOpcode() == TargetOpcode::G_FSHL ||
1604 MI.getOpcode() == TargetOpcode::G_FSHR);
1605
1606 // Keep as G_FSHR if shift amount is a G_CONSTANT, else use generic
1607 // lowering
1608 Register ShiftNo = MI.getOperand(3).getReg();
1609 LLT ShiftTy = MRI.getType(ShiftNo);
1610 auto VRegAndVal = getIConstantVRegValWithLookThrough(ShiftNo, MRI);
1611
1612 // Adjust shift amount according to Opcode (FSHL/FSHR)
1613 // Convert FSHL to FSHR
1614 LLT OperationTy = MRI.getType(MI.getOperand(0).getReg());
     // BitWidth holds the size in bits of the result type, stored in an unsigned
     // APInt that is as wide as the shift-amount type.
1615 APInt BitWidth(ShiftTy.getSizeInBits(), OperationTy.getSizeInBits(), false);
1616
1617 // Lower non-constant shifts and leave zero shifts to the optimizer.
1618 if (!VRegAndVal || VRegAndVal->Value.urem(BitWidth) == 0)
1619 return (Helper.lowerFunnelShiftAsShifts(MI) ==
1621
     // Reduce the constant amount modulo the bit width.
1622 APInt Amount = VRegAndVal->Value.urem(BitWidth);
1623
     // For G_FSHL the amount becomes BitWidth - Amount, which is the amount used
     // by the replacement G_FSHR built below.
1624 Amount = MI.getOpcode() == TargetOpcode::G_FSHL ? BitWidth - Amount : Amount;
1625
1626 // If the instruction is G_FSHR, has a 64-bit G_CONSTANT for shift amount
1627 // in the range of 0 <-> BitWidth, it is legal
1628 if (ShiftTy.getSizeInBits() == 64 && MI.getOpcode() == TargetOpcode::G_FSHR &&
1629 VRegAndVal->Value.ult(BitWidth))
1630 return true;
1631
1632 // Cast the ShiftNumber to a 64-bit type
1633 auto Cast64 = MIRBuilder.buildConstant(LLT::integer(64), Amount.zext(64));
1634
1635 if (MI.getOpcode() == TargetOpcode::G_FSHR) {
     // Swap in the new 64-bit amount, bracketing the mutation with observer
     // notifications.
1636 Observer.changingInstr(MI);
1637 MI.getOperand(3).setReg(Cast64.getReg(0));
1638 Observer.changedInstr(MI);
1639 }
1640 // If Opcode is FSHL, remove the FSHL instruction and create a FSHR
1641 // instruction
1642 else if (MI.getOpcode() == TargetOpcode::G_FSHL) {
1643 MIRBuilder.buildInstr(TargetOpcode::G_FSHR, {MI.getOperand(0).getReg()},
1644 {MI.getOperand(1).getReg(), MI.getOperand(2).getReg(),
1645 Cast64.getReg(0)});
1646 MI.eraseFromParent();
1647 }
1648 return true;
1649}
1650
1651bool AArch64LegalizerInfo::legalizeICMP(MachineInstr &MI,
1653 MachineIRBuilder &MIRBuilder) const {
1654 Register DstReg = MI.getOperand(0).getReg();
1655 Register SrcReg1 = MI.getOperand(2).getReg();
1656 Register SrcReg2 = MI.getOperand(3).getReg();
1657 LLT DstTy = MRI.getType(DstReg);
1658 LLT SrcTy = MRI.getType(SrcReg1);
1659
1660 // Check the vector types are legal
1661 if (DstTy.getScalarSizeInBits() != SrcTy.getScalarSizeInBits() ||
1662 DstTy.getNumElements() != SrcTy.getNumElements() ||
1663 (DstTy.getSizeInBits() != 64 && DstTy.getSizeInBits() != 128))
1664 return false;
1665
1666 // Lowers G_ICMP NE => G_ICMP EQ to allow better pattern matching for
1667 // following passes
1668 CmpInst::Predicate Pred = (CmpInst::Predicate)MI.getOperand(1).getPredicate();
1669 if (Pred != CmpInst::ICMP_NE)
1670 return true;
1671 Register CmpReg =
1672 MIRBuilder
1673 .buildICmp(CmpInst::ICMP_EQ, MRI.getType(DstReg), SrcReg1, SrcReg2)
1674 .getReg(0);
1675 MIRBuilder.buildNot(DstReg, CmpReg);
1676
1677 MI.eraseFromParent();
1678 return true;
1679}
1680
1681bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,
1683 LegalizerHelper &Helper) const {
1684 // To allow for imported patterns to match, we ensure that the rotate amount
1685 // is 64b with an extension.
1686 Register AmtReg = MI.getOperand(2).getReg();
1687 LLT AmtTy = MRI.getType(AmtReg);
1688 (void)AmtTy;
1689 assert(AmtTy.isScalar() && "Expected a scalar rotate");
1690 assert(AmtTy.getSizeInBits() < 64 && "Expected this rotate to be legal");
1691 auto NewAmt = Helper.MIRBuilder.buildZExt(LLT::integer(64), AmtReg);
1692 Helper.Observer.changingInstr(MI);
1693 MI.getOperand(2).setReg(NewAmt.getReg(0));
1694 Helper.Observer.changedInstr(MI);
1695 return true;
1696}
1697
// Custom legalization of G_GLOBAL_VALUE.
//
// Replaces the instruction with an ADRP followed by a G_ADD_LOW that defines
// the original destination register, then erases MI. When the reference is
// classified as MO_TAGGED, a MOVKXi is inserted between the two. The
// instruction is left unchanged, and true is still returned, when operand 1
// is a symbol, when the global is thread-local, or when the reference is
// classified as MO_GOT. Every path returns true.
1698bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
1700 GISelChangeObserver &Observer) const {
1701 assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
1702 // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP +
1703 // G_ADD_LOW instructions.
1704 // By splitting this here, we can optimize accesses in the small code model by
1705 // folding in the G_ADD_LOW into the load/store offset.
1706 auto &GlobalOp = MI.getOperand(1);
1707 // Don't modify an intrinsic call.
1708 if (GlobalOp.isSymbol())
1709 return true;
1710 const auto* GV = GlobalOp.getGlobal();
1711 if (GV->isThreadLocal())
1712 return true; // Don't want to modify TLS vars.
1713
     // Ask the subtarget how this global has to be referenced.
1714 auto &TM = ST->getTargetLowering()->getTargetMachine();
1715 unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);
1716
     // References flagged MO_GOT are not rewritten here.
1717 if (OpFlags & AArch64II::MO_GOT)
1718 return true;
1719
1720 auto Offset = GlobalOp.getOffset();
1721 Register DstReg = MI.getOperand(0).getReg();
     // The ADRP carries the global address with the MO_PAGE flag added.
1722 auto ADRP = MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {})
1723 .addGlobalAddress(GV, Offset, OpFlags | AArch64II::MO_PAGE);
1724 // Set the regclass on the dest reg too.
1725 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1726
1727 // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so
1728 // by creating a MOVK that sets bits 48-63 of the register to (global address
1729 // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to
1730 // prevent an incorrect tag being generated during relocation when the
1731 // global appears before the code section. Without the offset, a global at
1732 // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced
1733 // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 =
1734 // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe`
1735 // instead of `0xf`.
1736 // This assumes that we're in the small code model so we can assume a binary
1737 // size of <= 4GB, which makes the untagged PC relative offset positive. The
1738 // binary must also be loaded into address range [0, 2^48). Both of these
1739 // properties need to be ensured at runtime when using tagged addresses.
1740 if (OpFlags & AArch64II::MO_TAGGED) {
1741 assert(!Offset &&
1742 "Should not have folded in an offset for a tagged global!");
     // ADRP is rebound to the MOVKXi result so that the G_ADD_LOW below consumes
     // the tagged value.
1743 ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP})
1744 .addGlobalAddress(GV, 0x100000000,
1746 .addImm(48);
1747 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1748 }
1749
     // G_ADD_LOW defines the original destination register, so the old
     // instruction can be erased afterwards.
1750 MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})
1751 .addGlobalAddress(GV, Offset,
1753 MI.eraseFromParent();
1754 return true;
1755}
1756
1758 MachineInstr &MI) const {
1759 MachineIRBuilder &MIB = Helper.MIRBuilder;
1760 MachineRegisterInfo &MRI = *MIB.getMRI();
1761
1762 auto LowerUnaryOp = [&MI, &MIB](unsigned Opcode) {
1763 MIB.buildInstr(Opcode, {MI.getOperand(0)}, {MI.getOperand(2)});
1764 MI.eraseFromParent();
1765 return true;
1766 };
1767 auto LowerBinOp = [&MI, &MIB](unsigned Opcode) {
1768 MIB.buildInstr(Opcode, {MI.getOperand(0)},
1769 {MI.getOperand(2), MI.getOperand(3)});
1770 MI.eraseFromParent();
1771 return true;
1772 };
1773 auto LowerTriOp = [&MI, &MIB](unsigned Opcode) {
1774 MIB.buildInstr(Opcode, {MI.getOperand(0)},
1775 {MI.getOperand(2), MI.getOperand(3), MI.getOperand(4)});
1776 MI.eraseFromParent();
1777 return true;
1778 };
1779
1780 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
1781 switch (IntrinsicID) {
1782 case Intrinsic::vacopy: {
1783 unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;
1784 unsigned VaListSize =
1785 (ST->isTargetDarwin() || ST->isTargetWindows())
1786 ? PtrSize
1787 : ST->isTargetILP32() ? 20 : 32;
1788
1789 MachineFunction &MF = *MI.getMF();
1791 LLT::scalar(VaListSize * 8));
1792 MIB.buildLoad(Val, MI.getOperand(2),
1795 VaListSize, Align(PtrSize)));
1796 MIB.buildStore(Val, MI.getOperand(1),
1799 VaListSize, Align(PtrSize)));
1800 MI.eraseFromParent();
1801 return true;
1802 }
1803 case Intrinsic::get_dynamic_area_offset: {
1804 MIB.buildConstant(MI.getOperand(0).getReg(), 0);
1805 MI.eraseFromParent();
1806 return true;
1807 }
1808 case Intrinsic::aarch64_mops_memset_tag: {
1809 assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1810 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
1811 // the instruction).
1812 auto &Value = MI.getOperand(3);
1813 Register ExtValueReg = MIB.buildAnyExt(LLT::integer(64), Value).getReg(0);
1814 Value.setReg(ExtValueReg);
1815 return true;
1816 }
1817 case Intrinsic::aarch64_prefetch: {
1818 auto &AddrVal = MI.getOperand(1);
1819
1820 int64_t IsWrite = MI.getOperand(2).getImm();
1821 int64_t Target = MI.getOperand(3).getImm();
1822 int64_t IsStream = MI.getOperand(4).getImm();
1823 int64_t IsData = MI.getOperand(5).getImm();
1824
1825 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
1826 (!IsData << 3) | // IsDataCache bit
1827 (Target << 1) | // Cache level bits
1828 (unsigned)IsStream; // Stream bit
1829
1830 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
1831 MI.eraseFromParent();
1832 return true;
1833 }
1834 case Intrinsic::aarch64_range_prefetch: {
1835 auto &AddrVal = MI.getOperand(1);
1836
1837 int64_t IsWrite = MI.getOperand(2).getImm();
1838 int64_t IsStream = MI.getOperand(3).getImm();
1839 unsigned PrfOp = (IsStream << 2) | IsWrite;
1840
1841 MIB.buildInstr(AArch64::G_AARCH64_RANGE_PREFETCH)
1842 .addImm(PrfOp)
1843 .add(AddrVal)
1844 .addUse(MI.getOperand(4).getReg()); // Metadata
1845 MI.eraseFromParent();
1846 return true;
1847 }
1848 case Intrinsic::aarch64_prefetch_ir: {
1849 auto &AddrVal = MI.getOperand(1);
1850 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(24).add(AddrVal);
1851 MI.eraseFromParent();
1852 return true;
1853 }
1854 case Intrinsic::aarch64_neon_uaddv:
1855 case Intrinsic::aarch64_neon_saddv:
1856 case Intrinsic::aarch64_neon_umaxv:
1857 case Intrinsic::aarch64_neon_smaxv:
1858 case Intrinsic::aarch64_neon_uminv:
1859 case Intrinsic::aarch64_neon_sminv: {
1860 bool IsSigned = IntrinsicID == Intrinsic::aarch64_neon_saddv ||
1861 IntrinsicID == Intrinsic::aarch64_neon_smaxv ||
1862 IntrinsicID == Intrinsic::aarch64_neon_sminv;
1863
1864 auto OldDst = MI.getOperand(0).getReg();
1865 auto OldDstTy = MRI.getType(OldDst);
1866 LLT NewDstTy = MRI.getType(MI.getOperand(2).getReg()).getElementType();
1867 if (OldDstTy == NewDstTy)
1868 return true;
1869
1870 auto NewDst = MRI.createGenericVirtualRegister(NewDstTy);
1871
1872 Helper.Observer.changingInstr(MI);
1873 MI.getOperand(0).setReg(NewDst);
1874 Helper.Observer.changedInstr(MI);
1875
1876 MIB.setInsertPt(MIB.getMBB(), ++MIB.getInsertPt());
1877 MIB.buildExtOrTrunc(IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT,
1878 OldDst, NewDst);
1879
1880 return true;
1881 }
1882 case Intrinsic::aarch64_neon_uaddlp:
1883 case Intrinsic::aarch64_neon_saddlp: {
1884 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlp
1885 ? AArch64::G_UADDLP
1886 : AArch64::G_SADDLP;
1887 MIB.buildInstr(Opc, {MI.getOperand(0)}, {MI.getOperand(2)});
1888 MI.eraseFromParent();
1889
1890 return true;
1891 }
1892 case Intrinsic::aarch64_neon_uaddlv:
1893 case Intrinsic::aarch64_neon_saddlv: {
1894 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlv
1895 ? AArch64::G_UADDLV
1896 : AArch64::G_SADDLV;
1897 Register DstReg = MI.getOperand(0).getReg();
1898 Register SrcReg = MI.getOperand(2).getReg();
1899 LLT DstTy = MRI.getType(DstReg);
1900
1901 LLT MidTy, ExtTy;
1902 if (DstTy.isScalar() && DstTy.getScalarSizeInBits() <= 32) {
1903 ExtTy = LLT::integer(32);
1904 MidTy = LLT::fixed_vector(4, ExtTy);
1905 } else {
1906 ExtTy = LLT::integer(64);
1907 MidTy = LLT::fixed_vector(2, ExtTy);
1908 }
1909
1910 Register MidReg =
1911 MIB.buildInstr(Opc, {MidTy}, {SrcReg})->getOperand(0).getReg();
1912 Register ZeroReg =
1913 MIB.buildConstant(LLT::integer(64), 0)->getOperand(0).getReg();
1914 Register ExtReg = MIB.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT, {ExtTy},
1915 {MidReg, ZeroReg})
1916 .getReg(0);
1917
1918 if (DstTy.getScalarSizeInBits() < 32)
1919 MIB.buildTrunc(DstReg, ExtReg);
1920 else
1921 MIB.buildCopy(DstReg, ExtReg);
1922
1923 MI.eraseFromParent();
1924
1925 return true;
1926 }
1927 case Intrinsic::aarch64_neon_smax:
1928 return LowerBinOp(TargetOpcode::G_SMAX);
1929 case Intrinsic::aarch64_neon_smin:
1930 return LowerBinOp(TargetOpcode::G_SMIN);
1931 case Intrinsic::aarch64_neon_umax:
1932 return LowerBinOp(TargetOpcode::G_UMAX);
1933 case Intrinsic::aarch64_neon_umin:
1934 return LowerBinOp(TargetOpcode::G_UMIN);
1935 case Intrinsic::aarch64_neon_fmax:
1936 return LowerBinOp(TargetOpcode::G_FMAXIMUM);
1937 case Intrinsic::aarch64_neon_fmin:
1938 return LowerBinOp(TargetOpcode::G_FMINIMUM);
1939 case Intrinsic::aarch64_neon_fmaxnm:
1940 return LowerBinOp(TargetOpcode::G_FMAXNUM);
1941 case Intrinsic::aarch64_neon_fminnm:
1942 return LowerBinOp(TargetOpcode::G_FMINNUM);
1943 case Intrinsic::aarch64_neon_pmull:
1944 case Intrinsic::aarch64_neon_pmull64:
1945 return LowerBinOp(AArch64::G_PMULL);
1946 case Intrinsic::aarch64_neon_smull:
1947 return LowerBinOp(AArch64::G_SMULL);
1948 case Intrinsic::aarch64_neon_umull:
1949 return LowerBinOp(AArch64::G_UMULL);
1950 case Intrinsic::aarch64_neon_sabd:
1951 return LowerBinOp(TargetOpcode::G_ABDS);
1952 case Intrinsic::aarch64_neon_uabd:
1953 return LowerBinOp(TargetOpcode::G_ABDU);
1954 case Intrinsic::aarch64_neon_uhadd:
1955 return LowerBinOp(TargetOpcode::G_UAVGFLOOR);
1956 case Intrinsic::aarch64_neon_urhadd:
1957 return LowerBinOp(TargetOpcode::G_UAVGCEIL);
1958 case Intrinsic::aarch64_neon_shadd:
1959 return LowerBinOp(TargetOpcode::G_SAVGFLOOR);
1960 case Intrinsic::aarch64_neon_srhadd:
1961 return LowerBinOp(TargetOpcode::G_SAVGCEIL);
1962 case Intrinsic::aarch64_neon_sqshrn: {
1963 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1964 return true;
1965 // Create right shift instruction. Store the output register in Shr.
1966 auto Shr = MIB.buildInstr(AArch64::G_VASHR,
1967 {MRI.getType(MI.getOperand(2).getReg())},
1968 {MI.getOperand(2), MI.getOperand(3).getImm()});
1969 // Build the narrow intrinsic, taking in Shr.
1970 MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_S, {MI.getOperand(0)}, {Shr});
1971 MI.eraseFromParent();
1972 return true;
1973 }
1974 case Intrinsic::aarch64_neon_sqshrun: {
1975 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1976 return true;
1977 // Create right shift instruction. Store the output register in Shr.
1978 auto Shr = MIB.buildInstr(AArch64::G_VASHR,
1979 {MRI.getType(MI.getOperand(2).getReg())},
1980 {MI.getOperand(2), MI.getOperand(3).getImm()});
1981 // Build the narrow intrinsic, taking in Shr.
1982 MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_U, {MI.getOperand(0)}, {Shr});
1983 MI.eraseFromParent();
1984 return true;
1985 }
1986 case Intrinsic::aarch64_neon_sqrshrn: {
1987 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1988 return true;
1989 // Create right shift instruction. Store the output register in Shr.
1990 auto Shr = MIB.buildInstr(AArch64::G_SRSHR_I,
1991 {MRI.getType(MI.getOperand(2).getReg())},
1992 {MI.getOperand(2), MI.getOperand(3).getImm()});
1993 // Build the narrow intrinsic, taking in Shr.
1994 MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_S, {MI.getOperand(0)}, {Shr});
1995 MI.eraseFromParent();
1996 return true;
1997 }
1998 case Intrinsic::aarch64_neon_sqrshrun: {
1999 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
2000 return true;
2001 // Create right shift instruction. Store the output register in Shr.
2002 auto Shr = MIB.buildInstr(AArch64::G_SRSHR_I,
2003 {MRI.getType(MI.getOperand(2).getReg())},
2004 {MI.getOperand(2), MI.getOperand(3).getImm()});
2005 // Build the narrow intrinsic, taking in Shr.
2006 MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_U, {MI.getOperand(0)}, {Shr});
2007 MI.eraseFromParent();
2008 return true;
2009 }
2010 case Intrinsic::aarch64_neon_uqrshrn: {
2011 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
2012 return true;
2013 // Create right shift instruction. Store the output register in Shr.
2014 auto Shr = MIB.buildInstr(AArch64::G_URSHR_I,
2015 {MRI.getType(MI.getOperand(2).getReg())},
2016 {MI.getOperand(2), MI.getOperand(3).getImm()});
2017 // Build the narrow intrinsic, taking in Shr.
2018 MIB.buildInstr(TargetOpcode::G_TRUNC_USAT_U, {MI.getOperand(0)}, {Shr});
2019 MI.eraseFromParent();
2020 return true;
2021 }
2022 case Intrinsic::aarch64_neon_uqshrn: {
2023 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
2024 return true;
2025 // Create right shift instruction. Store the output register in Shr.
2026 auto Shr = MIB.buildInstr(AArch64::G_VLSHR,
2027 {MRI.getType(MI.getOperand(2).getReg())},
2028 {MI.getOperand(2), MI.getOperand(3).getImm()});
2029 // Build the narrow intrinsic, taking in Shr.
2030 MIB.buildInstr(TargetOpcode::G_TRUNC_USAT_U, {MI.getOperand(0)}, {Shr});
2031 MI.eraseFromParent();
2032 return true;
2033 }
2034 case Intrinsic::aarch64_neon_sqshlu: {
2035 // Check if last operand is constant vector dup
2036 auto ShiftAmount = isConstantOrConstantSplatVector(
2037 *MRI.getVRegDef(MI.getOperand(3).getReg()), MRI);
2038 if (ShiftAmount) {
2039 // If so, create a new intrinsic with the correct shift amount
2040 MIB.buildInstr(AArch64::G_SQSHLU_I, {MI.getOperand(0)},
2041 {MI.getOperand(2)})
2042 .addImm(ShiftAmount->getSExtValue());
2043 MI.eraseFromParent();
2044 return true;
2045 }
2046 return false;
2047 }
2048 case Intrinsic::aarch64_neon_vsli: {
2049 MIB.buildInstr(
2050 AArch64::G_SLI, {MI.getOperand(0)},
2051 {MI.getOperand(2), MI.getOperand(3), MI.getOperand(4).getImm()});
2052 MI.eraseFromParent();
2053 break;
2054 }
2055 case Intrinsic::aarch64_neon_vsri: {
2056 MIB.buildInstr(
2057 AArch64::G_SRI, {MI.getOperand(0)},
2058 {MI.getOperand(2), MI.getOperand(3), MI.getOperand(4).getImm()});
2059 MI.eraseFromParent();
2060 break;
2061 }
2062 case Intrinsic::aarch64_neon_abs: {
2063 // Lower the intrinsic to G_ABS.
2064 MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, {MI.getOperand(2)});
2065 MI.eraseFromParent();
2066 return true;
2067 }
2068 case Intrinsic::aarch64_neon_sqadd: {
2069 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
2070 return LowerBinOp(TargetOpcode::G_SADDSAT);
2071 break;
2072 }
2073 case Intrinsic::aarch64_neon_sqsub: {
2074 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
2075 return LowerBinOp(TargetOpcode::G_SSUBSAT);
2076 break;
2077 }
2078 case Intrinsic::aarch64_neon_uqadd: {
2079 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
2080 return LowerBinOp(TargetOpcode::G_UADDSAT);
2081 break;
2082 }
2083 case Intrinsic::aarch64_neon_uqsub: {
2084 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
2085 return LowerBinOp(TargetOpcode::G_USUBSAT);
2086 break;
2087 }
2088 case Intrinsic::aarch64_neon_udot:
2089 return LowerTriOp(AArch64::G_UDOT);
2090 case Intrinsic::aarch64_neon_sdot:
2091 return LowerTriOp(AArch64::G_SDOT);
2092 case Intrinsic::aarch64_neon_usdot:
2093 return LowerTriOp(AArch64::G_USDOT);
2094 case Intrinsic::aarch64_neon_sqxtn:
2095 return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_S);
2096 case Intrinsic::aarch64_neon_sqxtun:
2097 return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_U);
2098 case Intrinsic::aarch64_neon_uqxtn:
2099 return LowerUnaryOp(TargetOpcode::G_TRUNC_USAT_U);
2100 case Intrinsic::aarch64_neon_fcvtzu:
2101 return LowerUnaryOp(TargetOpcode::G_FPTOUI_SAT);
2102 case Intrinsic::aarch64_neon_fcvtzs:
2103 return LowerUnaryOp(TargetOpcode::G_FPTOSI_SAT);
2104
2105 case Intrinsic::vector_reverse:
2106 // TODO: Add support for vector_reverse
2107 return false;
2108 }
2109
2110 return true;
2111}
2112
2113bool AArch64LegalizerInfo::legalizeShlAshrLshr(
2115 GISelChangeObserver &Observer) const {
2116 assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
2117 MI.getOpcode() == TargetOpcode::G_LSHR ||
2118 MI.getOpcode() == TargetOpcode::G_SHL);
2119 // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
2120 // imported patterns can select it later. Either way, it will be legal.
2121 Register AmtReg = MI.getOperand(2).getReg();
2122 LLT AmtRegEltTy = MRI.getType(AmtReg).getScalarType();
2123 auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI);
2124 if (!VRegAndVal)
2125 return true;
2126 // Check the shift amount is in range for an immediate form.
2127 int64_t Amount = VRegAndVal->Value.getSExtValue();
2128 if (Amount > 31)
2129 return true; // This will have to remain a register variant.
2130 auto ExtCst =
2131 MIRBuilder.buildConstant(AmtRegEltTy.changeElementSize(64), Amount);
2132 Observer.changingInstr(MI);
2133 MI.getOperand(2).setReg(ExtCst.getReg(0));
2134 Observer.changedInstr(MI);
2135 return true;
2136}
2137
2139 MachineRegisterInfo &MRI) {
2140 Base = Root;
2141 Offset = 0;
2142
2143 Register NewBase;
2144 int64_t NewOffset;
2145 if (mi_match(Root, MRI, m_GPtrAdd(m_Reg(NewBase), m_ICst(NewOffset))) &&
2146 isShiftedInt<7, 3>(NewOffset)) {
2147 Base = NewBase;
2148 Offset = NewOffset;
2149 }
2150}
2151
2152// FIXME: This should be removed and replaced with the generic bitcast legalize
2153// action.
2154bool AArch64LegalizerInfo::legalizeLoadStore(
2156 GISelChangeObserver &Observer) const {
2157 assert(MI.getOpcode() == TargetOpcode::G_STORE ||
2158 MI.getOpcode() == TargetOpcode::G_LOAD);
2159 // Here we just try to handle vector loads/stores where our value type might
2160 // have pointer elements, which the SelectionDAG importer can't handle. To
2161 // allow the existing patterns for s64 to fire for p0, we just try to bitcast
2162 // the value to use s64 types.
2163
2164 // Custom legalization requires the instruction, if not deleted, must be fully
2165 // legalized. In order to allow further legalization of the inst, we create
2166 // a new instruction and erase the existing one.
2167
2168 Register ValReg = MI.getOperand(0).getReg();
2169 const LLT ValTy = MRI.getType(ValReg);
2170
2171 if (ValTy == LLT::scalar(128)) {
2172
2173 AtomicOrdering Ordering = (*MI.memoperands_begin())->getSuccessOrdering();
2174 bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
2175 bool IsLoadAcquire = IsLoad && Ordering == AtomicOrdering::Acquire;
2176 bool IsStoreRelease = !IsLoad && Ordering == AtomicOrdering::Release;
2177 bool IsRcpC3 =
2178 ST->hasLSE2() && ST->hasRCPC3() && (IsLoadAcquire || IsStoreRelease);
2179
2180 LLT s64 = LLT::integer(64);
2181
2182 unsigned Opcode;
2183 if (IsRcpC3) {
2184 Opcode = IsLoad ? AArch64::LDIAPPX : AArch64::STILPX;
2185 } else {
2186 // For LSE2, loads/stores should have been converted to monotonic and had
2187 // a fence inserted after them.
2188 assert(Ordering == AtomicOrdering::Monotonic ||
2189 Ordering == AtomicOrdering::Unordered);
2190 assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");
2191
2192 Opcode = IsLoad ? AArch64::LDPXi : AArch64::STPXi;
2193 }
2194
2195 MachineInstrBuilder NewI;
2196 if (IsLoad) {
2197 NewI = MIRBuilder.buildInstr(Opcode, {s64, s64}, {});
2198 MIRBuilder.buildMergeLikeInstr(
2199 ValReg, {NewI->getOperand(0), NewI->getOperand(1)});
2200 } else {
2201 auto Split = MIRBuilder.buildUnmerge(s64, MI.getOperand(0));
2202 NewI = MIRBuilder.buildInstr(
2203 Opcode, {}, {Split->getOperand(0), Split->getOperand(1)});
2204 }
2205
2206 if (IsRcpC3) {
2207 NewI.addUse(MI.getOperand(1).getReg());
2208 } else {
2209 Register Base;
2210 int Offset;
2211 matchLDPSTPAddrMode(MI.getOperand(1).getReg(), Base, Offset, MRI);
2212 NewI.addUse(Base);
2213 NewI.addImm(Offset / 8);
2214 }
2215
2216 NewI.cloneMemRefs(MI);
2217 constrainSelectedInstRegOperands(*NewI, *ST->getInstrInfo(),
2218 *MRI.getTargetRegisterInfo(),
2219 *ST->getRegBankInfo());
2220 MI.eraseFromParent();
2221 return true;
2222 }
2223
2224 if (!ValTy.isPointerVector() ||
2225 ValTy.getElementType().getAddressSpace() != 0) {
2226 LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
2227 return false;
2228 }
2229
2230 unsigned PtrSize = ValTy.getElementType().getSizeInBits();
2231 const LLT NewTy = LLT::vector(ValTy.getElementCount(), LLT::integer(PtrSize));
2232 auto &MMO = **MI.memoperands_begin();
2233 MMO.setType(NewTy);
2234
2235 if (MI.getOpcode() == TargetOpcode::G_STORE) {
2236 auto Bitcast = MIRBuilder.buildBitcast(NewTy, ValReg);
2237 MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1), MMO);
2238 } else {
2239 auto NewLoad = MIRBuilder.buildLoad(NewTy, MI.getOperand(1), MMO);
2240 MIRBuilder.buildBitcast(ValReg, NewLoad);
2241 }
2242 MI.eraseFromParent();
2243 return true;
2244}
2245
2246bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
2248 MachineIRBuilder &MIRBuilder) const {
2249 MachineFunction &MF = MIRBuilder.getMF();
2250 Align Alignment(MI.getOperand(2).getImm());
2251 Register Dst = MI.getOperand(0).getReg();
2252 Register ListPtr = MI.getOperand(1).getReg();
2253
2254 LLT PtrTy = MRI.getType(ListPtr);
2255 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
2256
2257 const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
2258 const Align PtrAlign = Align(PtrSize);
2259 auto List = MIRBuilder.buildLoad(
2260 PtrTy, ListPtr,
2261 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
2262 PtrTy, PtrAlign));
2263
2264 MachineInstrBuilder DstPtr;
2265 if (Alignment > PtrAlign) {
2266 // Realign the list to the actual required alignment.
2267 auto AlignMinus1 =
2268 MIRBuilder.buildConstant(IntPtrTy, Alignment.value() - 1);
2269 auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0));
2270 DstPtr = MIRBuilder.buildMaskLowPtrBits(PtrTy, ListTmp, Log2(Alignment));
2271 } else
2272 DstPtr = List;
2273
2274 LLT ValTy = MRI.getType(Dst);
2275 uint64_t ValSize = ValTy.getSizeInBits() / 8;
2276 MIRBuilder.buildLoad(
2277 Dst, DstPtr,
2278 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
2279 ValTy, std::max(Alignment, PtrAlign)));
2280
2281 auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrAlign));
2282
2283 auto NewList = MIRBuilder.buildPtrAdd(PtrTy, DstPtr, Size.getReg(0));
2284
2285 MIRBuilder.buildStore(NewList, ListPtr,
2286 *MF.getMachineMemOperand(MachinePointerInfo(),
2288 PtrTy, PtrAlign));
2289
2290 MI.eraseFromParent();
2291 return true;
2292}
2293
2294bool AArch64LegalizerInfo::legalizeBitfieldExtract(
2295 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2296 // Only legal if we can select immediate forms.
2297 // TODO: Lower this otherwise.
2298 return getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI) &&
2299 getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
2300}
2301
2302bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
2304 LegalizerHelper &Helper) const {
2305 // When there is no integer popcount instruction (FEAT_CSSC isn't available),
2306 // it can be more efficiently lowered to the following sequence that uses
2307 // AdvSIMD registers/instructions as long as the copies to/from the AdvSIMD
2308 // registers are cheap.
2309 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
2310 // CNT V0.8B, V0.8B // 8xbyte pop-counts
2311 // ADDV B0, V0.8B // sum 8xbyte pop-counts
2312 // UMOV X0, V0.B[0] // copy byte result back to integer reg
2313 //
2314 // For 128 bit vector popcounts, we lower to the following sequence:
2315 // cnt.16b v0, v0 // v8s16, v4s32, v2s64
2316 // uaddlp.8h v0, v0 // v8s16, v4s32, v2s64
2317 // uaddlp.4s v0, v0 // v4s32, v2s64
2318 // uaddlp.2d v0, v0 // v2s64
2319 //
2320 // For 64 bit vector popcounts, we lower to the following sequence:
2321 // cnt.8b v0, v0 // v4s16, v2s32
2322 // uaddlp.4h v0, v0 // v4s16, v2s32
2323 // uaddlp.2s v0, v0 // v2s32
2324
2325 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2326 Register Dst = MI.getOperand(0).getReg();
2327 Register Val = MI.getOperand(1).getReg();
2328 LLT Ty = MRI.getType(Val);
2329
2330 LLT i64 = LLT::integer(64);
2331 LLT i32 = LLT::integer(32);
2332 LLT i16 = LLT::integer(16);
2333 LLT i8 = LLT::integer(8);
2334 unsigned Size = Ty.getSizeInBits();
2335
2336 assert(Ty == MRI.getType(Dst) &&
2337 "Expected src and dst to have the same type!");
2338
2339 if (ST->hasCSSC() && Ty.isScalar() && Size == 128) {
2340
2341 auto Split = MIRBuilder.buildUnmerge(i64, Val);
2342 auto CTPOP1 = MIRBuilder.buildCTPOP(i64, Split->getOperand(0));
2343 auto CTPOP2 = MIRBuilder.buildCTPOP(i64, Split->getOperand(1));
2344 auto Add = MIRBuilder.buildAdd(i64, CTPOP1, CTPOP2);
2345
2346 MIRBuilder.buildZExt(Dst, Add);
2347 MI.eraseFromParent();
2348 return true;
2349 }
2350
2351 if (!ST->hasNEON() ||
2352 MI.getMF()->getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) {
2353 // Use generic lowering when custom lowering is not possible.
2354 return Ty.isScalar() && (Size == 32 || Size == 64) &&
2355 Helper.lowerBitCount(MI) ==
2357 }
2358
2359 // Pre-conditioning: widen Val up to the nearest vector type.
2360 // s32,s64,v4s16,v2s32 -> v8i8
2361 // v8s16,v4s32,v2s64 -> v16i8
2362 LLT VTy = Size == 128 ? LLT::fixed_vector(16, i8) : LLT::fixed_vector(8, i8);
2363 if (Ty.isScalar()) {
2364 assert((Size == 32 || Size == 64 || Size == 128) && "Expected only 32, 64, or 128 bit scalars!");
2365 if (Size == 32) {
2366 Val = MIRBuilder.buildZExt(i64, Val).getReg(0);
2367 }
2368 }
2369 Val = MIRBuilder.buildBitcast(VTy, Val).getReg(0);
2370
2371 // Count bits in each byte-sized lane.
2372 auto CTPOP = MIRBuilder.buildCTPOP(VTy, Val);
2373
2374 // Sum across lanes.
2375 if (ST->hasDotProd() && Ty.isVector() && Ty.getNumElements() >= 2 &&
2376 Ty.getScalarSizeInBits() != 16) {
2377 LLT Dt = Ty == LLT::fixed_vector(2, i64) ? LLT::fixed_vector(4, i32) : Ty;
2378 auto Zeros = MIRBuilder.buildConstant(Dt, 0);
2379 auto Ones = MIRBuilder.buildConstant(VTy, 1);
2380 MachineInstrBuilder Sum;
2381
2382 if (Ty == LLT::fixed_vector(2, i64)) {
2383 auto UDOT =
2384 MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2385 Sum = MIRBuilder.buildInstr(AArch64::G_UADDLP, {Ty}, {UDOT});
2386 } else if (Ty == LLT::fixed_vector(4, i32)) {
2387 Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2388 } else if (Ty == LLT::fixed_vector(2, i32)) {
2389 Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2390 } else {
2391 llvm_unreachable("unexpected vector shape");
2392 }
2393
2394 Sum->getOperand(0).setReg(Dst);
2395 MI.eraseFromParent();
2396 return true;
2397 }
2398
2399 Register HSum = CTPOP.getReg(0);
2400 unsigned Opc;
2401 SmallVector<LLT> HAddTys;
2402 if (Ty.isScalar()) {
2403 Opc = Intrinsic::aarch64_neon_uaddlv;
2404 HAddTys.push_back(i32);
2405 } else if (Ty == LLT::fixed_vector(8, i16)) {
2406 Opc = Intrinsic::aarch64_neon_uaddlp;
2407 HAddTys.push_back(LLT::fixed_vector(8, i16));
2408 } else if (Ty == LLT::fixed_vector(4, i32)) {
2409 Opc = Intrinsic::aarch64_neon_uaddlp;
2410 HAddTys.push_back(LLT::fixed_vector(8, i16));
2411 HAddTys.push_back(LLT::fixed_vector(4, i32));
2412 } else if (Ty == LLT::fixed_vector(2, i64)) {
2413 Opc = Intrinsic::aarch64_neon_uaddlp;
2414 HAddTys.push_back(LLT::fixed_vector(8, i16));
2415 HAddTys.push_back(LLT::fixed_vector(4, i32));
2416 HAddTys.push_back(LLT::fixed_vector(2, i64));
2417 } else if (Ty == LLT::fixed_vector(4, i16)) {
2418 Opc = Intrinsic::aarch64_neon_uaddlp;
2419 HAddTys.push_back(LLT::fixed_vector(4, i16));
2420 } else if (Ty == LLT::fixed_vector(2, i32)) {
2421 Opc = Intrinsic::aarch64_neon_uaddlp;
2422 HAddTys.push_back(LLT::fixed_vector(4, i16));
2423 HAddTys.push_back(LLT::fixed_vector(2, i32));
2424 } else
2425 llvm_unreachable("unexpected vector shape");
2427 for (LLT HTy : HAddTys) {
2428 UADD = MIRBuilder.buildIntrinsic(Opc, {HTy}).addUse(HSum);
2429 HSum = UADD.getReg(0);
2430 }
2431
2432 // Post-conditioning.
2433 if (Ty.isScalar() && (Size == 64 || Size == 128))
2434 MIRBuilder.buildZExt(Dst, UADD);
2435 else
2436 UADD->getOperand(0).setReg(Dst);
2437 MI.eraseFromParent();
2438 return true;
2439}
2440
2441bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128(
2442 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2443 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2444 LLT i64 = LLT::integer(64);
2445 auto Addr = MI.getOperand(1).getReg();
2446 auto DesiredI = MIRBuilder.buildUnmerge({i64, i64}, MI.getOperand(2));
2447 auto NewI = MIRBuilder.buildUnmerge({i64, i64}, MI.getOperand(3));
2448 auto DstLo = MRI.createGenericVirtualRegister(i64);
2449 auto DstHi = MRI.createGenericVirtualRegister(i64);
2450
2451 MachineInstrBuilder CAS;
2452 if (ST->hasLSE()) {
2453 // We have 128-bit CASP instructions taking XSeqPair registers, which are
2454 // s128. We need the merge/unmerge to bracket the expansion and pair up with
2455 // the rest of the MIR so we must reassemble the extracted registers into a
2456 // 128-bit known-regclass one with code like this:
2457 //
2458 // %in1 = REG_SEQUENCE Lo, Hi ; One for each input
2459 // %out = CASP %in1, ...
2460 // %OldLo = G_EXTRACT %out, 0
2461 // %OldHi = G_EXTRACT %out, 64
2462 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2463 unsigned Opcode;
2464 switch (Ordering) {
2466 Opcode = AArch64::CASPAX;
2467 break;
2469 Opcode = AArch64::CASPLX;
2470 break;
2473 Opcode = AArch64::CASPALX;
2474 break;
2475 default:
2476 Opcode = AArch64::CASPX;
2477 break;
2478 }
2479
2480 LLT s128 = LLT::scalar(128);
2481 auto CASDst = MRI.createGenericVirtualRegister(s128);
2482 auto CASDesired = MRI.createGenericVirtualRegister(s128);
2483 auto CASNew = MRI.createGenericVirtualRegister(s128);
2484 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASDesired}, {})
2485 .addUse(DesiredI->getOperand(0).getReg())
2486 .addImm(AArch64::sube64)
2487 .addUse(DesiredI->getOperand(1).getReg())
2488 .addImm(AArch64::subo64);
2489 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASNew}, {})
2490 .addUse(NewI->getOperand(0).getReg())
2491 .addImm(AArch64::sube64)
2492 .addUse(NewI->getOperand(1).getReg())
2493 .addImm(AArch64::subo64);
2494
2495 CAS = MIRBuilder.buildInstr(Opcode, {CASDst}, {CASDesired, CASNew, Addr});
2496
2497 MIRBuilder.buildExtract({DstLo}, {CASDst}, 0);
2498 MIRBuilder.buildExtract({DstHi}, {CASDst}, 64);
2499 } else {
2500 // The -O0 CMP_SWAP_128 is friendlier to generate code for because LDXP/STXP
2501 // can take arbitrary registers so it just has the normal GPR64 operands the
2502 // rest of AArch64 is expecting.
2503 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2504 unsigned Opcode;
2505 switch (Ordering) {
2507 Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
2508 break;
2510 Opcode = AArch64::CMP_SWAP_128_RELEASE;
2511 break;
2514 Opcode = AArch64::CMP_SWAP_128;
2515 break;
2516 default:
2517 Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
2518 break;
2519 }
2520
2521 auto Scratch = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2522 CAS = MIRBuilder.buildInstr(Opcode, {DstLo, DstHi, Scratch},
2523 {Addr, DesiredI->getOperand(0),
2524 DesiredI->getOperand(1), NewI->getOperand(0),
2525 NewI->getOperand(1)});
2526 }
2527
2528 CAS.cloneMemRefs(MI);
2529 constrainSelectedInstRegOperands(*CAS, *ST->getInstrInfo(),
2530 *MRI.getTargetRegisterInfo(),
2531 *ST->getRegBankInfo());
2532
2533 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {DstLo, DstHi});
2534 MI.eraseFromParent();
2535 return true;
2536}
2537
2538bool AArch64LegalizerInfo::legalizeCTTZ(MachineInstr &MI,
2539 LegalizerHelper &Helper) const {
2540 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2541 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2542 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
2543 auto BitReverse = MIRBuilder.buildBitReverse(Ty, MI.getOperand(1));
2544 MIRBuilder.buildCTLZ(MI.getOperand(0).getReg(), BitReverse);
2545 MI.eraseFromParent();
2546 return true;
2547}
2548
2549bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
2550 LegalizerHelper &Helper) const {
2551 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2552
2553 // Tagged version MOPSMemorySetTagged is legalised in legalizeIntrinsic
2554 if (MI.getOpcode() == TargetOpcode::G_MEMSET) {
2555 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
2556 // the instruction).
2557 auto &Value = MI.getOperand(1);
2558 Register ExtValueReg =
2559 MIRBuilder.buildAnyExt(LLT::integer(64), Value).getReg(0);
2560 Value.setReg(ExtValueReg);
2561 return true;
2562 }
2563
2564 return false;
2565}
2566
2567bool AArch64LegalizerInfo::legalizeExtractVectorElt(
2568 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2569 const GExtractVectorElement *Element = cast<GExtractVectorElement>(&MI);
2570 auto VRegAndVal =
2572 if (VRegAndVal)
2573 return true;
2574 LLT VecTy = MRI.getType(Element->getVectorReg());
2575 if (VecTy.isScalableVector())
2576 return true;
2577 return Helper.lowerExtractInsertVectorElt(MI) !=
2579}
2580
2581bool AArch64LegalizerInfo::legalizeDynStackAlloc(
2582 MachineInstr &MI, LegalizerHelper &Helper) const {
2583 MachineFunction &MF = *MI.getParent()->getParent();
2584 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2585 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2586
2587 // If stack probing is not enabled for this function, use the default
2588 // lowering.
2589 if (!MF.getFunction().hasFnAttribute("probe-stack") ||
2590 MF.getFunction().getFnAttribute("probe-stack").getValueAsString() !=
2591 "inline-asm") {
2592 Helper.lowerDynStackAlloc(MI);
2593 return true;
2594 }
2595
2596 Register Dst = MI.getOperand(0).getReg();
2597 Register AllocSize = MI.getOperand(1).getReg();
2598 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
2599
2600 assert(MRI.getType(Dst) == LLT::pointer(0, 64) &&
2601 "Unexpected type for dynamic alloca");
2602 assert(MRI.getType(AllocSize) == LLT::scalar(64) &&
2603 "Unexpected type for dynamic alloca");
2604
2605 LLT PtrTy = MRI.getType(Dst);
2606 Register SPReg =
2608 Register SPTmp =
2609 Helper.getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
2610 auto NewMI =
2611 MIRBuilder.buildInstr(AArch64::PROBED_STACKALLOC_DYN, {}, {SPTmp});
2612 MRI.setRegClass(NewMI.getReg(0), &AArch64::GPR64commonRegClass);
2613 MIRBuilder.setInsertPt(*NewMI->getParent(), NewMI);
2614 MIRBuilder.buildCopy(Dst, SPTmp);
2615
2616 MI.eraseFromParent();
2617 return true;
2618}
2619
2620bool AArch64LegalizerInfo::legalizePrefetch(MachineInstr &MI,
2621 LegalizerHelper &Helper) const {
2622 MachineIRBuilder &MIB = Helper.MIRBuilder;
2623 auto &AddrVal = MI.getOperand(0);
2624
2625 int64_t IsWrite = MI.getOperand(1).getImm();
2626 int64_t Locality = MI.getOperand(2).getImm();
2627 int64_t IsData = MI.getOperand(3).getImm();
2628
2629 bool IsStream = Locality == 0;
2630 if (Locality != 0) {
2631 assert(Locality <= 3 && "Prefetch locality out-of-range");
2632 // The locality degree is the opposite of the cache speed.
2633 // Put the number the other way around.
2634 // The encoding starts at 0 for level 1
2635 Locality = 3 - Locality;
2636 }
2637
2638 unsigned PrfOp = (IsWrite << 4) | (!IsData << 3) | (Locality << 1) | IsStream;
2639
2640 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
2641 MI.eraseFromParent();
2642 return true;
2643}
2644
2645bool AArch64LegalizerInfo::legalizeConcatVectors(
2647 MachineIRBuilder &MIRBuilder) const {
2648 // Widen sub-byte element vectors to byte-sized elements before concatenating.
2649 // This is analogous to SDAG's integer type promotion for sub-byte types.
2651 Register DstReg = Concat.getReg(0);
2652 LLT DstTy = MRI.getType(DstReg);
2653 assert(DstTy.getScalarSizeInBits() < 8 && "Expected dst ty to be < 8b");
2654
2655 unsigned WideEltSize =
2656 std::max(8u, (unsigned)PowerOf2Ceil(DstTy.getScalarSizeInBits()));
2657 LLT SrcTy = MRI.getType(Concat.getSourceReg(0));
2658 LLT WideSrcTy = SrcTy.changeElementSize(WideEltSize);
2659 LLT WideDstTy = DstTy.changeElementSize(WideEltSize);
2660
2661 SmallVector<Register> WideSrcs;
2662 for (unsigned I = 0; I < Concat.getNumSources(); ++I) {
2663 auto Wide = MIRBuilder.buildAnyExt(WideSrcTy, Concat.getSourceReg(I));
2664 WideSrcs.push_back(Wide.getReg(0));
2665 }
2666
2667 auto WideConcat = MIRBuilder.buildConcatVectors(WideDstTy, WideSrcs);
2668 MIRBuilder.buildTrunc(DstReg, WideConcat);
2669 MI.eraseFromParent();
2670 return true;
2671}
2672
2673bool AArch64LegalizerInfo::legalizeFptrunc(MachineInstr &MI,
2674 MachineIRBuilder &MIRBuilder,
2675 MachineRegisterInfo &MRI) const {
2676 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
2677
2678 // This function legalizes f64 -> bf16 and f64 -> f16 truncations via f64 ->
2679 // f32 G_FPTRUNC_ODD and f32 -> [b]f16 G_FPTRUNC, which apparently avoids the
2680 // usual double-rounding issue that could be present from using twin
2681 // G_FPTRUNC.
2682
2683 if (DstTy.isBFloat16() && SrcTy.isFloat64()) {
2684 auto Mid =
2685 MIRBuilder.buildInstr(AArch64::G_FPTRUNC_ODD, {LLT::float32()}, {Src});
2686 MIRBuilder.buildInstr(AArch64::G_FPTRUNC, {Dst}, {Mid});
2687 MI.eraseFromParent();
2688 return true;
2689 }
2690
2691 assert(SrcTy.isFixedVector() && isPowerOf2_32(SrcTy.getNumElements()) &&
2692 "Expected a power of 2 elements");
2693
2694 // We must mutate types here as FPTrunc may be used on a IEEE floating point
2695 // or a brainfloat.
2696 LLT v2s16 = DstTy.changeElementCount(2);
2697 LLT v4s16 = DstTy.changeElementCount(4);
2698 LLT v2s32 = SrcTy.changeElementCount(2).changeElementSize(32);
2699 LLT v4s32 = SrcTy.changeElementCount(4).changeElementSize(32);
2700 LLT v2s64 = SrcTy.changeElementCount(2);
2701
2702 SmallVector<Register> RegsToUnmergeTo;
2703 SmallVector<Register> TruncOddDstRegs;
2704 SmallVector<Register> RegsToMerge;
2705
2706 unsigned ElemCount = SrcTy.getNumElements();
2707
2708 // Find the biggest size chunks we can work with
2709 int StepSize = ElemCount % 4 ? 2 : 4;
2710
2711 // If we have a power of 2 greater than 2, we need to first unmerge into
2712 // enough pieces
2713 if (ElemCount <= 2)
2714 RegsToUnmergeTo.push_back(Src);
2715 else {
2716 for (unsigned i = 0; i < ElemCount / 2; ++i)
2717 RegsToUnmergeTo.push_back(MRI.createGenericVirtualRegister(v2s64));
2718
2719 MIRBuilder.buildUnmerge(RegsToUnmergeTo, Src);
2720 }
2721
2722 // Create all of the round-to-odd instructions and store them
2723 for (auto SrcReg : RegsToUnmergeTo) {
2724 Register Mid =
2725 MIRBuilder.buildInstr(AArch64::G_FPTRUNC_ODD, {v2s32}, {SrcReg})
2726 .getReg(0);
2727 TruncOddDstRegs.push_back(Mid);
2728 }
2729
2730 // Truncate 4s32 to 4s16 if we can to reduce instruction count, otherwise
2731 // truncate 2s32 to 2s16.
2732 unsigned Index = 0;
2733 for (unsigned LoopIter = 0; LoopIter < ElemCount / StepSize; ++LoopIter) {
2734 if (StepSize == 4) {
2735 Register ConcatDst =
2736 MIRBuilder
2738 {v4s32}, {TruncOddDstRegs[Index++], TruncOddDstRegs[Index++]})
2739 .getReg(0);
2740
2741 RegsToMerge.push_back(
2742 MIRBuilder.buildFPTrunc(v4s16, ConcatDst).getReg(0));
2743 } else {
2744 RegsToMerge.push_back(
2745 MIRBuilder.buildFPTrunc(v2s16, TruncOddDstRegs[Index++]).getReg(0));
2746 }
2747 }
2748
2749 // If there is only one register, replace the destination
2750 if (RegsToMerge.size() == 1) {
2751 MRI.replaceRegWith(Dst, RegsToMerge.pop_back_val());
2752 MI.eraseFromParent();
2753 return true;
2754 }
2755
2756 // Merge the rest of the instructions & replace the register
2757 Register Fin = MIRBuilder.buildMergeLikeInstr(DstTy, RegsToMerge).getReg(0);
2758 MRI.replaceRegWith(Dst, Fin);
2759 MI.eraseFromParent();
2760 return true;
2761}
static void matchLDPSTPAddrMode(Register Root, Register &Base, int &Offset, MachineRegisterInfo &MRI)
This file declares the targeting of the Machinelegalizer class for AArch64.
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static Error unsupported(const char *Str, const Triple &T)
Definition MachO.cpp:77
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
IRTranslator LLVM IR MI
Interface for Targets to specify which operations they can successfully select and how the others sho...
#define I(x, y, z)
Definition MD5.cpp:57
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineIRBuilder class.
Promote Memory to Register
Definition Mem2Reg.cpp:110
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
ppc ctr loops verify
if(PassOpts->AAPipeline)
static constexpr MCPhysReg SPReg
This file contains some templates that are useful if you are working with the STL at all.
#define LLVM_DEBUG(...)
Definition Debug.h:119
static constexpr int Concat[]
bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, LostDebugLocObserver &LocObserver) const override
Called for instructions with the Custom LegalizationAction.
bool legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const override
AArch64LegalizerInfo(const AArch64Subtarget &ST)
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1055
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1709
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
@ ICMP_NE
not equal
Definition InstrTypes.h:762
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:763
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:728
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
static constexpr LLT float64()
Get a 64-bit IEEE double value.
LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
constexpr bool isScalableVector() const
Returns true if the LLT is a scalable vector.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
static constexpr LLT scalable_vector(unsigned MinNumElements, unsigned ScalarSizeInBits)
Get a low-level scalable vector of some number of elements and element width.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
LLT getScalarType() const
constexpr bool isPointerVector() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
static constexpr LLT float128()
Get a 128-bit IEEE quad value.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr ElementCount getElementCount() const
LLT divide(int Factor) const
Return a type that is Factor times smaller.
static constexpr LLT float16()
Get a 16-bit IEEE half value.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr bool isFixedVector() const
Returns true if the LLT is a fixed vector.
static LLT integer(unsigned SizeInBits)
static constexpr LLT bfloat16()
LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
static constexpr LLT float32()
Get a 32-bit IEEE float value.
bool isFloat64() const
LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
LLVM_ABI void computeTables()
Compute any ancillary tables needed to quickly decide how an operation should be handled.
LegalizeRuleSet & minScalar(unsigned TypeIdx, const LLT Ty)
Ensure the scalar is at least as wide as Ty.
LegalizeRuleSet & widenScalarOrEltToNextPow2OrMinSize(unsigned TypeIdx, unsigned MinSize=0)
Widen the scalar or vector element type to the next power of two that is at least MinSize.
LegalizeRuleSet & legalFor(std::initializer_list< LLT > Types)
The instruction is legal when type index 0 is any type in the given list.
LegalizeRuleSet & maxScalarEltSameAsIf(LegalityPredicate Predicate, unsigned TypeIdx, unsigned SmallTypeIdx)
Conditionally narrow the scalar or elt to match the size of another.
LegalizeRuleSet & unsupported()
The instruction is unsupported.
LegalizeRuleSet & scalarSameSizeAs(unsigned TypeIdx, unsigned SameSizeIdx)
Change the type TypeIdx to have the same scalar size as type SameSizeIdx.
LegalizeRuleSet & bitcastIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
The specified type index is coerced if predicate is true.
LegalizeRuleSet & libcallFor(std::initializer_list< LLT > Types)
LegalizeRuleSet & minScalarOrElt(unsigned TypeIdx, const LLT Ty)
Ensure the scalar or element is at least as wide as Ty.
LegalizeRuleSet & clampMaxNumElements(unsigned TypeIdx, const LLT EltTy, unsigned MaxElements)
Limit the number of elements in EltTy vectors to at most MaxElements.
LegalizeRuleSet & clampMinNumElements(unsigned TypeIdx, const LLT EltTy, unsigned MinElements)
Limit the number of elements in EltTy vectors to at least MinElements.
LegalizeRuleSet & widenVectorEltsToVectorMinSize(unsigned TypeIdx, unsigned VectorSize)
Ensure the vector size is at least as wide as VectorSize by promoting the element.
LegalizeRuleSet & lowerIfMemSizeNotPow2()
Lower a memory operation if the memory size, rounded to bytes, is not a power of 2.
LegalizeRuleSet & minScalarEltSameAsIf(LegalityPredicate Predicate, unsigned TypeIdx, unsigned LargeTypeIdx)
Conditionally widen the scalar or elt to match the size of another.
LegalizeRuleSet & customForCartesianProduct(std::initializer_list< LLT > Types)
LegalizeRuleSet & lowerIfMemSizeNotByteSizePow2()
Lower a memory operation if the memory access size is not a round power of 2 byte size.
LegalizeRuleSet & moreElementsToNextPow2(unsigned TypeIdx)
Add more elements to the vector to reach the next power of two.
LegalizeRuleSet & narrowScalarIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
Narrow the scalar to the one selected by the mutation if the predicate is true.
LegalizeRuleSet & lower()
The instruction is lowered.
LegalizeRuleSet & moreElementsIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
Add more elements to reach the type selected by the mutation if the predicate is true.
LegalizeRuleSet & lowerFor(std::initializer_list< LLT > Types)
The instruction is lowered when type index 0 is any type in the given list.
LegalizeRuleSet & scalarizeIf(LegalityPredicate Predicate, unsigned TypeIdx)
LegalizeRuleSet & lowerIf(LegalityPredicate Predicate)
The instruction is lowered if predicate is true.
LegalizeRuleSet & clampScalar(unsigned TypeIdx, const LLT MinTy, const LLT MaxTy)
Limit the range of scalar sizes to MinTy and MaxTy.
LegalizeRuleSet & custom()
Unconditionally custom lower.
LegalizeRuleSet & minScalarSameAs(unsigned TypeIdx, unsigned LargeTypeIdx)
Widen the scalar to match the size of another.
LegalizeRuleSet & unsupportedIf(LegalityPredicate Predicate)
LegalizeRuleSet & minScalarOrEltIf(LegalityPredicate Predicate, unsigned TypeIdx, const LLT Ty)
Ensure the scalar or element is at least as wide as Ty.
LegalizeRuleSet & widenScalarIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
Widen the scalar to the one selected by the mutation if the predicate is true.
LegalizeRuleSet & alwaysLegal()
LegalizeRuleSet & clampNumElements(unsigned TypeIdx, const LLT MinTy, const LLT MaxTy)
Limit the number of elements for the given vectors to at least MinTy's number of elements and at most...
LegalizeRuleSet & maxScalarIf(LegalityPredicate Predicate, unsigned TypeIdx, const LLT Ty)
Conditionally limit the maximum size of the scalar.
LegalizeRuleSet & customIf(LegalityPredicate Predicate)
LegalizeRuleSet & widenScalarToNextPow2(unsigned TypeIdx, unsigned MinSize=0)
Widen the scalar to the next power of two that is at least MinSize.
LegalizeRuleSet & scalarize(unsigned TypeIdx)
LegalizeRuleSet & legalForCartesianProduct(std::initializer_list< LLT > Types)
The instruction is legal when type indexes 0 and 1 are both in the given list.
LegalizeRuleSet & legalForTypesWithMemDesc(std::initializer_list< LegalityPredicates::TypePairAndMemDesc > TypesAndMemDesc)
The instruction is legal when type indexes 0 and 1 along with the memory size and minimum alignment i...
LegalizeRuleSet & legalIf(LegalityPredicate Predicate)
The instruction is legal if predicate is true.
LLVM_ABI LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBitCount(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LLVM_ABI LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
const TargetLowering & getTargetLowering() const
LLVM_ABI LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LLVM_ABI MachineInstrBuilder createStackStoreLoad(const DstOp &Res, const SrcOp &Val)
Create a store of Val to a stack temporary and return a load as the same type as Res.
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
LLVM_ABI Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LegalizeRuleSet & getActionDefinitionsBuilder(unsigned Opcode)
Get the action definition builder for the given opcode.
const LegacyLegalizerInfo & getLegacyLegalizerInfo() const
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Helper class to build MachineInstr.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildNot(const DstOp &Dst, const SrcOp &Src0)
Build and insert a bitwise not, NegOne = G_CONSTANT -1 Res = G_XOR Op0, NegOne.
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildExtract(const DstOp &Res, const SrcOp &Src, uint64_t Index)
Build and insert Res0, ... = G_EXTRACT Src, Idx0.
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
MachineBasicBlock::iterator getInsertPt()
Current insertion point for new instructions.
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildIntrinsic(Intrinsic::ID ID, ArrayRef< Register > Res, bool HasSideEffects, bool isConvergent)
Build and insert a G_INTRINSIC instruction.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Dst = G_CTLZ Src0.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildBitReverse(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITREVERSE Src.
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildCTPOP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Dst = G_CTPOP Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildExtOrTrunc(unsigned ExtOpc, const DstOp &Res, const SrcOp &Op)
Build and insert Res = ExtOpc, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes of...
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op0.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
MachineInstrBuilder buildMaskLowPtrBits(const DstOp &Res, const SrcOp &Op0, uint32_t NumBits)
Build and insert Res = G_PTRMASK Op0, G_CONSTANT (1 << NumBits) - 1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
Representation of each machine instruction.
const MachineOperand & getOperand(unsigned i) const
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
LLVM_ABI void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
LLVM_ABI Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
const TargetRegisterInfo * getTargetRegisterInfo() const
LLVM_ABI void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
Primary interface to the complete machine description for the target machine.
Target - Wrapper for Target specific information.
LLVM Value Representation.
Definition Value.h:75
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:252
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_PREL
MO_PREL - Indicates that the bits of the symbol operand represented by MO_G0 etc are PC relative.
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_TAGGED
MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag in bits 56-63.
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
LLVM_ABI LegalityPredicate scalarOrEltWiderThan(unsigned TypeIdx, unsigned Size)
True iff the specified type index is a scalar or a vector with an element type that's wider than the ...
LLVM_ABI LegalityPredicate isPointerVector(unsigned TypeIdx)
True iff the specified type index is a vector of pointers (with any address space).
LLVM_ABI LegalityPredicate typeInSet(unsigned TypeIdx, std::initializer_list< LLT > TypesInit)
True iff the given type index is one of the specified types.
LLVM_ABI LegalityPredicate smallerThan(unsigned TypeIdx0, unsigned TypeIdx1)
True iff the first type index has a smaller total bit size than second type index.
LLVM_ABI LegalityPredicate atomicOrderingAtLeastOrStrongerThan(unsigned MMOIdx, AtomicOrdering Ordering)
True iff the specified MMO index has an atomic ordering of Ordering or stronger.
Predicate any(Predicate P0, Predicate P1)
True iff P0 or P1 are true.
LLVM_ABI LegalityPredicate isVector(unsigned TypeIdx)
True iff the specified type index is a vector.
Predicate all(Predicate P0, Predicate P1)
True iff P0 and P1 are true.
LLVM_ABI LegalityPredicate typeIs(unsigned TypeIdx, LLT TypesInit)
True iff the given type index is the specified type.
LLVM_ABI LegalityPredicate scalarWiderThan(unsigned TypeIdx, unsigned Size)
True iff the specified type index is a scalar that's wider than the given size.
LLVM_ABI LegalityPredicate scalarNarrowerThan(unsigned TypeIdx, unsigned Size)
True iff the specified type index is a scalar that's narrower than the given size.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
LLVM_ABI LegalizeMutation moreElementsToNextPow2(unsigned TypeIdx, unsigned Min=0)
Add more elements to the type for the given type index to the next power of 2.
LLVM_ABI LegalizeMutation scalarize(unsigned TypeIdx)
Break up the vector type for the given type index into the element type.
LLVM_ABI LegalizeMutation changeElementTo(unsigned TypeIdx, unsigned FromTypeIdx)
Keep the same scalar or element type as the given type index.
LLVM_ABI LegalizeMutation widenScalarOrEltToNextPow2(unsigned TypeIdx, unsigned Min=0)
Widen the scalar type or vector element type for the given type index to the next power of 2.
LLVM_ABI LegalizeMutation changeTo(unsigned TypeIdx, LLT Ty)
Select this specific type for the given type index.
LLVM_ABI LegalizeMutation changeElementSizeTo(unsigned TypeIdx, unsigned FromTypeIdx)
Change the scalar size or element size to have the same scalar size as type index FromIndex.
operand_type_match m_Reg()
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
Invariant opcodes: All instruction sets have these as their low opcodes.
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:558
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI void constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...
Definition Utils.cpp:156
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition Utils.cpp:1527
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:385
std::function< bool(const LegalityQuery &)> LegalityPredicate
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Add
Sum of integers.
DWARFExpression::Operation Op
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:182
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:433
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
Definition Alignment.h:100
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
ArrayRef< MemDesc > MMODescrs
Operations which require memory can use this to place requirements on the memory type for each MMO.
ArrayRef< LLT > Types
This class contains a discriminated union of information about pointers in memory operands,...