LLVM 23.0.0git
AArch64LegalizerInfo.cpp
Go to the documentation of this file.
1//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the MachineLegalizer class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
15#include "AArch64Subtarget.h"
16#include "llvm/ADT/STLExtras.h"
28#include "llvm/IR/Intrinsics.h"
29#include "llvm/IR/IntrinsicsAArch64.h"
30#include "llvm/IR/Type.h"
32#include <initializer_list>
33
34#define DEBUG_TYPE "aarch64-legalinfo"
35
36using namespace llvm;
37using namespace LegalizeActions;
38using namespace LegalizeMutations;
39using namespace LegalityPredicates;
40using namespace MIPatternMatch;
41
43 : ST(&ST) {
44 using namespace TargetOpcode;
45 const LLT p0 = LLT::pointer(0, 64);
46 const LLT s8 = LLT::scalar(8);
47 const LLT s16 = LLT::scalar(16);
48 const LLT s32 = LLT::scalar(32);
49 const LLT s64 = LLT::scalar(64);
50 const LLT s128 = LLT::scalar(128);
51 const LLT v16s8 = LLT::fixed_vector(16, 8);
52 const LLT v8s8 = LLT::fixed_vector(8, 8);
53 const LLT v4s8 = LLT::fixed_vector(4, 8);
54 const LLT v2s8 = LLT::fixed_vector(2, 8);
55 const LLT v8s16 = LLT::fixed_vector(8, 16);
56 const LLT v4s16 = LLT::fixed_vector(4, 16);
57 const LLT v2s16 = LLT::fixed_vector(2, 16);
58 const LLT v2s32 = LLT::fixed_vector(2, 32);
59 const LLT v4s32 = LLT::fixed_vector(4, 32);
60 const LLT v2s64 = LLT::fixed_vector(2, 64);
61 const LLT v2p0 = LLT::fixed_vector(2, p0);
62
63 const LLT nxv16s8 = LLT::scalable_vector(16, s8);
64 const LLT nxv8s16 = LLT::scalable_vector(8, s16);
65 const LLT nxv4s32 = LLT::scalable_vector(4, s32);
66 const LLT nxv2s64 = LLT::scalable_vector(2, s64);
67
68 const LLT bf16 = LLT::bfloat16();
69 const LLT v4bf16 = LLT::fixed_vector(4, bf16);
70
71 const LLT f16 = LLT::float16();
72 const LLT v4f16 = LLT::fixed_vector(4, f16);
73 const LLT v8f16 = LLT::fixed_vector(8, f16);
74
75 const LLT f32 = LLT::float32();
76 const LLT v2f32 = LLT::fixed_vector(2, f32);
77 const LLT v4f32 = LLT::fixed_vector(4, f32);
78
79 const LLT f64 = LLT::float64();
80 const LLT v2f64 = LLT::fixed_vector(2, f64);
81
82 const LLT f128 = LLT::float128();
83
84 const LLT i8 = LLT::integer(8);
85 const LLT v8i8 = LLT::fixed_vector(8, i8);
86 const LLT v16i8 = LLT::fixed_vector(16, i8);
87
88 const LLT i16 = LLT::integer(16);
89 const LLT v8i16 = LLT::fixed_vector(8, i16);
90 const LLT v4i16 = LLT::fixed_vector(4, i16);
91
92 const LLT i32 = LLT::integer(32);
93 const LLT v2i32 = LLT::fixed_vector(2, i32);
94 const LLT v4i32 = LLT::fixed_vector(4, i32);
95
96 const LLT i64 = LLT::integer(64);
97 const LLT v2i64 = LLT::fixed_vector(2, i64);
98
99 const LLT i128 = LLT::integer(128);
100
101 std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
102 v16s8, v8s16, v4s32,
103 v2s64, v2p0,
104 /* End 128bit types */
105 /* Begin 64bit types */
106 v8s8, v4s16, v2s32};
107 std::initializer_list<LLT> ScalarAndPtrTypesList = {s8, s16, s32, s64, p0};
108 SmallVector<LLT, 8> PackedVectorAllTypesVec(PackedVectorAllTypeList);
109 SmallVector<LLT, 8> ScalarAndPtrTypesVec(ScalarAndPtrTypesList);
110
111 const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();
112
113 // FIXME: support subtargets which have neon/fp-armv8 disabled.
114 if (!ST.hasNEON() || !ST.hasFPARMv8()) {
116 return;
117 }
118
119 // Some instructions only support s16 if the subtarget has full 16-bit FP
120 // support.
121 const bool HasFP16 = ST.hasFullFP16();
122 const LLT &MinFPScalar = HasFP16 ? f16 : f32;
123
124 const bool HasCSSC = ST.hasCSSC();
125 const bool HasRCPC3 = ST.hasRCPC3();
126 const bool HasSVE = ST.hasSVE();
127
129 {G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
130 .legalFor({p0, s8, s16, s32, s64})
131 .legalFor({v2s8, v4s8, v8s8, v16s8, v2s16, v4s16, v8s16, v2s32, v4s32,
132 v2s64, v2p0})
133 .widenScalarToNextPow2(0)
134 .clampScalar(0, s8, s64)
137 .clampNumElements(0, v8s8, v16s8)
138 .clampNumElements(0, v4s16, v8s16)
139 .clampNumElements(0, v2s32, v4s32)
140 .clampMaxNumElements(0, s64, 2)
141 .clampMaxNumElements(0, p0, 2)
143
145 .legalFor({p0, s16, s32, s64})
146 .legalFor(PackedVectorAllTypeList)
150 .clampScalar(0, s16, s64)
151 .clampNumElements(0, v8s8, v16s8)
152 .clampNumElements(0, v4s16, v8s16)
153 .clampNumElements(0, v2s32, v4s32)
154 .clampMaxNumElements(0, s64, 2)
155 .clampMaxNumElements(0, p0, 2);
156
158 .legalIf(all(typeInSet(0, {s32, s64, p0}), typeInSet(1, {s8, s16, s32}),
159 smallerThan(1, 0)))
160 .widenScalarToNextPow2(0)
161 .clampScalar(0, s32, s64)
163 .minScalar(1, s8)
164 .maxScalarIf(typeInSet(0, {s32}), 1, s16)
165 .maxScalarIf(typeInSet(0, {s64, p0}), 1, s32);
166
168 .legalIf(all(typeInSet(0, {s16, s32, s64, p0}),
169 typeInSet(1, {s32, s64, s128, p0}), smallerThan(0, 1)))
170 .widenScalarToNextPow2(1)
171 .clampScalar(1, s32, s128)
173 .minScalar(0, s16)
174 .maxScalarIf(typeInSet(1, {s32}), 0, s16)
175 .maxScalarIf(typeInSet(1, {s64, p0}), 0, s32)
176 .maxScalarIf(typeInSet(1, {s128}), 0, s64);
177
178 getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR})
179 .legalFor({i32, i64, v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64})
180 .legalFor(HasSVE, {nxv16s8, nxv8s16, nxv4s32, nxv2s64})
181 .widenScalarToNextPow2(0)
182 .clampScalar(0, s32, s64)
183 .clampMaxNumElements(0, s8, 16)
184 .clampMaxNumElements(0, s16, 8)
185 .clampNumElements(0, v2s32, v4s32)
186 .clampNumElements(0, v2s64, v2s64)
188 [=](const LegalityQuery &Query) {
189 return Query.Types[0].getNumElements() <= 2;
190 },
191 0, s32)
192 .minScalarOrEltIf(
193 [=](const LegalityQuery &Query) {
194 return Query.Types[0].getNumElements() <= 4;
195 },
196 0, s16)
197 .minScalarOrEltIf(
198 [=](const LegalityQuery &Query) {
199 return Query.Types[0].getNumElements() <= 16;
200 },
201 0, s8)
202 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
204
206 .legalFor({i32, i64, v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64})
207 .widenScalarToNextPow2(0)
208 .clampScalar(0, s32, s64)
209 .clampMaxNumElements(0, s8, 16)
210 .clampMaxNumElements(0, s16, 8)
211 .clampNumElements(0, v2s32, v4s32)
212 .clampNumElements(0, v2s64, v2s64)
214 [=](const LegalityQuery &Query) {
215 return Query.Types[0].getNumElements() <= 2;
216 },
217 0, s32)
218 .minScalarOrEltIf(
219 [=](const LegalityQuery &Query) {
220 return Query.Types[0].getNumElements() <= 4;
221 },
222 0, s16)
223 .minScalarOrEltIf(
224 [=](const LegalityQuery &Query) {
225 return Query.Types[0].getNumElements() <= 16;
226 },
227 0, s8)
228 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
230
231 getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
232 .customIf([=](const LegalityQuery &Query) {
233 const auto &SrcTy = Query.Types[0];
234 const auto &AmtTy = Query.Types[1];
235 return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
236 AmtTy.getSizeInBits() == 32;
237 })
238 .legalFor({
239 {i32, i32},
240 {i32, i64},
241 {i64, i64},
242 {v8i8, v8i8},
243 {v16i8, v16i8},
244 {v4i16, v4i16},
245 {v8i16, v8i16},
246 {v2i32, v2i32},
247 {v4i32, v4i32},
248 {v2i64, v2i64},
249 })
250 .widenScalarToNextPow2(0)
251 .clampScalar(1, s32, s64)
252 .clampScalar(0, s32, s64)
253 .clampNumElements(0, v8s8, v16s8)
254 .clampNumElements(0, v4s16, v8s16)
255 .clampNumElements(0, v2s32, v4s32)
256 .clampNumElements(0, v2s64, v2s64)
258 .minScalarSameAs(1, 0)
262
264 .legalFor({{p0, i64}, {v2p0, v2i64}})
265 .clampScalarOrElt(1, s64, s64)
266 .clampNumElements(0, v2p0, v2p0);
267
268 getActionDefinitionsBuilder(G_PTRMASK).legalFor({{p0, s64}});
269
270 getActionDefinitionsBuilder({G_SDIV, G_UDIV})
271 .legalFor({i32, i64})
272 .libcallFor({i128})
273 .clampScalar(0, s32, s64)
275 .scalarize(0);
276
277 getActionDefinitionsBuilder({G_SREM, G_UREM, G_SDIVREM, G_UDIVREM})
278 .lowerFor({i8, i16, i32, i64, v2i32, v4i32, v2i64})
279 .libcallFor({i128})
281 .minScalarOrElt(0, s32)
282 .clampNumElements(0, v2s32, v4s32)
283 .clampNumElements(0, v2s64, v2s64)
284 .scalarize(0);
285
286 getActionDefinitionsBuilder({G_SMULO, G_UMULO})
287 .widenScalarToNextPow2(0, /*Min = */ 32)
288 .clampScalar(0, s32, s64)
289 .lower();
290
291 getActionDefinitionsBuilder({G_SMULH, G_UMULH})
292 .legalFor({s64, v16s8, v8s16, v4s32})
293 .lower();
294
295 getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
296 .legalFor({v8i8, v16i8, v4i16, v8i16, v2i32, v4i32})
297 .legalFor(HasCSSC, {i32, i64})
298 .minScalar(HasCSSC, 0, s32)
299 .clampNumElements(0, v8s8, v16s8)
300 .clampNumElements(0, v4s16, v8s16)
301 .clampNumElements(0, v2s32, v4s32)
302 .lower();
303
304 // FIXME: Legal vector types are only legal with NEON.
306 .legalFor(HasCSSC, {s32, s64})
307 .legalFor({v16i8, v8i16, v4i32, v2i64, v2p0, v8i8, v4i16, v2i32})
308 .customIf([=](const LegalityQuery &Q) {
309 // TODO: Fix suboptimal codegen for 128+ bit types.
310 LLT SrcTy = Q.Types[0];
311 return SrcTy.isScalar() && SrcTy.getSizeInBits() < 128;
312 })
313 .widenScalarIf(
314 [=](const LegalityQuery &Query) { return Query.Types[0] == v4s8; },
315 [=](const LegalityQuery &Query) { return std::make_pair(0, v4i16); })
316 .widenScalarIf(
317 [=](const LegalityQuery &Query) { return Query.Types[0] == v2s16; },
318 [=](const LegalityQuery &Query) { return std::make_pair(0, v2i32); })
319 .clampNumElements(0, v8s8, v16s8)
320 .clampNumElements(0, v4s16, v8s16)
321 .clampNumElements(0, v2s32, v4s32)
322 .clampNumElements(0, v2s64, v2s64)
324 .lower();
325
327 {G_ABDS, G_ABDU, G_UAVGFLOOR, G_UAVGCEIL, G_SAVGFLOOR, G_SAVGCEIL})
328 .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
329 .lower();
330
332 {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
333 .legalFor({{s32, s32}, {s64, s32}})
334 .clampScalar(0, s32, s64)
335 .clampScalar(1, s32, s64)
337
338 getActionDefinitionsBuilder({G_FSHL, G_FSHR})
339 .customFor({{i32, i32}, {i32, i64}, {i64, i64}})
340 .lower();
341
343 .legalFor({{i32, i64}, {i64, i64}})
344 .customIf([=](const LegalityQuery &Q) {
345 return Q.Types[0].isScalar() && Q.Types[1].getScalarSizeInBits() < 64;
346 })
347 .lower();
349
350 getActionDefinitionsBuilder({G_SBFX, G_UBFX})
351 .customFor({{s32, s32}, {s64, s64}});
352
353 auto always = [=](const LegalityQuery &Q) { return true; };
355 .legalFor(HasCSSC, {{s32, s32}, {s64, s64}})
356 .legalFor({{v8s8, v8s8}, {v16s8, v16s8}})
357 .customFor(!HasCSSC, {{s32, s32}, {s64, s64}})
358 .customFor({{s128, s128},
359 {v4s16, v4s16},
360 {v8s16, v8s16},
361 {v2s32, v2s32},
362 {v4s32, v4s32},
363 {v2s64, v2s64}})
364 .clampScalar(0, s32, s128)
367 .minScalarEltSameAsIf(always, 1, 0)
368 .maxScalarEltSameAsIf(always, 1, 0)
369 .clampNumElements(0, v8s8, v16s8)
370 .clampNumElements(0, v4s16, v8s16)
371 .clampNumElements(0, v2s32, v4s32)
372 .clampNumElements(0, v2s64, v2s64)
375
376 getActionDefinitionsBuilder({G_CTLZ, G_CTLS})
377 .legalFor({{s32, s32},
378 {s64, s64},
379 {v8s8, v8s8},
380 {v16s8, v16s8},
381 {v4s16, v4s16},
382 {v8s16, v8s16},
383 {v2s32, v2s32},
384 {v4s32, v4s32}})
385 .widenScalarToNextPow2(1, /*Min=*/32)
386 .clampScalar(1, s32, s64)
388 .clampNumElements(0, v8s8, v16s8)
389 .clampNumElements(0, v4s16, v8s16)
390 .clampNumElements(0, v2s32, v4s32)
393 .scalarSameSizeAs(0, 1);
394
395 getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).lower();
396
398 .lowerIf(isVector(0))
399 .widenScalarToNextPow2(1, /*Min=*/32)
400 .clampScalar(1, s32, s64)
401 .scalarSameSizeAs(0, 1)
402 .legalFor(HasCSSC, {s32, s64})
403 .customFor(!HasCSSC, {s32, s64});
404
405 getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF).lower();
406
407 getActionDefinitionsBuilder(G_BITREVERSE)
408 .legalFor({s32, s64, v8s8, v16s8})
409 .widenScalarToNextPow2(0, /*Min = */ 32)
411 .clampScalar(0, s32, s64)
412 .clampNumElements(0, v8s8, v16s8)
413 .clampNumElements(0, v4s16, v8s16)
414 .clampNumElements(0, v2s32, v4s32)
415 .clampNumElements(0, v2s64, v2s64)
418 .lower();
419
421 .legalFor({s32, s64, v4s16, v8s16, v2s32, v4s32, v2s64})
423 .clampScalar(0, s32, s64)
424 .clampNumElements(0, v4s16, v8s16)
425 .clampNumElements(0, v2s32, v4s32)
426 .clampNumElements(0, v2s64, v2s64)
428
429 getActionDefinitionsBuilder({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
430 .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64})
431 .legalFor(HasSVE, {nxv16s8, nxv8s16, nxv4s32, nxv2s64})
432 .clampNumElements(0, v8s8, v16s8)
433 .clampNumElements(0, v4s16, v8s16)
434 .clampNumElements(0, v2s32, v4s32)
435 .clampMaxNumElements(0, s64, 2)
438 .lower();
439
441 {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FSQRT, G_FMAXNUM, G_FMINNUM,
442 G_FMAXIMUM, G_FMINIMUM, G_FCEIL, G_FFLOOR, G_FRINT, G_FNEARBYINT,
443 G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
444 .legalFor({f32, f64, v2f32, v4f32, v2f64})
445 .legalFor(HasFP16, {f16, v4f16, v8f16})
446 .libcallFor({f128})
447 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
448 .minScalarOrElt(0, MinFPScalar)
449 .clampNumElements(0, v4s16, v8s16)
450 .clampNumElements(0, v2s32, v4s32)
451 .clampNumElements(0, v2s64, v2s64)
453
454 getActionDefinitionsBuilder({G_FABS, G_FNEG})
455 .legalFor({f32, f64, v2f32, v4f32, v2f64})
456 .legalFor(HasFP16, {f16, v4f16, v8f16})
457 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
459 .clampNumElements(0, v4s16, v8s16)
460 .clampNumElements(0, v2s32, v4s32)
461 .clampNumElements(0, v2s64, v2s64)
463 .lowerFor({f16, v4f16, v8f16});
464
466 .libcallFor({f32, f64, f128})
467 .minScalar(0, f32)
468 .scalarize(0);
469
470 getActionDefinitionsBuilder({G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2,
471 G_FLOG10, G_FTAN, G_FEXP, G_FEXP2, G_FEXP10,
472 G_FACOS, G_FASIN, G_FATAN, G_FATAN2, G_FCOSH,
473 G_FSINH, G_FTANH, G_FMODF})
474 // We need a call for these, so we always need to scalarize.
475 .scalarize(0)
476 // Regardless of FP16 support, widen 16-bit elements to 32-bits.
477 .minScalar(0, f32)
478 .libcallFor({f32, f64, f128});
479 getActionDefinitionsBuilder({G_FPOWI, G_FLDEXP})
480 .scalarize(0)
481 .minScalar(0, f32)
482 .libcallFor({{f32, i32}, {f64, i32}, {f128, i32}});
483
484 getActionDefinitionsBuilder({G_LROUND, G_INTRINSIC_LRINT})
485 .legalFor({{s32, s32}, {s32, s64}, {s64, s32}, {s64, s64}})
486 .legalFor(HasFP16, {{s32, s16}, {s64, s16}})
487 .minScalar(1, s32)
488 .libcallFor({{s64, s128}})
489 .lower();
490 getActionDefinitionsBuilder({G_LLROUND, G_INTRINSIC_LLRINT})
491 .legalFor({{s64, s32}, {s64, s64}})
492 .legalFor(HasFP16, {{s64, s16}})
493 .minScalar(0, s64)
494 .minScalar(1, s32)
495 .libcallFor({{s64, s128}})
496 .lower();
497
498 // TODO: Custom legalization for mismatched types.
499 getActionDefinitionsBuilder(G_FCOPYSIGN)
501 [](const LegalityQuery &Query) { return Query.Types[0].isScalar(); },
502 [=](const LegalityQuery &Query) {
503 const LLT Ty = Query.Types[0];
504 return std::pair(0, LLT::fixed_vector(Ty == s16 ? 4 : 2, Ty));
505 })
506 .lower();
507
509
510 for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
511 auto &Actions = getActionDefinitionsBuilder(Op);
512
513 if (Op == G_SEXTLOAD)
515
516 // Atomics have zero extending behavior.
517 Actions
518 .legalForTypesWithMemDesc({{s32, p0, s8, 8},
519 {s32, p0, s16, 8},
520 {s32, p0, s32, 8},
521 {s64, p0, s8, 2},
522 {s64, p0, s16, 2},
523 {s64, p0, s32, 4},
524 {s64, p0, s64, 8},
525 {p0, p0, s64, 8},
526 {v2s32, p0, s64, 8}})
527 .widenScalarToNextPow2(0)
528 .clampScalar(0, s32, s64)
529 // TODO: We could support sum-of-pow2's but the lowering code doesn't know
530 // how to do that yet.
531 .unsupportedIfMemSizeNotPow2()
532 // Lower anything left over into G_*EXT and G_LOAD
533 .lower();
534 }
535
536 auto IsPtrVecPred = [=](const LegalityQuery &Query) {
537 const LLT &ValTy = Query.Types[0];
538 return ValTy.isPointerVector() && ValTy.getAddressSpace() == 0;
539 };
540
542 .customIf([=](const LegalityQuery &Query) {
543 return HasRCPC3 && Query.Types[0] == s128 &&
544 Query.MMODescrs[0].Ordering == AtomicOrdering::Acquire;
545 })
546 .customIf([=](const LegalityQuery &Query) {
547 return Query.Types[0] == s128 &&
548 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
549 })
550 .legalForTypesWithMemDesc({{s8, p0, s8, 8},
551 {s16, p0, s16, 8},
552 {s32, p0, s32, 8},
553 {s64, p0, s64, 8},
554 {p0, p0, s64, 8},
555 {s128, p0, s128, 8},
556 {v8s8, p0, s64, 8},
557 {v16s8, p0, s128, 8},
558 {v4s16, p0, s64, 8},
559 {v8s16, p0, s128, 8},
560 {v2s32, p0, s64, 8},
561 {v4s32, p0, s128, 8},
562 {v2s64, p0, s128, 8}})
563 // These extends are also legal
564 .legalForTypesWithMemDesc(
565 {{s32, p0, s8, 8}, {s32, p0, s16, 8}, {s64, p0, s32, 8}})
566 .legalForTypesWithMemDesc({
567 // SVE vscale x 128 bit base sizes
568 {nxv16s8, p0, nxv16s8, 8},
569 {nxv8s16, p0, nxv8s16, 8},
570 {nxv4s32, p0, nxv4s32, 8},
571 {nxv2s64, p0, nxv2s64, 8},
572 })
573 .widenScalarToNextPow2(0, /* MinSize = */ 8)
574 .clampMaxNumElements(0, s8, 16)
575 .clampMaxNumElements(0, s16, 8)
576 .clampMaxNumElements(0, s32, 4)
577 .clampMaxNumElements(0, s64, 2)
578 .clampMaxNumElements(0, p0, 2)
580 .clampScalar(0, s8, s64)
582 [=](const LegalityQuery &Query) {
583 // Clamp extending load results to 32-bits.
584 return Query.Types[0].isScalar() &&
585 Query.Types[0] != Query.MMODescrs[0].MemoryTy &&
586 Query.Types[0].getSizeInBits() > 32;
587 },
588 changeTo(0, s32))
589 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
590 .bitcastIf(typeInSet(0, {v4s8}),
591 [=](const LegalityQuery &Query) {
592 const LLT VecTy = Query.Types[0];
593 return std::pair(0, LLT::integer(VecTy.getSizeInBits()));
594 })
595 .customIf(IsPtrVecPred)
596 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)
597 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0);
598
600 .customIf([=](const LegalityQuery &Query) {
601 return HasRCPC3 && Query.Types[0] == s128 &&
602 Query.MMODescrs[0].Ordering == AtomicOrdering::Release;
603 })
604 .customIf([=](const LegalityQuery &Query) {
605 return Query.Types[0] == s128 &&
606 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
607 })
608 .widenScalarIf(
609 all(scalarNarrowerThan(0, 32),
611 changeTo(0, s32))
613 {{s8, p0, s8, 8}, {s16, p0, s8, 8}, // truncstorei8 from s16
614 {s32, p0, s8, 8}, // truncstorei8 from s32
615 {s64, p0, s8, 8}, // truncstorei8 from s64
616 {s16, p0, s16, 8}, {s32, p0, s16, 8}, // truncstorei16 from s32
617 {s64, p0, s16, 8}, // truncstorei16 from s64
618 {s32, p0, s8, 8}, {s32, p0, s16, 8}, {s32, p0, s32, 8},
619 {s64, p0, s64, 8}, {s64, p0, s32, 8}, // truncstorei32 from s64
620 {p0, p0, s64, 8}, {s128, p0, s128, 8}, {v16s8, p0, s128, 8},
621 {v8s8, p0, s64, 8}, {v4s16, p0, s64, 8}, {v8s16, p0, s128, 8},
622 {v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})
623 .legalForTypesWithMemDesc({
624 // SVE vscale x 128 bit base sizes
625 // TODO: Add nxv2p0. Consider bitcastIf.
626 // See #92130
627 // https://github.com/llvm/llvm-project/pull/92130#discussion_r1616888461
628 {nxv16s8, p0, nxv16s8, 8},
629 {nxv8s16, p0, nxv8s16, 8},
630 {nxv4s32, p0, nxv4s32, 8},
631 {nxv2s64, p0, nxv2s64, 8},
632 })
633 .clampScalar(0, s8, s64)
634 .minScalarOrElt(0, s8)
635 .lowerIf([=](const LegalityQuery &Query) {
636 return Query.Types[0].isScalar() &&
637 Query.Types[0] != Query.MMODescrs[0].MemoryTy;
638 })
639 // Maximum: sN * k = 128
640 .clampMaxNumElements(0, s8, 16)
641 .clampMaxNumElements(0, s16, 8)
642 .clampMaxNumElements(0, s32, 4)
643 .clampMaxNumElements(0, s64, 2)
644 .clampMaxNumElements(0, p0, 2)
646 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
647 .bitcastIf(all(typeInSet(0, {v4s8}),
648 LegalityPredicate([=](const LegalityQuery &Query) {
649 return Query.Types[0].getSizeInBits() ==
650 Query.MMODescrs[0].MemoryTy.getSizeInBits();
651 })),
652 [=](const LegalityQuery &Query) {
653 const LLT VecTy = Query.Types[0];
654 return std::pair(0, LLT::integer(VecTy.getSizeInBits()));
655 })
656 .customIf(IsPtrVecPred)
657 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)
658 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
659 .lower();
660
661 getActionDefinitionsBuilder(G_INDEXED_STORE)
662 // Idx 0 == Ptr, Idx 1 == Val
663 // TODO: we can implement legalizations but as of now these are
664 // generated in a very specific way.
666 {p0, s8, s8, 8},
667 {p0, s16, s16, 8},
668 {p0, s32, s8, 8},
669 {p0, s32, s16, 8},
670 {p0, s32, s32, 8},
671 {p0, s64, s64, 8},
672 {p0, p0, p0, 8},
673 {p0, v8s8, v8s8, 8},
674 {p0, v16s8, v16s8, 8},
675 {p0, v4s16, v4s16, 8},
676 {p0, v8s16, v8s16, 8},
677 {p0, v2s32, v2s32, 8},
678 {p0, v4s32, v4s32, 8},
679 {p0, v2s64, v2s64, 8},
680 {p0, v2p0, v2p0, 8},
681 {p0, s128, s128, 8},
682 })
683 .unsupported();
684
685 auto IndexedLoadBasicPred = [=](const LegalityQuery &Query) {
686 LLT LdTy = Query.Types[0];
687 LLT PtrTy = Query.Types[1];
688 if (!llvm::is_contained(PackedVectorAllTypesVec, LdTy) &&
689 !llvm::is_contained(ScalarAndPtrTypesVec, LdTy) && LdTy != s128)
690 return false;
691 if (PtrTy != p0)
692 return false;
693 return true;
694 };
695 getActionDefinitionsBuilder(G_INDEXED_LOAD)
698 .legalIf(IndexedLoadBasicPred)
699 .unsupported();
700 getActionDefinitionsBuilder({G_INDEXED_SEXTLOAD, G_INDEXED_ZEXTLOAD})
701 .unsupportedIf(
703 .legalIf(all(typeInSet(0, {s16, s32, s64}),
704 LegalityPredicate([=](const LegalityQuery &Q) {
705 LLT LdTy = Q.Types[0];
706 LLT PtrTy = Q.Types[1];
707 LLT MemTy = Q.MMODescrs[0].MemoryTy;
708 if (PtrTy != p0)
709 return false;
710 if (LdTy == s16)
711 return MemTy == s8;
712 if (LdTy == s32)
713 return MemTy == s8 || MemTy == s16;
714 if (LdTy == s64)
715 return MemTy == s8 || MemTy == s16 || MemTy == s32;
716 return false;
717 })))
718 .unsupported();
719
720 // Constants
722 .legalFor({p0, s8, s16, s32, s64})
723 .widenScalarToNextPow2(0)
724 .clampScalar(0, s8, s64);
725 getActionDefinitionsBuilder(G_FCONSTANT)
726 .legalFor({s16, s32, s64, s128})
727 .clampScalar(0, MinFPScalar, s128);
728
729 // FIXME: fix moreElementsToNextPow2
731 .legalFor({{i32, i32}, {i32, i64}, {i32, p0}})
733 .minScalarOrElt(1, s8)
734 .clampScalar(1, s32, s64)
735 .clampScalar(0, s32, s32)
738 [=](const LegalityQuery &Query) {
739 const LLT &Ty = Query.Types[0];
740 const LLT &SrcTy = Query.Types[1];
741 return Ty.isVector() && !SrcTy.isPointerVector() &&
742 Ty.getElementType() != SrcTy.getElementType();
743 },
744 0, 1)
745 .minScalarOrEltIf(
746 [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
747 1, s32)
748 .minScalarOrEltIf(
749 [=](const LegalityQuery &Query) {
750 return Query.Types[1].isPointerVector();
751 },
752 0, s64)
754 .clampNumElements(1, v8s8, v16s8)
755 .clampNumElements(1, v4s16, v8s16)
756 .clampNumElements(1, v2s32, v4s32)
757 .clampNumElements(1, v2s64, v2s64)
758 .clampNumElements(1, v2p0, v2p0)
759 .customIf(isVector(0));
760
762 .legalFor({{s32, f32},
763 {s32, f64},
764 {v4s32, v4f32},
765 {v2s32, v2f32},
766 {v2s64, v2f64}})
767 .legalFor(HasFP16, {{s32, f16}, {v4s16, v4f16}, {v8s16, v8f16}})
769 .clampScalar(0, s32, s32)
770 .minScalarOrElt(1, MinFPScalar)
773 [=](const LegalityQuery &Query) {
774 const LLT &Ty = Query.Types[0];
775 const LLT &SrcTy = Query.Types[1];
776 return Ty.isVector() && !SrcTy.isPointerVector() &&
777 Ty.getElementType() != SrcTy.getElementType();
778 },
779 0, 1)
780 .clampNumElements(1, v4s16, v8s16)
781 .clampNumElements(1, v2s32, v4s32)
782 .clampMaxNumElements(1, s64, 2)
784 .libcallFor({{s32, s128}});
785
786 // Extensions
787 auto ExtLegalFunc = [=](const LegalityQuery &Query) {
788 unsigned DstSize = Query.Types[0].getSizeInBits();
789
790 // Handle legal vectors using legalFor
791 if (Query.Types[0].isVector())
792 return false;
793
794 if (DstSize < 8 || DstSize >= 128 || !isPowerOf2_32(DstSize))
795 return false; // Extending to a scalar s128 needs narrowing.
796
797 const LLT &SrcTy = Query.Types[1];
798
799 // Make sure we fit in a register otherwise. Don't bother checking that
800 // the source type is below 128 bits. We shouldn't be allowing anything
801 // through which is wider than the destination in the first place.
802 unsigned SrcSize = SrcTy.getSizeInBits();
803 if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
804 return false;
805
806 return true;
807 };
808 getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
809 .legalIf(ExtLegalFunc)
810 .legalFor({{v8s16, v8s8}, {v4s32, v4s16}, {v2s64, v2s32}})
811 .clampScalar(0, s64, s64) // Just for s128, others are handled above.
813 .clampMaxNumElements(1, s8, 8)
814 .clampMaxNumElements(1, s16, 4)
815 .clampMaxNumElements(1, s32, 2)
816 // Tries to convert a large EXTEND into two smaller EXTENDs
817 .lowerIf([=](const LegalityQuery &Query) {
818 return (Query.Types[0].getScalarSizeInBits() >
819 Query.Types[1].getScalarSizeInBits() * 2) &&
820 Query.Types[0].isVector() &&
821 (Query.Types[1].getScalarSizeInBits() == 8 ||
822 Query.Types[1].getScalarSizeInBits() == 16);
823 })
824 .clampMinNumElements(1, s8, 8)
825 .clampMinNumElements(1, s16, 4)
827
829 .legalFor({{v8s8, v8s16}, {v4s16, v4s32}, {v2s32, v2s64}})
831 .clampMaxNumElements(0, s8, 8)
832 .clampMaxNumElements(0, s16, 4)
833 .clampMaxNumElements(0, s32, 2)
835 [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
836 0, s8)
837 .lowerIf([=](const LegalityQuery &Query) {
838 LLT DstTy = Query.Types[0];
839 LLT SrcTy = Query.Types[1];
840 return DstTy.isVector() && SrcTy.getSizeInBits() > 128 &&
841 DstTy.getScalarSizeInBits() * 2 <= SrcTy.getScalarSizeInBits();
842 })
843 .clampMinNumElements(0, s8, 8)
844 .clampMinNumElements(0, s16, 4)
845 .alwaysLegal();
846
847 getActionDefinitionsBuilder({G_TRUNC_SSAT_S, G_TRUNC_SSAT_U, G_TRUNC_USAT_U})
848 .legalFor({{v8s8, v8s16}, {v4s16, v4s32}, {v2s32, v2s64}})
849 .clampNumElements(0, v2s32, v2s32);
850
851 getActionDefinitionsBuilder(G_SEXT_INREG)
852 .legalFor({s32, s64})
853 .legalFor(PackedVectorAllTypeList)
854 .maxScalar(0, s64)
855 .clampNumElements(0, v8s8, v16s8)
856 .clampNumElements(0, v4s16, v8s16)
857 .clampNumElements(0, v2s32, v4s32)
858 .clampMaxNumElements(0, s64, 2)
859 .lower();
860
861 // FP conversions
863 .legalFor(
864 {{f16, f32}, {f16, f64}, {f32, f64}, {v4f16, v4f32}, {v2f32, v2f64}})
865 .libcallFor({{f16, f128}, {f32, f128}, {f64, f128}})
867 .customIf([](const LegalityQuery &Q) {
868 LLT DstTy = Q.Types[0];
869 LLT SrcTy = Q.Types[1];
870 return SrcTy.isFixedVector() && DstTy.isFixedVector() &&
871 SrcTy.getScalarSizeInBits() == 64 &&
872 DstTy.getScalarSizeInBits() == 16;
873 })
874 // Clamp based on input
875 .clampNumElements(1, v4s32, v4s32)
876 .clampNumElements(1, v2s64, v2s64)
877 .scalarize(0);
878
880 .legalFor({{f32, f16},
881 {f64, f16},
882 {f32, bf16},
883 {f64, f32},
884 {v4f32, v4f16},
885 {v4f32, v4bf16},
886 {v2f64, v2f32}})
887 .libcallFor({{f128, f64}, {f128, f32}, {f128, f16}})
890 [](const LegalityQuery &Q) {
891 LLT DstTy = Q.Types[0];
892 LLT SrcTy = Q.Types[1];
893 return SrcTy.isVector() && DstTy.isVector() &&
894 SrcTy.getScalarSizeInBits() == 16 &&
895 DstTy.getScalarSizeInBits() == 64;
896 },
897 changeElementTo(1, f32))
898 .clampNumElements(0, v4s32, v4s32)
899 .clampNumElements(0, v2s64, v2s64)
900 .scalarize(0);
901
902 // Conversions
903 getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
904 .legalFor({{i32, f32},
905 {i64, f32},
906 {i32, f64},
907 {i64, f64},
908 {v2i32, v2f32},
909 {v4i32, v4f32},
910 {v2i64, v2f64}})
911 .legalFor(HasFP16,
912 {{i32, f16}, {i64, f16}, {v4i16, v4f16}, {v8i16, v8f16}})
913 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
915 // The range of a fp16 value fits into an i17, so we can lower the width
916 // to i64.
918 [=](const LegalityQuery &Query) {
919 return Query.Types[1] == f16 && Query.Types[0].getSizeInBits() > 64;
920 },
921 changeTo(0, i64))
924 .minScalar(0, s32)
925 .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
927 [=](const LegalityQuery &Query) {
928 return Query.Types[0].getScalarSizeInBits() <= 64 &&
929 Query.Types[0].getScalarSizeInBits() >
930 Query.Types[1].getScalarSizeInBits();
931 },
933 .widenScalarIf(
934 [=](const LegalityQuery &Query) {
935 return Query.Types[1].getScalarSizeInBits() <= 64 &&
936 Query.Types[0].getScalarSizeInBits() <
937 Query.Types[1].getScalarSizeInBits();
938 },
940 .clampNumElements(0, v4s16, v8s16)
941 .clampNumElements(0, v2s32, v4s32)
942 .clampMaxNumElements(0, s64, 2)
943 .libcallFor(
944 {{i32, f128}, {i64, f128}, {i128, f128}, {i128, f32}, {i128, f64}});
945
946 getActionDefinitionsBuilder({G_FPTOSI_SAT, G_FPTOUI_SAT})
947 .legalFor({{i32, f32},
948 {i64, f32},
949 {i32, f64},
950 {i64, f64},
951 {v2i32, v2f32},
952 {v4i32, v4f32},
953 {v2i64, v2f64}})
954 .legalFor(
955 HasFP16,
956 {{i16, f16}, {i32, f16}, {i64, f16}, {v4i16, v4f16}, {v8i16, v8f16}})
957 // Handle types larger than i64 by scalarizing/lowering.
958 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
960 // The range of a fp16 value fits into an i17, so we can lower the width
961 // to i64.
963 [=](const LegalityQuery &Query) {
964 return Query.Types[1] == f16 && Query.Types[0].getSizeInBits() > 64;
965 },
966 changeTo(0, i64))
967 .lowerIf(::any(scalarWiderThan(0, 64), scalarWiderThan(1, 64)), 0)
969 .widenScalarToNextPow2(0, /*MinSize=*/32)
970 .minScalar(0, s32)
971 .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
973 [=](const LegalityQuery &Query) {
974 unsigned ITySize = Query.Types[0].getScalarSizeInBits();
975 return (ITySize == 16 || ITySize == 32 || ITySize == 64) &&
976 ITySize > Query.Types[1].getScalarSizeInBits();
977 },
979 .widenScalarIf(
980 [=](const LegalityQuery &Query) {
981 unsigned FTySize = Query.Types[1].getScalarSizeInBits();
982 return (FTySize == 16 || FTySize == 32 || FTySize == 64) &&
983 Query.Types[0].getScalarSizeInBits() < FTySize;
984 },
987 .clampNumElements(0, v4s16, v8s16)
988 .clampNumElements(0, v2s32, v4s32)
989 .clampMaxNumElements(0, s64, 2);
990
991 getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
992 .legalFor({{f32, i32},
993 {f64, i32},
994 {f32, i64},
995 {f64, i64},
996 {v2f32, v2i32},
997 {v4f32, v4i32},
998 {v2f64, v2i64}})
999 .legalFor(HasFP16,
1000 {{f16, i32}, {f16, i64}, {v4f16, v4i16}, {v8f16, v8i16}})
1001 .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
1005 .minScalar(1, f32)
1006 .lowerIf([](const LegalityQuery &Query) {
1007 return Query.Types[1].isVector() &&
1008 Query.Types[1].getScalarSizeInBits() == 64 &&
1009 Query.Types[0].getScalarSizeInBits() == 16;
1010 })
1011 .widenScalarIf(
1012 [=](const LegalityQuery &Query) {
1013 return Query.Types[0].getScalarType() == bf16;
1014 },
1015 changeElementTo(0, f32))
1016 .widenScalarOrEltToNextPow2OrMinSize(0, /*MinSize=*/HasFP16 ? 16 : 32)
1017 .scalarizeIf(
1018 // v2i64->v2f32 needs to scalarize to avoid double-rounding issues.
1019 [](const LegalityQuery &Query) {
1020 return Query.Types[0].getScalarSizeInBits() == 32 &&
1021 Query.Types[1].getScalarSizeInBits() == 64;
1022 },
1023 0)
1024 .widenScalarIf(
1025 [](const LegalityQuery &Query) {
1026 return Query.Types[1].getScalarSizeInBits() <= 64 &&
1027 Query.Types[0].getScalarSizeInBits() <
1028 Query.Types[1].getScalarSizeInBits();
1029 },
1031 .widenScalarIf(
1032 [](const LegalityQuery &Query) {
1033 return Query.Types[0].getScalarSizeInBits() <= 64 &&
1034 Query.Types[0].getScalarSizeInBits() >
1035 Query.Types[1].getScalarSizeInBits();
1036 },
1038 .clampNumElements(0, v4s16, v8s16)
1039 .clampNumElements(0, v2s32, v4s32)
1040 .clampMaxNumElements(0, s64, 2)
1041 .libcallFor({{f16, i128},
1042 {f32, i128},
1043 {f64, i128},
1044 {f128, i128},
1045 {f128, i32},
1046 {f128, i64}});
1047
1048 // Control-flow
1051 .legalFor({s32})
1052 .clampScalar(0, s32, s32);
1053 getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
1054
1056 .legalFor({{s32, s32}, {s64, s32}, {p0, s32}})
1057 .widenScalarToNextPow2(0)
1058 .clampScalar(0, s32, s64)
1059 .clampScalar(1, s32, s32)
1062 .lowerIf(isVector(0));
1063
1064 // Pointer-handling
1065 getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
1066
1067 if (TM.getCodeModel() == CodeModel::Small)
1068 getActionDefinitionsBuilder(G_GLOBAL_VALUE).custom();
1069 else
1070 getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
1071
1072 getActionDefinitionsBuilder(G_PTRAUTH_GLOBAL_VALUE)
1073 .legalIf(all(typeIs(0, p0), typeIs(1, p0)));
1074
1075 getActionDefinitionsBuilder(G_PTRTOINT)
1076 .legalFor({{i64, p0}, {v2i64, v2p0}})
1077 .widenScalarToNextPow2(0, 64)
1078 .clampScalar(0, s64, s64)
1079 .clampMaxNumElements(0, s64, 2);
1080
1081 getActionDefinitionsBuilder(G_INTTOPTR)
1082 .unsupportedIf([&](const LegalityQuery &Query) {
1083 return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
1084 })
1085 .legalFor({{p0, i64}, {v2p0, v2i64}})
1086 .clampMaxNumElements(1, s64, 2);
1087
1088 // Casts for 32 and 64-bit width type are just copies.
1089 // Same for 128-bit width type, except they are on the FPR bank.
1092 // Keeping 32-bit instructions legal to prevent regression in some tests
1093 .legalForCartesianProduct({s32, v2s16, v4s8})
1094 .legalForCartesianProduct({s64, v8s8, v4s16, v2s32})
1095 .legalForCartesianProduct({s128, v16s8, v8s16, v4s32, v2s64, v2p0})
1096 .customIf([=](const LegalityQuery &Query) {
1097 // Handle casts from i1 vectors to scalars.
1098 LLT DstTy = Query.Types[0];
1099 LLT SrcTy = Query.Types[1];
1100 return DstTy.isScalar() && SrcTy.isVector() &&
1101 SrcTy.getScalarSizeInBits() == 1;
1102 })
1103 .lowerIf([=](const LegalityQuery &Query) {
1104 return Query.Types[0].isVector() != Query.Types[1].isVector();
1105 })
1107 .clampNumElements(0, v8s8, v16s8)
1108 .clampNumElements(0, v4s16, v8s16)
1109 .clampNumElements(0, v2s32, v4s32)
1110 .clampMaxNumElements(0, s64, 2)
1111 .lower();
1112
1113 getActionDefinitionsBuilder(G_VASTART).legalFor({p0});
1114
1115 // va_list must be a pointer, but most sized types are pretty easy to handle
1116 // as the destination.
1118 .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
1119 .clampScalar(0, s8, s64)
1120 .widenScalarToNextPow2(0, /*Min*/ 8);
1121
1122 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
1123 .lowerIf(
1124 all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0)));
1125
1126 bool UseOutlineAtomics = ST.outlineAtomics() && !ST.hasLSE();
1127
1128 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
1129 .legalFor(!UseOutlineAtomics, {{s32, p0}, {s64, p0}})
1130 .customFor(!UseOutlineAtomics, {{s128, p0}})
1131 .libcallFor(UseOutlineAtomics,
1132 {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}, {s128, p0}})
1133 .clampScalar(0, s32, s64);
1134
1135 getActionDefinitionsBuilder({G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD,
1136 G_ATOMICRMW_SUB, G_ATOMICRMW_AND, G_ATOMICRMW_OR,
1137 G_ATOMICRMW_XOR})
1138 .legalFor(!UseOutlineAtomics, {{s32, p0}, {s64, p0}})
1139 .libcallFor(UseOutlineAtomics,
1140 {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}})
1141 .clampScalar(0, s32, s64);
1142
1143 // Do not outline these atomics operations, as per comment in
1144 // AArch64ISelLowering.cpp's shouldExpandAtomicRMWInIR().
1146 {G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
1147 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)))
1148 .clampScalar(0, s32, s64);
1149
1150 getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});
1151
1152 // Merge/Unmerge
1153 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
1154 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
1155 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
1157 .widenScalarToNextPow2(LitTyIdx, 8)
1158 .widenScalarToNextPow2(BigTyIdx, 32)
1159 .clampScalar(LitTyIdx, s8, s64)
1160 .clampScalar(BigTyIdx, s32, s128)
1161 .legalIf([=](const LegalityQuery &Q) {
1162 switch (Q.Types[BigTyIdx].getSizeInBits()) {
1163 case 32:
1164 case 64:
1165 case 128:
1166 break;
1167 default:
1168 return false;
1169 }
1170 switch (Q.Types[LitTyIdx].getSizeInBits()) {
1171 case 8:
1172 case 16:
1173 case 32:
1174 case 64:
1175 return true;
1176 default:
1177 return false;
1178 }
1179 });
1180 }
1181
1182 // TODO : nxv4s16, nxv2s16, nxv2s32
1183 getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
1184 .legalFor(HasSVE, {{s16, nxv16s8, s64},
1185 {s16, nxv8s16, s64},
1186 {s32, nxv4s32, s64},
1187 {s64, nxv2s64, s64}})
1188 .unsupportedIf([=](const LegalityQuery &Query) {
1189 const LLT &EltTy = Query.Types[1].getElementType();
1190 if (Query.Types[1].isScalableVector())
1191 return false;
1192 return Query.Types[0] != EltTy;
1193 })
1194 .minScalar(2, s64)
1195 .customIf([=](const LegalityQuery &Query) {
1196 const LLT &VecTy = Query.Types[1];
1197 return VecTy == v8s8 || VecTy == v16s8 || VecTy == v2s16 ||
1198 VecTy == v4s16 || VecTy == v8s16 || VecTy == v2s32 ||
1199 VecTy == v4s32 || VecTy == v2s64 || VecTy == v2p0;
1200 })
1201 .minScalarOrEltIf(
1202 [=](const LegalityQuery &Query) {
1203 // We want to promote to <M x s1> to <M x s64> if that wouldn't
1204 // cause the total vec size to be > 128b.
1205 return Query.Types[1].isFixedVector() &&
1206 Query.Types[1].getNumElements() <= 2;
1207 },
1208 0, s64)
1209 .minScalarOrEltIf(
1210 [=](const LegalityQuery &Query) {
1211 return Query.Types[1].isFixedVector() &&
1212 Query.Types[1].getNumElements() <= 4;
1213 },
1214 0, s32)
1215 .minScalarOrEltIf(
1216 [=](const LegalityQuery &Query) {
1217 return Query.Types[1].isFixedVector() &&
1218 Query.Types[1].getNumElements() <= 8;
1219 },
1220 0, s16)
1221 .minScalarOrEltIf(
1222 [=](const LegalityQuery &Query) {
1223 return Query.Types[1].isFixedVector() &&
1224 Query.Types[1].getNumElements() <= 16;
1225 },
1226 0, s8)
1227 .minScalarOrElt(0, s8) // Worst case, we need at least s8.
1229 .clampMaxNumElements(1, s64, 2)
1230 .clampMaxNumElements(1, s32, 4)
1231 .clampMaxNumElements(1, s16, 8)
1232 .clampMaxNumElements(1, s8, 16)
1233 .clampMaxNumElements(1, p0, 2)
1235
1236 getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
1237 .legalIf(
1238 typeInSet(0, {v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64, v2p0}))
1239 .legalFor(HasSVE, {{nxv16s8, s32, s64},
1240 {nxv8s16, s32, s64},
1241 {nxv4s32, s32, s64},
1242 {nxv2s64, s64, s64}})
1245 .clampNumElements(0, v8s8, v16s8)
1246 .clampNumElements(0, v4s16, v8s16)
1247 .clampNumElements(0, v2s32, v4s32)
1248 .clampMaxNumElements(0, s64, 2)
1249 .clampMaxNumElements(0, p0, 2)
1251
1252 getActionDefinitionsBuilder(G_BUILD_VECTOR)
1253 .legalFor({{v8s8, s8},
1254 {v16s8, s8},
1255 {v4s16, s16},
1256 {v8s16, s16},
1257 {v2s32, s32},
1258 {v4s32, s32},
1259 {v2s64, s64},
1260 {v2p0, p0}})
1261 .clampNumElements(0, v4s32, v4s32)
1262 .clampNumElements(0, v2s64, v2s64)
1263 .minScalarOrElt(0, s8)
1266 .minScalarSameAs(1, 0);
1267
1268 getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).lower();
1269
1270 getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
1271 .legalIf([=](const LegalityQuery &Query) {
1272 const LLT &DstTy = Query.Types[0];
1273 const LLT &SrcTy = Query.Types[1];
1274 // For now just support the TBL2 variant which needs the source vectors
1275 // to be the same size as the dest.
1276 if (DstTy != SrcTy)
1277 return false;
1278 return llvm::is_contained(
1279 {v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64}, DstTy);
1280 })
1281 .moreElementsIf(
1282 [](const LegalityQuery &Query) {
1283 return Query.Types[0].getNumElements() >
1284 Query.Types[1].getNumElements();
1285 },
1286 changeTo(1, 0))
1289 [](const LegalityQuery &Query) {
1290 return Query.Types[0].getNumElements() <
1291 Query.Types[1].getNumElements();
1292 },
1293 changeTo(0, 1))
1294 .widenScalarOrEltToNextPow2OrMinSize(0, 8)
1295 .clampNumElements(0, v8s8, v16s8)
1296 .clampNumElements(0, v4s16, v8s16)
1297 .clampNumElements(0, v4s32, v4s32)
1298 .clampNumElements(0, v2s64, v2s64)
1300 .bitcastIf(isPointerVector(0), [=](const LegalityQuery &Query) {
1301 // Bitcast pointers vector to i64.
1302 const LLT DstTy = Query.Types[0];
1303 return std::pair(
1304 0, LLT::vector(DstTy.getElementCount(), LLT::integer(64)));
1305 });
1306
1307 getActionDefinitionsBuilder(G_CONCAT_VECTORS)
1308 .legalFor({{v16s8, v8s8}, {v8s16, v4s16}, {v4s32, v2s32}})
1309 .customIf([=](const LegalityQuery &Query) {
1310 return Query.Types[0].isFixedVector() &&
1311 Query.Types[0].getScalarSizeInBits() < 8;
1312 })
1313 .bitcastIf(
1314 [=](const LegalityQuery &Query) {
1315 return Query.Types[0].isFixedVector() &&
1316 Query.Types[1].isFixedVector() &&
1317 Query.Types[0].getScalarSizeInBits() >= 8 &&
1318 isPowerOf2_64(Query.Types[0].getScalarSizeInBits()) &&
1319 Query.Types[0].getSizeInBits() <= 128 &&
1320 Query.Types[1].getSizeInBits() <= 64;
1321 },
1322 [=](const LegalityQuery &Query) {
1323 const LLT DstTy = Query.Types[0];
1324 const LLT SrcTy = Query.Types[1];
1325 return std::pair(
1326 0, DstTy.changeElementSize(SrcTy.getSizeInBits())
1329 SrcTy.getNumElements())));
1330 });
1331
1332 getActionDefinitionsBuilder(G_EXTRACT_SUBVECTOR)
1333 .legalFor({{v8s8, v16s8}, {v4s16, v8s16}, {v2s32, v4s32}})
1335 .immIdx(0); // Inform verifier imm idx 0 is handled.
1336
1337 // TODO: {nxv16s8, s8}, {nxv8s16, s16}
1338 getActionDefinitionsBuilder(G_SPLAT_VECTOR)
1339 .legalFor(HasSVE, {{nxv4s32, s32}, {nxv2s64, s64}});
1340
1341 getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({p0});
1342
1343 getActionDefinitionsBuilder(G_BRJT).legalFor({{p0, s64}});
1344
1345 getActionDefinitionsBuilder({G_TRAP, G_DEBUGTRAP, G_UBSANTRAP}).alwaysLegal();
1346
1347 getActionDefinitionsBuilder(G_DYN_STACKALLOC).custom();
1348
1349 getActionDefinitionsBuilder({G_STACKSAVE, G_STACKRESTORE}).lower();
1350
1351 if (ST.hasMOPS()) {
1352 // G_BZERO is not supported. Currently it is only emitted by
1353 // PreLegalizerCombiner for G_MEMSET with zero constant.
1355
1357 .legalForCartesianProduct({p0}, {s64}, {s64})
1358 .customForCartesianProduct({p0}, {s8}, {s64})
1359 .immIdx(0); // Inform verifier imm idx 0 is handled.
1360
1361 getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE})
1362 .legalForCartesianProduct({p0}, {p0}, {s64})
1363 .immIdx(0); // Inform verifier imm idx 0 is handled.
1364
1365 // G_MEMCPY_INLINE does not have a tailcall immediate
1366 getActionDefinitionsBuilder(G_MEMCPY_INLINE)
1367 .legalForCartesianProduct({p0}, {p0}, {s64});
1368
1369 } else {
1370 getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
1371 .libcall();
1372 }
1373
1374 // For fadd reductions we have pairwise operations available. We treat the
1375 // usual legal types as legal and handle the lowering to pairwise instructions
1376 // later.
1377 getActionDefinitionsBuilder(G_VECREDUCE_FADD)
1378 .legalFor({{f32, v2f32}, {f32, v4f32}, {f64, v2f64}})
1379 .legalFor(HasFP16, {{f16, v4f16}, {f16, v8f16}})
1380 .minScalarOrElt(0, MinFPScalar)
1381 .clampMaxNumElements(1, s64, 2)
1382 .clampMaxNumElements(1, s32, 4)
1383 .clampMaxNumElements(1, s16, 8)
1385 .scalarize(1)
1386 .lower();
1387
1388 // For fmul reductions we need to split up into individual operations. We
1389 // clamp to 128 bit vectors then to 64bit vectors to produce a cascade of
1390 // smaller types, followed by scalarizing what remains.
1391 getActionDefinitionsBuilder(G_VECREDUCE_FMUL)
1392 .minScalarOrElt(0, MinFPScalar)
1393 .clampMaxNumElements(1, s64, 2)
1394 .clampMaxNumElements(1, s32, 4)
1395 .clampMaxNumElements(1, s16, 8)
1396 .clampMaxNumElements(1, s32, 2)
1397 .clampMaxNumElements(1, s16, 4)
1398 .scalarize(1)
1399 .lower();
1400
1401 getActionDefinitionsBuilder({G_VECREDUCE_SEQ_FADD, G_VECREDUCE_SEQ_FMUL})
1402 .scalarize(2)
1403 .lower();
1404
1405 getActionDefinitionsBuilder(G_VECREDUCE_ADD)
1406 .legalFor({{i8, v8i8},
1407 {i8, v16i8},
1408 {i16, v4i16},
1409 {i16, v8i16},
1410 {i32, v2i32},
1411 {i32, v4i32},
1412 {i64, v2i64}})
1414 .clampMaxNumElements(1, s64, 2)
1415 .clampMaxNumElements(1, s32, 4)
1416 .clampMaxNumElements(1, s16, 8)
1417 .clampMaxNumElements(1, s8, 16)
1419 .scalarize(1);
1420
1421 getActionDefinitionsBuilder({G_VECREDUCE_FMIN, G_VECREDUCE_FMAX,
1422 G_VECREDUCE_FMINIMUM, G_VECREDUCE_FMAXIMUM})
1423 .legalFor({{f32, v2f32}, {f32, v4f32}, {f64, v2f64}})
1424 .legalFor(HasFP16, {{f16, v4f16}, {f16, v8f16}})
1425 .minScalarOrElt(0, MinFPScalar)
1426 .clampMaxNumElements(1, s64, 2)
1427 .clampMaxNumElements(1, s32, 4)
1428 .clampMaxNumElements(1, s16, 8)
1429 .scalarize(1)
1430 .lower();
1431
1432 getActionDefinitionsBuilder(G_VECREDUCE_MUL)
1433 .clampMaxNumElements(1, s32, 2)
1434 .clampMaxNumElements(1, s16, 4)
1435 .clampMaxNumElements(1, s8, 8)
1436 .scalarize(1)
1437 .lower();
1438
1440 {G_VECREDUCE_SMIN, G_VECREDUCE_SMAX, G_VECREDUCE_UMIN, G_VECREDUCE_UMAX})
1441 .legalFor({{i8, v8i8},
1442 {i8, v16i8},
1443 {i16, v4i16},
1444 {i16, v8i16},
1445 {i32, v2i32},
1446 {i32, v4i32}})
1447 .moreElementsIf(
1448 [=](const LegalityQuery &Query) {
1449 return Query.Types[1].isVector() &&
1450 Query.Types[1].getElementType() != s8 &&
1451 Query.Types[1].getNumElements() & 1;
1452 },
1454 .clampMaxNumElements(1, s64, 2)
1455 .clampMaxNumElements(1, s32, 4)
1456 .clampMaxNumElements(1, s16, 8)
1457 .clampMaxNumElements(1, s8, 16)
1458 .scalarize(1)
1459 .lower();
1460
1462 {G_VECREDUCE_OR, G_VECREDUCE_AND, G_VECREDUCE_XOR})
1463 // Try to break down into smaller vectors as long as they're at least 64
1464 // bits. This lets us use vector operations for some parts of the
1465 // reduction.
1466 .fewerElementsIf(
1467 [=](const LegalityQuery &Q) {
1468 LLT SrcTy = Q.Types[1];
1469 if (SrcTy.isScalar())
1470 return false;
1471 if (!isPowerOf2_32(SrcTy.getNumElements()))
1472 return false;
1473 // We can usually perform 64b vector operations.
1474 return SrcTy.getSizeInBits() > 64;
1475 },
1476 [=](const LegalityQuery &Q) {
1477 LLT SrcTy = Q.Types[1];
1478 return std::make_pair(1, SrcTy.divide(2));
1479 })
1480 .scalarize(1)
1481 .lower();
1482
1483 // TODO: Update this to correct handling when adding AArch64/SVE support.
1484 getActionDefinitionsBuilder(G_VECTOR_COMPRESS).lower();
1485
1486 // Access to floating-point environment.
1487 getActionDefinitionsBuilder({G_GET_FPENV, G_SET_FPENV, G_RESET_FPENV,
1488 G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
1489 .libcall();
1490
1491 getActionDefinitionsBuilder(G_IS_FPCLASS).lower();
1492
1493 getActionDefinitionsBuilder(G_PREFETCH).custom();
1494
1495 getActionDefinitionsBuilder({G_SCMP, G_UCMP}).lower();
1496
1498 verify(*ST.getInstrInfo());
1499}
1500
                                          LostDebugLocObserver &LocObserver) const {
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  GISelChangeObserver &Observer = Helper.Observer;
  // Dispatch table for every opcode the rule tables marked as custom: each
  // case forwards to a dedicated legalize* helper. Returning false signals a
  // legalization failure for anything unhandled.
  switch (MI.getOpcode()) {
  default:
    // No idea what to do.
    return false;
  case TargetOpcode::G_VAARG:
    return legalizeVaArg(MI, MRI, MIRBuilder);
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE:
    return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
    return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
  case TargetOpcode::G_GLOBAL_VALUE:
    return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
  case TargetOpcode::G_SBFX:
  case TargetOpcode::G_UBFX:
    return legalizeBitfieldExtract(MI, MRI, Helper);
  case TargetOpcode::G_FSHL:
  case TargetOpcode::G_FSHR:
    return legalizeFunnelShift(MI, MRI, MIRBuilder, Observer, Helper);
  case TargetOpcode::G_ROTR:
    return legalizeRotate(MI, MRI, Helper);
  case TargetOpcode::G_CTPOP:
    return legalizeCTPOP(MI, MRI, Helper);
  case TargetOpcode::G_ATOMIC_CMPXCHG:
    return legalizeAtomicCmpxchg128(MI, MRI, Helper);
  case TargetOpcode::G_CTTZ:
    return legalizeCTTZ(MI, Helper);
  case TargetOpcode::G_BZERO:
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMMOVE:
  case TargetOpcode::G_MEMSET:
    return legalizeMemOps(MI, Helper);
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    return legalizeExtractVectorElt(MI, MRI, Helper);
  case TargetOpcode::G_DYN_STACKALLOC:
    return legalizeDynStackAlloc(MI, Helper);
  case TargetOpcode::G_PREFETCH:
    return legalizePrefetch(MI, Helper);
  case TargetOpcode::G_ABS:
    return Helper.lowerAbsToCNeg(MI);
  case TargetOpcode::G_ICMP:
    return legalizeICMP(MI, MRI, MIRBuilder);
  case TargetOpcode::G_BITCAST:
    return legalizeBitcast(MI, Helper);
  case TargetOpcode::G_CONCAT_VECTORS:
    return legalizeConcatVectors(MI, MRI, MIRBuilder);
  case TargetOpcode::G_FPTRUNC:
    // In order to lower an f64 -> f16 fptrunc properly, we need to use f32 as
    // an intermediary (G_FPTRUNC narrows, so the wide type is the source).
    return legalizeFptrunc(MI, MIRBuilder, MRI);
  }

  llvm_unreachable("expected switch to return");
}
1563
1564bool AArch64LegalizerInfo::legalizeBitcast(MachineInstr &MI,
1565 LegalizerHelper &Helper) const {
1566 assert(MI.getOpcode() == TargetOpcode::G_BITCAST && "Unexpected opcode");
1567 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
1568 // We're trying to handle casts from i1 vectors to scalars but reloading from
1569 // stack.
1570 if (!DstTy.isScalar() || !SrcTy.isVector() ||
1571 SrcTy.getElementType() != LLT::scalar(1))
1572 return false;
1573
1574 Helper.createStackStoreLoad(DstReg, SrcReg);
1575 MI.eraseFromParent();
1576 return true;
1577}
1578
// Custom legalization for G_FSHL / G_FSHR (funnel shifts). A G_FSHR with a
// 64-bit constant amount in range stays as-is; a constant G_FSHL is rewritten
// into the equivalent G_FSHR; everything else is lowered to plain shifts.
bool AArch64LegalizerInfo::legalizeFunnelShift(MachineInstr &MI,
                                               MachineIRBuilder &MIRBuilder,
                                               GISelChangeObserver &Observer,
                                               LegalizerHelper &Helper) const {
  assert(MI.getOpcode() == TargetOpcode::G_FSHL ||
         MI.getOpcode() == TargetOpcode::G_FSHR);

  // Keep as G_FSHR if shift amount is a G_CONSTANT, else use generic
  // lowering
  Register ShiftNo = MI.getOperand(3).getReg();
  LLT ShiftTy = MRI.getType(ShiftNo);
  auto VRegAndVal = getIConstantVRegValWithLookThrough(ShiftNo, MRI);

  // Adjust shift amount according to Opcode (FSHL/FSHR)
  // Convert FSHL to FSHR
  LLT OperationTy = MRI.getType(MI.getOperand(0).getReg());
  // The operation's bit width, expressed in the shift-amount type so the
  // constant amount can be reduced modulo it.
  APInt BitWidth(ShiftTy.getSizeInBits(), OperationTy.getSizeInBits(), false);

  // Lower non-constant shifts and leave zero shifts to the optimizer.
  if (!VRegAndVal || VRegAndVal->Value.urem(BitWidth) == 0)
    return (Helper.lowerFunnelShiftAsShifts(MI) ==

  APInt Amount = VRegAndVal->Value.urem(BitWidth);

  // fshl(a, b, n) is equivalent to fshr(a, b, BitWidth - n).
  Amount = MI.getOpcode() == TargetOpcode::G_FSHL ? BitWidth - Amount : Amount;

  // If the instruction is G_FSHR, has a 64-bit G_CONSTANT for shift amount
  // in the range of 0 <-> BitWidth, it is legal
  if (ShiftTy.getSizeInBits() == 64 && MI.getOpcode() == TargetOpcode::G_FSHR &&
      VRegAndVal->Value.ult(BitWidth))
    return true;

  // Cast the ShiftNumber to a 64-bit type
  auto Cast64 = MIRBuilder.buildConstant(LLT::integer(64), Amount.zext(64));

  // For G_FSHR, mutate the amount operand in place (observer notified).
  if (MI.getOpcode() == TargetOpcode::G_FSHR) {
    Observer.changingInstr(MI);
    MI.getOperand(3).setReg(Cast64.getReg(0));
    Observer.changedInstr(MI);
  }
  // If Opcode is FSHL, remove the FSHL instruction and create a FSHR
  // instruction
  else if (MI.getOpcode() == TargetOpcode::G_FSHL) {
    MIRBuilder.buildInstr(TargetOpcode::G_FSHR, {MI.getOperand(0).getReg()},
                          {MI.getOperand(1).getReg(), MI.getOperand(2).getReg(),
                           Cast64.getReg(0)});
    MI.eraseFromParent();
  }
  return true;
}
1631
// Custom legalization for vector G_ICMP: rewrites an NE compare as NOT(EQ) so
// later passes can pattern-match against the EQ form.
bool AArch64LegalizerInfo::legalizeICMP(MachineInstr &MI,
                                        MachineIRBuilder &MIRBuilder) const {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg1 = MI.getOperand(2).getReg();
  Register SrcReg2 = MI.getOperand(3).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg1);

  // Check the vector types are legal: matching element size and element count
  // on both sides, and a total destination size of 64 or 128 bits.
  if (DstTy.getScalarSizeInBits() != SrcTy.getScalarSizeInBits() ||
      DstTy.getNumElements() != SrcTy.getNumElements() ||
      (DstTy.getSizeInBits() != 64 && DstTy.getSizeInBits() != 128))
    return false;

  // Lowers G_ICMP NE => G_ICMP EQ to allow better pattern matching for
  // following passes
  CmpInst::Predicate Pred = (CmpInst::Predicate)MI.getOperand(1).getPredicate();
  if (Pred != CmpInst::ICMP_NE)
    return true; // Any other predicate is already acceptable as-is.
  Register CmpReg =
      MIRBuilder
          .buildICmp(CmpInst::ICMP_EQ, MRI.getType(DstReg), SrcReg1, SrcReg2)
          .getReg(0);
  MIRBuilder.buildNot(DstReg, CmpReg);

  MI.eraseFromParent();
  return true;
}
1661
// Custom legalization for G_ROTR with a narrower-than-64-bit rotate amount:
// widens the amount to i64 so imported selection patterns can match.
bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,
                                          LegalizerHelper &Helper) const {
  // To allow for imported patterns to match, we ensure that the rotate amount
  // is 64b with an extension.
  Register AmtReg = MI.getOperand(2).getReg();
  LLT AmtTy = MRI.getType(AmtReg);
  (void)AmtTy; // Only used by the asserts; silences unused warning in NDEBUG.
  assert(AmtTy.isScalar() && "Expected a scalar rotate");
  assert(AmtTy.getSizeInBits() < 64 && "Expected this rotate to be legal");
  // Zero-extend the amount and swap it into operand 2 in place, notifying the
  // observer of the mutation.
  auto NewAmt = Helper.MIRBuilder.buildZExt(LLT::integer(64), AmtReg);
  Helper.Observer.changingInstr(MI);
  MI.getOperand(2).setReg(NewAmt.getReg(0));
  Helper.Observer.changedInstr(MI);
  return true;
}
1678
// Custom legalization of G_GLOBAL_VALUE under the small code model: splits the
// address computation into ADRP (page) + G_ADD_LOW (page offset). Returns true
// (leaving the instruction alone) for the cases that must not be split.
bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
    GISelChangeObserver &Observer) const {
  assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
  // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP +
  // G_ADD_LOW instructions.
  // By splitting this here, we can optimize accesses in the small code model by
  // folding in the G_ADD_LOW into the load/store offset.
  auto &GlobalOp = MI.getOperand(1);
  // Don't modify an intrinsic call.
  if (GlobalOp.isSymbol())
    return true;
  const auto* GV = GlobalOp.getGlobal();
  if (GV->isThreadLocal())
    return true; // Don't want to modify TLS vars.

  auto &TM = ST->getTargetLowering()->getTargetMachine();
  unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);

  // GOT-referenced globals go through a GOT load instead; leave them alone.
  if (OpFlags & AArch64II::MO_GOT)
    return true;

  auto Offset = GlobalOp.getOffset();
  Register DstReg = MI.getOperand(0).getReg();
  // ADRP materializes the 4KB-page address of the global.
  auto ADRP = MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {})
                  .addGlobalAddress(GV, Offset, OpFlags | AArch64II::MO_PAGE);
  // Set the regclass on the dest reg too.
  MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);

  // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so
  // by creating a MOVK that sets bits 48-63 of the register to (global address
  // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to
  // prevent an incorrect tag being generated during relocation when the
  // global appears before the code section. Without the offset, a global at
  // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced
  // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 =
  // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe`
  // instead of `0xf`.
  // This assumes that we're in the small code model so we can assume a binary
  // size of <= 4GB, which makes the untagged PC relative offset positive. The
  // binary must also be loaded into address range [0, 2^48). Both of these
  // properties need to be ensured at runtime when using tagged addresses.
  if (OpFlags & AArch64II::MO_TAGGED) {
    assert(!Offset &&
           "Should not have folded in an offset for a tagged global!");
    ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP})
               .addGlobalAddress(GV, 0x100000000,
               .addImm(48);
    MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
  }

  // G_ADD_LOW adds the low 12 bits of the address to the page base.
  MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})
      .addGlobalAddress(GV, Offset,
  MI.eraseFromParent();
  return true;
}
1737
1739 MachineInstr &MI) const {
1740 MachineIRBuilder &MIB = Helper.MIRBuilder;
1741 MachineRegisterInfo &MRI = *MIB.getMRI();
1742
1743 auto LowerUnaryOp = [&MI, &MIB](unsigned Opcode) {
1744 MIB.buildInstr(Opcode, {MI.getOperand(0)}, {MI.getOperand(2)});
1745 MI.eraseFromParent();
1746 return true;
1747 };
1748 auto LowerBinOp = [&MI, &MIB](unsigned Opcode) {
1749 MIB.buildInstr(Opcode, {MI.getOperand(0)},
1750 {MI.getOperand(2), MI.getOperand(3)});
1751 MI.eraseFromParent();
1752 return true;
1753 };
1754 auto LowerTriOp = [&MI, &MIB](unsigned Opcode) {
1755 MIB.buildInstr(Opcode, {MI.getOperand(0)},
1756 {MI.getOperand(2), MI.getOperand(3), MI.getOperand(4)});
1757 MI.eraseFromParent();
1758 return true;
1759 };
1760
1761 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
1762 switch (IntrinsicID) {
1763 case Intrinsic::vacopy: {
1764 unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;
1765 unsigned VaListSize =
1766 (ST->isTargetDarwin() || ST->isTargetWindows())
1767 ? PtrSize
1768 : ST->isTargetILP32() ? 20 : 32;
1769
1770 MachineFunction &MF = *MI.getMF();
1772 LLT::scalar(VaListSize * 8));
1773 MIB.buildLoad(Val, MI.getOperand(2),
1776 VaListSize, Align(PtrSize)));
1777 MIB.buildStore(Val, MI.getOperand(1),
1780 VaListSize, Align(PtrSize)));
1781 MI.eraseFromParent();
1782 return true;
1783 }
1784 case Intrinsic::get_dynamic_area_offset: {
1785 MIB.buildConstant(MI.getOperand(0).getReg(), 0);
1786 MI.eraseFromParent();
1787 return true;
1788 }
1789 case Intrinsic::aarch64_mops_memset_tag: {
1790 assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1791 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
1792 // the instruction).
1793 auto &Value = MI.getOperand(3);
1794 Register ExtValueReg = MIB.buildAnyExt(LLT::integer(64), Value).getReg(0);
1795 Value.setReg(ExtValueReg);
1796 return true;
1797 }
1798 case Intrinsic::aarch64_prefetch: {
1799 auto &AddrVal = MI.getOperand(1);
1800
1801 int64_t IsWrite = MI.getOperand(2).getImm();
1802 int64_t Target = MI.getOperand(3).getImm();
1803 int64_t IsStream = MI.getOperand(4).getImm();
1804 int64_t IsData = MI.getOperand(5).getImm();
1805
1806 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
1807 (!IsData << 3) | // IsDataCache bit
1808 (Target << 1) | // Cache level bits
1809 (unsigned)IsStream; // Stream bit
1810
1811 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
1812 MI.eraseFromParent();
1813 return true;
1814 }
1815 case Intrinsic::aarch64_range_prefetch: {
1816 auto &AddrVal = MI.getOperand(1);
1817
1818 int64_t IsWrite = MI.getOperand(2).getImm();
1819 int64_t IsStream = MI.getOperand(3).getImm();
1820 unsigned PrfOp = (IsStream << 2) | IsWrite;
1821
1822 MIB.buildInstr(AArch64::G_AARCH64_RANGE_PREFETCH)
1823 .addImm(PrfOp)
1824 .add(AddrVal)
1825 .addUse(MI.getOperand(4).getReg()); // Metadata
1826 MI.eraseFromParent();
1827 return true;
1828 }
1829 case Intrinsic::aarch64_prefetch_ir: {
1830 auto &AddrVal = MI.getOperand(1);
1831 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(24).add(AddrVal);
1832 MI.eraseFromParent();
1833 return true;
1834 }
1835 case Intrinsic::aarch64_neon_uaddv:
1836 case Intrinsic::aarch64_neon_saddv:
1837 case Intrinsic::aarch64_neon_umaxv:
1838 case Intrinsic::aarch64_neon_smaxv:
1839 case Intrinsic::aarch64_neon_uminv:
1840 case Intrinsic::aarch64_neon_sminv: {
1841 bool IsSigned = IntrinsicID == Intrinsic::aarch64_neon_saddv ||
1842 IntrinsicID == Intrinsic::aarch64_neon_smaxv ||
1843 IntrinsicID == Intrinsic::aarch64_neon_sminv;
1844
1845 auto OldDst = MI.getOperand(0).getReg();
1846 auto OldDstTy = MRI.getType(OldDst);
1847 LLT NewDstTy = MRI.getType(MI.getOperand(2).getReg()).getElementType();
1848 if (OldDstTy == NewDstTy)
1849 return true;
1850
1851 auto NewDst = MRI.createGenericVirtualRegister(NewDstTy);
1852
1853 Helper.Observer.changingInstr(MI);
1854 MI.getOperand(0).setReg(NewDst);
1855 Helper.Observer.changedInstr(MI);
1856
1857 MIB.setInsertPt(MIB.getMBB(), ++MIB.getInsertPt());
1858 MIB.buildExtOrTrunc(IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT,
1859 OldDst, NewDst);
1860
1861 return true;
1862 }
1863 case Intrinsic::aarch64_neon_uaddlp:
1864 case Intrinsic::aarch64_neon_saddlp: {
1865 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlp
1866 ? AArch64::G_UADDLP
1867 : AArch64::G_SADDLP;
1868 MIB.buildInstr(Opc, {MI.getOperand(0)}, {MI.getOperand(2)});
1869 MI.eraseFromParent();
1870
1871 return true;
1872 }
1873 case Intrinsic::aarch64_neon_uaddlv:
1874 case Intrinsic::aarch64_neon_saddlv: {
1875 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlv
1876 ? AArch64::G_UADDLV
1877 : AArch64::G_SADDLV;
1878 Register DstReg = MI.getOperand(0).getReg();
1879 Register SrcReg = MI.getOperand(2).getReg();
1880 LLT DstTy = MRI.getType(DstReg);
1881
1882 LLT MidTy, ExtTy;
1883 if (DstTy.isScalar() && DstTy.getScalarSizeInBits() <= 32) {
1884 ExtTy = LLT::integer(32);
1885 MidTy = LLT::fixed_vector(4, ExtTy);
1886 } else {
1887 ExtTy = LLT::integer(64);
1888 MidTy = LLT::fixed_vector(2, ExtTy);
1889 }
1890
1891 Register MidReg =
1892 MIB.buildInstr(Opc, {MidTy}, {SrcReg})->getOperand(0).getReg();
1893 Register ZeroReg =
1894 MIB.buildConstant(LLT::integer(64), 0)->getOperand(0).getReg();
1895 Register ExtReg = MIB.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT, {ExtTy},
1896 {MidReg, ZeroReg})
1897 .getReg(0);
1898
1899 if (DstTy.getScalarSizeInBits() < 32)
1900 MIB.buildTrunc(DstReg, ExtReg);
1901 else
1902 MIB.buildCopy(DstReg, ExtReg);
1903
1904 MI.eraseFromParent();
1905
1906 return true;
1907 }
1908 case Intrinsic::aarch64_neon_smax:
1909 return LowerBinOp(TargetOpcode::G_SMAX);
1910 case Intrinsic::aarch64_neon_smin:
1911 return LowerBinOp(TargetOpcode::G_SMIN);
1912 case Intrinsic::aarch64_neon_umax:
1913 return LowerBinOp(TargetOpcode::G_UMAX);
1914 case Intrinsic::aarch64_neon_umin:
1915 return LowerBinOp(TargetOpcode::G_UMIN);
1916 case Intrinsic::aarch64_neon_fmax:
1917 return LowerBinOp(TargetOpcode::G_FMAXIMUM);
1918 case Intrinsic::aarch64_neon_fmin:
1919 return LowerBinOp(TargetOpcode::G_FMINIMUM);
1920 case Intrinsic::aarch64_neon_fmaxnm:
1921 return LowerBinOp(TargetOpcode::G_FMAXNUM);
1922 case Intrinsic::aarch64_neon_fminnm:
1923 return LowerBinOp(TargetOpcode::G_FMINNUM);
1924 case Intrinsic::aarch64_neon_pmull:
1925 case Intrinsic::aarch64_neon_pmull64:
1926 return LowerBinOp(AArch64::G_PMULL);
1927 case Intrinsic::aarch64_neon_smull:
1928 return LowerBinOp(AArch64::G_SMULL);
1929 case Intrinsic::aarch64_neon_umull:
1930 return LowerBinOp(AArch64::G_UMULL);
1931 case Intrinsic::aarch64_neon_sabd:
1932 return LowerBinOp(TargetOpcode::G_ABDS);
1933 case Intrinsic::aarch64_neon_uabd:
1934 return LowerBinOp(TargetOpcode::G_ABDU);
1935 case Intrinsic::aarch64_neon_uhadd:
1936 return LowerBinOp(TargetOpcode::G_UAVGFLOOR);
1937 case Intrinsic::aarch64_neon_urhadd:
1938 return LowerBinOp(TargetOpcode::G_UAVGCEIL);
1939 case Intrinsic::aarch64_neon_shadd:
1940 return LowerBinOp(TargetOpcode::G_SAVGFLOOR);
1941 case Intrinsic::aarch64_neon_srhadd:
1942 return LowerBinOp(TargetOpcode::G_SAVGCEIL);
1943 case Intrinsic::aarch64_neon_sqshrn: {
1944 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1945 return true;
1946 // Create right shift instruction. Store the output register in Shr.
1947 auto Shr = MIB.buildInstr(AArch64::G_VASHR,
1948 {MRI.getType(MI.getOperand(2).getReg())},
1949 {MI.getOperand(2), MI.getOperand(3).getImm()});
1950 // Build the narrow intrinsic, taking in Shr.
1951 MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_S, {MI.getOperand(0)}, {Shr});
1952 MI.eraseFromParent();
1953 return true;
1954 }
1955 case Intrinsic::aarch64_neon_sqshrun: {
1956 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1957 return true;
1958 // Create right shift instruction. Store the output register in Shr.
1959 auto Shr = MIB.buildInstr(AArch64::G_VASHR,
1960 {MRI.getType(MI.getOperand(2).getReg())},
1961 {MI.getOperand(2), MI.getOperand(3).getImm()});
1962 // Build the narrow intrinsic, taking in Shr.
1963 MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_U, {MI.getOperand(0)}, {Shr});
1964 MI.eraseFromParent();
1965 return true;
1966 }
1967 case Intrinsic::aarch64_neon_sqrshrn: {
1968 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1969 return true;
1970 // Create right shift instruction. Store the output register in Shr.
1971 auto Shr = MIB.buildInstr(AArch64::G_SRSHR_I,
1972 {MRI.getType(MI.getOperand(2).getReg())},
1973 {MI.getOperand(2), MI.getOperand(3).getImm()});
1974 // Build the narrow intrinsic, taking in Shr.
1975 MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_S, {MI.getOperand(0)}, {Shr});
1976 MI.eraseFromParent();
1977 return true;
1978 }
1979 case Intrinsic::aarch64_neon_sqrshrun: {
1980 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1981 return true;
1982 // Create right shift instruction. Store the output register in Shr.
1983 auto Shr = MIB.buildInstr(AArch64::G_SRSHR_I,
1984 {MRI.getType(MI.getOperand(2).getReg())},
1985 {MI.getOperand(2), MI.getOperand(3).getImm()});
1986 // Build the narrow intrinsic, taking in Shr.
1987 MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_U, {MI.getOperand(0)}, {Shr});
1988 MI.eraseFromParent();
1989 return true;
1990 }
1991 case Intrinsic::aarch64_neon_uqrshrn: {
1992 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1993 return true;
1994 // Create right shift instruction. Store the output register in Shr.
1995 auto Shr = MIB.buildInstr(AArch64::G_URSHR_I,
1996 {MRI.getType(MI.getOperand(2).getReg())},
1997 {MI.getOperand(2), MI.getOperand(3).getImm()});
1998 // Build the narrow intrinsic, taking in Shr.
1999 MIB.buildInstr(TargetOpcode::G_TRUNC_USAT_U, {MI.getOperand(0)}, {Shr});
2000 MI.eraseFromParent();
2001 return true;
2002 }
2003 case Intrinsic::aarch64_neon_uqshrn: {
2004 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
2005 return true;
2006 // Create right shift instruction. Store the output register in Shr.
2007 auto Shr = MIB.buildInstr(AArch64::G_VLSHR,
2008 {MRI.getType(MI.getOperand(2).getReg())},
2009 {MI.getOperand(2), MI.getOperand(3).getImm()});
2010 // Build the narrow intrinsic, taking in Shr.
2011 MIB.buildInstr(TargetOpcode::G_TRUNC_USAT_U, {MI.getOperand(0)}, {Shr});
2012 MI.eraseFromParent();
2013 return true;
2014 }
2015 case Intrinsic::aarch64_neon_sqshlu: {
2016 // Check if last operand is constant vector dup
2017 auto ShiftAmount = isConstantOrConstantSplatVector(
2018 *MRI.getVRegDef(MI.getOperand(3).getReg()), MRI);
2019 if (ShiftAmount) {
2020 // If so, create a new intrinsic with the correct shift amount
2021 MIB.buildInstr(AArch64::G_SQSHLU_I, {MI.getOperand(0)},
2022 {MI.getOperand(2)})
2023 .addImm(ShiftAmount->getSExtValue());
2024 MI.eraseFromParent();
2025 return true;
2026 }
2027 return false;
2028 }
2029 case Intrinsic::aarch64_neon_vsli: {
2030 MIB.buildInstr(
2031 AArch64::G_SLI, {MI.getOperand(0)},
2032 {MI.getOperand(2), MI.getOperand(3), MI.getOperand(4).getImm()});
2033 MI.eraseFromParent();
2034 break;
2035 }
2036 case Intrinsic::aarch64_neon_vsri: {
2037 MIB.buildInstr(
2038 AArch64::G_SRI, {MI.getOperand(0)},
2039 {MI.getOperand(2), MI.getOperand(3), MI.getOperand(4).getImm()});
2040 MI.eraseFromParent();
2041 break;
2042 }
2043 case Intrinsic::aarch64_neon_abs: {
2044 // Lower the intrinsic to G_ABS.
2045 MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, {MI.getOperand(2)});
2046 MI.eraseFromParent();
2047 return true;
2048 }
2049 case Intrinsic::aarch64_neon_sqadd: {
2050 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
2051 return LowerBinOp(TargetOpcode::G_SADDSAT);
2052 break;
2053 }
2054 case Intrinsic::aarch64_neon_sqsub: {
2055 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
2056 return LowerBinOp(TargetOpcode::G_SSUBSAT);
2057 break;
2058 }
2059 case Intrinsic::aarch64_neon_uqadd: {
2060 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
2061 return LowerBinOp(TargetOpcode::G_UADDSAT);
2062 break;
2063 }
2064 case Intrinsic::aarch64_neon_uqsub: {
2065 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
2066 return LowerBinOp(TargetOpcode::G_USUBSAT);
2067 break;
2068 }
2069 case Intrinsic::aarch64_neon_udot:
2070 return LowerTriOp(AArch64::G_UDOT);
2071 case Intrinsic::aarch64_neon_sdot:
2072 return LowerTriOp(AArch64::G_SDOT);
2073 case Intrinsic::aarch64_neon_usdot:
2074 return LowerTriOp(AArch64::G_USDOT);
2075 case Intrinsic::aarch64_neon_sqxtn:
2076 return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_S);
2077 case Intrinsic::aarch64_neon_sqxtun:
2078 return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_U);
2079 case Intrinsic::aarch64_neon_uqxtn:
2080 return LowerUnaryOp(TargetOpcode::G_TRUNC_USAT_U);
2081 case Intrinsic::aarch64_neon_fcvtzu:
2082 return LowerUnaryOp(TargetOpcode::G_FPTOUI_SAT);
2083 case Intrinsic::aarch64_neon_fcvtzs:
2084 return LowerUnaryOp(TargetOpcode::G_FPTOSI_SAT);
2085
2086 case Intrinsic::vector_reverse:
2087 // TODO: Add support for vector_reverse
2088 return false;
2089 }
2090
2091 return true;
2092}
2093
2094bool AArch64LegalizerInfo::legalizeShlAshrLshr(
2096 GISelChangeObserver &Observer) const {
2097 assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
2098 MI.getOpcode() == TargetOpcode::G_LSHR ||
2099 MI.getOpcode() == TargetOpcode::G_SHL);
2100 // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
2101 // imported patterns can select it later. Either way, it will be legal.
2102 Register AmtReg = MI.getOperand(2).getReg();
2103 LLT AmtRegEltTy = MRI.getType(AmtReg).getScalarType();
2104 auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI);
2105 if (!VRegAndVal)
2106 return true;
2107 // Check the shift amount is in range for an immediate form.
2108 int64_t Amount = VRegAndVal->Value.getSExtValue();
2109 if (Amount > 31)
2110 return true; // This will have to remain a register variant.
2111 auto ExtCst =
2112 MIRBuilder.buildConstant(AmtRegEltTy.changeElementSize(64), Amount);
2113 Observer.changingInstr(MI);
2114 MI.getOperand(2).setReg(ExtCst.getReg(0));
2115 Observer.changedInstr(MI);
2116 return true;
2117}
2118
2120 MachineRegisterInfo &MRI) {
2121 Base = Root;
2122 Offset = 0;
2123
2124 Register NewBase;
2125 int64_t NewOffset;
2126 if (mi_match(Root, MRI, m_GPtrAdd(m_Reg(NewBase), m_ICst(NewOffset))) &&
2127 isShiftedInt<7, 3>(NewOffset)) {
2128 Base = NewBase;
2129 Offset = NewOffset;
2130 }
2131}
2132
2133// FIXME: This should be removed and replaced with the generic bitcast legalize
2134// action.
2135bool AArch64LegalizerInfo::legalizeLoadStore(
2137 GISelChangeObserver &Observer) const {
2138 assert(MI.getOpcode() == TargetOpcode::G_STORE ||
2139 MI.getOpcode() == TargetOpcode::G_LOAD);
2140 // Here we just try to handle vector loads/stores where our value type might
2141 // have pointer elements, which the SelectionDAG importer can't handle. To
2142 // allow the existing patterns for s64 to fire for p0, we just try to bitcast
2143 // the value to use s64 types.
2144
2145 // Custom legalization requires the instruction, if not deleted, must be fully
2146 // legalized. In order to allow further legalization of the inst, we create
2147 // a new instruction and erase the existing one.
2148
2149 Register ValReg = MI.getOperand(0).getReg();
2150 const LLT ValTy = MRI.getType(ValReg);
2151
2152 if (ValTy == LLT::scalar(128)) {
2153
2154 AtomicOrdering Ordering = (*MI.memoperands_begin())->getSuccessOrdering();
2155 bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
2156 bool IsLoadAcquire = IsLoad && Ordering == AtomicOrdering::Acquire;
2157 bool IsStoreRelease = !IsLoad && Ordering == AtomicOrdering::Release;
2158 bool IsRcpC3 =
2159 ST->hasLSE2() && ST->hasRCPC3() && (IsLoadAcquire || IsStoreRelease);
2160
2161 LLT s64 = LLT::integer(64);
2162
2163 unsigned Opcode;
2164 if (IsRcpC3) {
2165 Opcode = IsLoad ? AArch64::LDIAPPX : AArch64::STILPX;
2166 } else {
2167 // For LSE2, loads/stores should have been converted to monotonic and had
2168 // a fence inserted after them.
2169 assert(Ordering == AtomicOrdering::Monotonic ||
2170 Ordering == AtomicOrdering::Unordered);
2171 assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");
2172
2173 Opcode = IsLoad ? AArch64::LDPXi : AArch64::STPXi;
2174 }
2175
2176 MachineInstrBuilder NewI;
2177 if (IsLoad) {
2178 NewI = MIRBuilder.buildInstr(Opcode, {s64, s64}, {});
2179 MIRBuilder.buildMergeLikeInstr(
2180 ValReg, {NewI->getOperand(0), NewI->getOperand(1)});
2181 } else {
2182 auto Split = MIRBuilder.buildUnmerge(s64, MI.getOperand(0));
2183 NewI = MIRBuilder.buildInstr(
2184 Opcode, {}, {Split->getOperand(0), Split->getOperand(1)});
2185 }
2186
2187 if (IsRcpC3) {
2188 NewI.addUse(MI.getOperand(1).getReg());
2189 } else {
2190 Register Base;
2191 int Offset;
2192 matchLDPSTPAddrMode(MI.getOperand(1).getReg(), Base, Offset, MRI);
2193 NewI.addUse(Base);
2194 NewI.addImm(Offset / 8);
2195 }
2196
2197 NewI.cloneMemRefs(MI);
2198 constrainSelectedInstRegOperands(*NewI, *ST->getInstrInfo(),
2199 *MRI.getTargetRegisterInfo(),
2200 *ST->getRegBankInfo());
2201 MI.eraseFromParent();
2202 return true;
2203 }
2204
2205 if (!ValTy.isPointerVector() ||
2206 ValTy.getElementType().getAddressSpace() != 0) {
2207 LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
2208 return false;
2209 }
2210
2211 unsigned PtrSize = ValTy.getElementType().getSizeInBits();
2212 const LLT NewTy = LLT::vector(ValTy.getElementCount(), LLT::integer(PtrSize));
2213 auto &MMO = **MI.memoperands_begin();
2214 MMO.setType(NewTy);
2215
2216 if (MI.getOpcode() == TargetOpcode::G_STORE) {
2217 auto Bitcast = MIRBuilder.buildBitcast(NewTy, ValReg);
2218 MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1), MMO);
2219 } else {
2220 auto NewLoad = MIRBuilder.buildLoad(NewTy, MI.getOperand(1), MMO);
2221 MIRBuilder.buildBitcast(ValReg, NewLoad);
2222 }
2223 MI.eraseFromParent();
2224 return true;
2225}
2226
2227bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
2229 MachineIRBuilder &MIRBuilder) const {
2230 MachineFunction &MF = MIRBuilder.getMF();
2231 Align Alignment(MI.getOperand(2).getImm());
2232 Register Dst = MI.getOperand(0).getReg();
2233 Register ListPtr = MI.getOperand(1).getReg();
2234
2235 LLT PtrTy = MRI.getType(ListPtr);
2236 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
2237
2238 const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
2239 const Align PtrAlign = Align(PtrSize);
2240 auto List = MIRBuilder.buildLoad(
2241 PtrTy, ListPtr,
2242 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
2243 PtrTy, PtrAlign));
2244
2245 MachineInstrBuilder DstPtr;
2246 if (Alignment > PtrAlign) {
2247 // Realign the list to the actual required alignment.
2248 auto AlignMinus1 =
2249 MIRBuilder.buildConstant(IntPtrTy, Alignment.value() - 1);
2250 auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0));
2251 DstPtr = MIRBuilder.buildMaskLowPtrBits(PtrTy, ListTmp, Log2(Alignment));
2252 } else
2253 DstPtr = List;
2254
2255 LLT ValTy = MRI.getType(Dst);
2256 uint64_t ValSize = ValTy.getSizeInBits() / 8;
2257 MIRBuilder.buildLoad(
2258 Dst, DstPtr,
2259 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
2260 ValTy, std::max(Alignment, PtrAlign)));
2261
2262 auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrAlign));
2263
2264 auto NewList = MIRBuilder.buildPtrAdd(PtrTy, DstPtr, Size.getReg(0));
2265
2266 MIRBuilder.buildStore(NewList, ListPtr,
2267 *MF.getMachineMemOperand(MachinePointerInfo(),
2269 PtrTy, PtrAlign));
2270
2271 MI.eraseFromParent();
2272 return true;
2273}
2274
2275bool AArch64LegalizerInfo::legalizeBitfieldExtract(
2276 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2277 // Only legal if we can select immediate forms.
2278 // TODO: Lower this otherwise.
2279 return getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI) &&
2280 getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
2281}
2282
2283bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
2285 LegalizerHelper &Helper) const {
2286 // When there is no integer popcount instruction (FEAT_CSSC isn't available),
2287 // it can be more efficiently lowered to the following sequence that uses
2288 // AdvSIMD registers/instructions as long as the copies to/from the AdvSIMD
2289 // registers are cheap.
2290 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
2291 // CNT V0.8B, V0.8B // 8xbyte pop-counts
2292 // ADDV B0, V0.8B // sum 8xbyte pop-counts
2293 // UMOV X0, V0.B[0] // copy byte result back to integer reg
2294 //
2295 // For 128 bit vector popcounts, we lower to the following sequence:
2296 // cnt.16b v0, v0 // v8s16, v4s32, v2s64
2297 // uaddlp.8h v0, v0 // v8s16, v4s32, v2s64
2298 // uaddlp.4s v0, v0 // v4s32, v2s64
2299 // uaddlp.2d v0, v0 // v2s64
2300 //
2301 // For 64 bit vector popcounts, we lower to the following sequence:
2302 // cnt.8b v0, v0 // v4s16, v2s32
2303 // uaddlp.4h v0, v0 // v4s16, v2s32
2304 // uaddlp.2s v0, v0 // v2s32
2305
2306 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2307 Register Dst = MI.getOperand(0).getReg();
2308 Register Val = MI.getOperand(1).getReg();
2309 LLT Ty = MRI.getType(Val);
2310
2311 LLT i64 = LLT::integer(64);
2312 LLT i32 = LLT::integer(32);
2313 LLT i16 = LLT::integer(16);
2314 LLT i8 = LLT::integer(8);
2315 unsigned Size = Ty.getSizeInBits();
2316
2317 assert(Ty == MRI.getType(Dst) &&
2318 "Expected src and dst to have the same type!");
2319
2320 if (ST->hasCSSC() && Ty.isScalar() && Size == 128) {
2321
2322 auto Split = MIRBuilder.buildUnmerge(i64, Val);
2323 auto CTPOP1 = MIRBuilder.buildCTPOP(i64, Split->getOperand(0));
2324 auto CTPOP2 = MIRBuilder.buildCTPOP(i64, Split->getOperand(1));
2325 auto Add = MIRBuilder.buildAdd(i64, CTPOP1, CTPOP2);
2326
2327 MIRBuilder.buildZExt(Dst, Add);
2328 MI.eraseFromParent();
2329 return true;
2330 }
2331
2332 if (!ST->hasNEON() ||
2333 MI.getMF()->getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) {
2334 // Use generic lowering when custom lowering is not possible.
2335 return Ty.isScalar() && (Size == 32 || Size == 64) &&
2336 Helper.lowerBitCount(MI) ==
2338 }
2339
2340 // Pre-conditioning: widen Val up to the nearest vector type.
2341 // s32,s64,v4s16,v2s32 -> v8i8
2342 // v8s16,v4s32,v2s64 -> v16i8
2343 LLT VTy = Size == 128 ? LLT::fixed_vector(16, i8) : LLT::fixed_vector(8, i8);
2344 if (Ty.isScalar()) {
2345 assert((Size == 32 || Size == 64 || Size == 128) && "Expected only 32, 64, or 128 bit scalars!");
2346 if (Size == 32) {
2347 Val = MIRBuilder.buildZExt(i64, Val).getReg(0);
2348 }
2349 }
2350 Val = MIRBuilder.buildBitcast(VTy, Val).getReg(0);
2351
2352 // Count bits in each byte-sized lane.
2353 auto CTPOP = MIRBuilder.buildCTPOP(VTy, Val);
2354
2355 // Sum across lanes.
2356 if (ST->hasDotProd() && Ty.isVector() && Ty.getNumElements() >= 2 &&
2357 Ty.getScalarSizeInBits() != 16) {
2358 LLT Dt = Ty == LLT::fixed_vector(2, i64) ? LLT::fixed_vector(4, i32) : Ty;
2359 auto Zeros = MIRBuilder.buildConstant(Dt, 0);
2360 auto Ones = MIRBuilder.buildConstant(VTy, 1);
2361 MachineInstrBuilder Sum;
2362
2363 if (Ty == LLT::fixed_vector(2, i64)) {
2364 auto UDOT =
2365 MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2366 Sum = MIRBuilder.buildInstr(AArch64::G_UADDLP, {Ty}, {UDOT});
2367 } else if (Ty == LLT::fixed_vector(4, i32)) {
2368 Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2369 } else if (Ty == LLT::fixed_vector(2, i32)) {
2370 Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2371 } else {
2372 llvm_unreachable("unexpected vector shape");
2373 }
2374
2375 Sum->getOperand(0).setReg(Dst);
2376 MI.eraseFromParent();
2377 return true;
2378 }
2379
2380 Register HSum = CTPOP.getReg(0);
2381 unsigned Opc;
2382 SmallVector<LLT> HAddTys;
2383 if (Ty.isScalar()) {
2384 Opc = Intrinsic::aarch64_neon_uaddlv;
2385 HAddTys.push_back(i32);
2386 } else if (Ty == LLT::fixed_vector(8, i16)) {
2387 Opc = Intrinsic::aarch64_neon_uaddlp;
2388 HAddTys.push_back(LLT::fixed_vector(8, i16));
2389 } else if (Ty == LLT::fixed_vector(4, i32)) {
2390 Opc = Intrinsic::aarch64_neon_uaddlp;
2391 HAddTys.push_back(LLT::fixed_vector(8, i16));
2392 HAddTys.push_back(LLT::fixed_vector(4, i32));
2393 } else if (Ty == LLT::fixed_vector(2, i64)) {
2394 Opc = Intrinsic::aarch64_neon_uaddlp;
2395 HAddTys.push_back(LLT::fixed_vector(8, i16));
2396 HAddTys.push_back(LLT::fixed_vector(4, i32));
2397 HAddTys.push_back(LLT::fixed_vector(2, i64));
2398 } else if (Ty == LLT::fixed_vector(4, i16)) {
2399 Opc = Intrinsic::aarch64_neon_uaddlp;
2400 HAddTys.push_back(LLT::fixed_vector(4, i16));
2401 } else if (Ty == LLT::fixed_vector(2, i32)) {
2402 Opc = Intrinsic::aarch64_neon_uaddlp;
2403 HAddTys.push_back(LLT::fixed_vector(4, i16));
2404 HAddTys.push_back(LLT::fixed_vector(2, i32));
2405 } else
2406 llvm_unreachable("unexpected vector shape");
2408 for (LLT HTy : HAddTys) {
2409 UADD = MIRBuilder.buildIntrinsic(Opc, {HTy}).addUse(HSum);
2410 HSum = UADD.getReg(0);
2411 }
2412
2413 // Post-conditioning.
2414 if (Ty.isScalar() && (Size == 64 || Size == 128))
2415 MIRBuilder.buildZExt(Dst, UADD);
2416 else
2417 UADD->getOperand(0).setReg(Dst);
2418 MI.eraseFromParent();
2419 return true;
2420}
2421
2422bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128(
2423 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2424 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2425 LLT i64 = LLT::integer(64);
2426 auto Addr = MI.getOperand(1).getReg();
2427 auto DesiredI = MIRBuilder.buildUnmerge({i64, i64}, MI.getOperand(2));
2428 auto NewI = MIRBuilder.buildUnmerge({i64, i64}, MI.getOperand(3));
2429 auto DstLo = MRI.createGenericVirtualRegister(i64);
2430 auto DstHi = MRI.createGenericVirtualRegister(i64);
2431
2432 MachineInstrBuilder CAS;
2433 if (ST->hasLSE()) {
2434 // We have 128-bit CASP instructions taking XSeqPair registers, which are
2435 // s128. We need the merge/unmerge to bracket the expansion and pair up with
2436 // the rest of the MIR so we must reassemble the extracted registers into a
2437 // 128-bit known-regclass one with code like this:
2438 //
2439 // %in1 = REG_SEQUENCE Lo, Hi ; One for each input
2440 // %out = CASP %in1, ...
2441 // %OldLo = G_EXTRACT %out, 0
2442 // %OldHi = G_EXTRACT %out, 64
2443 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2444 unsigned Opcode;
2445 switch (Ordering) {
2447 Opcode = AArch64::CASPAX;
2448 break;
2450 Opcode = AArch64::CASPLX;
2451 break;
2454 Opcode = AArch64::CASPALX;
2455 break;
2456 default:
2457 Opcode = AArch64::CASPX;
2458 break;
2459 }
2460
2461 LLT s128 = LLT::scalar(128);
2462 auto CASDst = MRI.createGenericVirtualRegister(s128);
2463 auto CASDesired = MRI.createGenericVirtualRegister(s128);
2464 auto CASNew = MRI.createGenericVirtualRegister(s128);
2465 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASDesired}, {})
2466 .addUse(DesiredI->getOperand(0).getReg())
2467 .addImm(AArch64::sube64)
2468 .addUse(DesiredI->getOperand(1).getReg())
2469 .addImm(AArch64::subo64);
2470 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASNew}, {})
2471 .addUse(NewI->getOperand(0).getReg())
2472 .addImm(AArch64::sube64)
2473 .addUse(NewI->getOperand(1).getReg())
2474 .addImm(AArch64::subo64);
2475
2476 CAS = MIRBuilder.buildInstr(Opcode, {CASDst}, {CASDesired, CASNew, Addr});
2477
2478 MIRBuilder.buildExtract({DstLo}, {CASDst}, 0);
2479 MIRBuilder.buildExtract({DstHi}, {CASDst}, 64);
2480 } else {
2481 // The -O0 CMP_SWAP_128 is friendlier to generate code for because LDXP/STXP
2482 // can take arbitrary registers so it just has the normal GPR64 operands the
2483 // rest of AArch64 is expecting.
2484 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2485 unsigned Opcode;
2486 switch (Ordering) {
2488 Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
2489 break;
2491 Opcode = AArch64::CMP_SWAP_128_RELEASE;
2492 break;
2495 Opcode = AArch64::CMP_SWAP_128;
2496 break;
2497 default:
2498 Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
2499 break;
2500 }
2501
2502 auto Scratch = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2503 CAS = MIRBuilder.buildInstr(Opcode, {DstLo, DstHi, Scratch},
2504 {Addr, DesiredI->getOperand(0),
2505 DesiredI->getOperand(1), NewI->getOperand(0),
2506 NewI->getOperand(1)});
2507 }
2508
2509 CAS.cloneMemRefs(MI);
2510 constrainSelectedInstRegOperands(*CAS, *ST->getInstrInfo(),
2511 *MRI.getTargetRegisterInfo(),
2512 *ST->getRegBankInfo());
2513
2514 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {DstLo, DstHi});
2515 MI.eraseFromParent();
2516 return true;
2517}
2518
2519bool AArch64LegalizerInfo::legalizeCTTZ(MachineInstr &MI,
2520 LegalizerHelper &Helper) const {
2521 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2522 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2523 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
2524 auto BitReverse = MIRBuilder.buildBitReverse(Ty, MI.getOperand(1));
2525 MIRBuilder.buildCTLZ(MI.getOperand(0).getReg(), BitReverse);
2526 MI.eraseFromParent();
2527 return true;
2528}
2529
2530bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
2531 LegalizerHelper &Helper) const {
2532 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2533
2534 // Tagged version MOPSMemorySetTagged is legalised in legalizeIntrinsic
2535 if (MI.getOpcode() == TargetOpcode::G_MEMSET) {
2536 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
2537 // the instruction).
2538 auto &Value = MI.getOperand(1);
2539 Register ExtValueReg =
2540 MIRBuilder.buildAnyExt(LLT::integer(64), Value).getReg(0);
2541 Value.setReg(ExtValueReg);
2542 return true;
2543 }
2544
2545 return false;
2546}
2547
2548bool AArch64LegalizerInfo::legalizeExtractVectorElt(
2549 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2550 const GExtractVectorElement *Element = cast<GExtractVectorElement>(&MI);
2551 auto VRegAndVal =
2553 if (VRegAndVal)
2554 return true;
2555 LLT VecTy = MRI.getType(Element->getVectorReg());
2556 if (VecTy.isScalableVector())
2557 return true;
2558 return Helper.lowerExtractInsertVectorElt(MI) !=
2560}
2561
2562bool AArch64LegalizerInfo::legalizeDynStackAlloc(
2563 MachineInstr &MI, LegalizerHelper &Helper) const {
2564 MachineFunction &MF = *MI.getParent()->getParent();
2565 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2566 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2567
2568 // If stack probing is not enabled for this function, use the default
2569 // lowering.
2570 if (!MF.getFunction().hasFnAttribute("probe-stack") ||
2571 MF.getFunction().getFnAttribute("probe-stack").getValueAsString() !=
2572 "inline-asm") {
2573 Helper.lowerDynStackAlloc(MI);
2574 return true;
2575 }
2576
2577 Register Dst = MI.getOperand(0).getReg();
2578 Register AllocSize = MI.getOperand(1).getReg();
2579 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
2580
2581 assert(MRI.getType(Dst) == LLT::pointer(0, 64) &&
2582 "Unexpected type for dynamic alloca");
2583 assert(MRI.getType(AllocSize) == LLT::scalar(64) &&
2584 "Unexpected type for dynamic alloca");
2585
2586 LLT PtrTy = MRI.getType(Dst);
2587 Register SPReg =
2589 Register SPTmp =
2590 Helper.getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
2591 auto NewMI =
2592 MIRBuilder.buildInstr(AArch64::PROBED_STACKALLOC_DYN, {}, {SPTmp});
2593 MRI.setRegClass(NewMI.getReg(0), &AArch64::GPR64commonRegClass);
2594 MIRBuilder.setInsertPt(*NewMI->getParent(), NewMI);
2595 MIRBuilder.buildCopy(Dst, SPTmp);
2596
2597 MI.eraseFromParent();
2598 return true;
2599}
2600
2601bool AArch64LegalizerInfo::legalizePrefetch(MachineInstr &MI,
2602 LegalizerHelper &Helper) const {
2603 MachineIRBuilder &MIB = Helper.MIRBuilder;
2604 auto &AddrVal = MI.getOperand(0);
2605
2606 int64_t IsWrite = MI.getOperand(1).getImm();
2607 int64_t Locality = MI.getOperand(2).getImm();
2608 int64_t IsData = MI.getOperand(3).getImm();
2609
2610 bool IsStream = Locality == 0;
2611 if (Locality != 0) {
2612 assert(Locality <= 3 && "Prefetch locality out-of-range");
2613 // The locality degree is the opposite of the cache speed.
2614 // Put the number the other way around.
2615 // The encoding starts at 0 for level 1
2616 Locality = 3 - Locality;
2617 }
2618
2619 unsigned PrfOp = (IsWrite << 4) | (!IsData << 3) | (Locality << 1) | IsStream;
2620
2621 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
2622 MI.eraseFromParent();
2623 return true;
2624}
2625
2626bool AArch64LegalizerInfo::legalizeConcatVectors(
2628 MachineIRBuilder &MIRBuilder) const {
2629 // Widen sub-byte element vectors to byte-sized elements before concatenating.
2630 // This is analogous to SDAG's integer type promotion for sub-byte types.
2632 Register DstReg = Concat.getReg(0);
2633 LLT DstTy = MRI.getType(DstReg);
2634 assert(DstTy.getScalarSizeInBits() < 8 && "Expected dst ty to be < 8b");
2635
2636 unsigned WideEltSize =
2637 std::max(8u, (unsigned)PowerOf2Ceil(DstTy.getScalarSizeInBits()));
2638 LLT SrcTy = MRI.getType(Concat.getSourceReg(0));
2639 LLT WideSrcTy = SrcTy.changeElementSize(WideEltSize);
2640 LLT WideDstTy = DstTy.changeElementSize(WideEltSize);
2641
2642 SmallVector<Register> WideSrcs;
2643 for (unsigned I = 0; I < Concat.getNumSources(); ++I) {
2644 auto Wide = MIRBuilder.buildAnyExt(WideSrcTy, Concat.getSourceReg(I));
2645 WideSrcs.push_back(Wide.getReg(0));
2646 }
2647
2648 auto WideConcat = MIRBuilder.buildConcatVectors(WideDstTy, WideSrcs);
2649 MIRBuilder.buildTrunc(DstReg, WideConcat);
2650 MI.eraseFromParent();
2651 return true;
2652}
2653
2654bool AArch64LegalizerInfo::legalizeFptrunc(MachineInstr &MI,
2655 MachineIRBuilder &MIRBuilder,
2656 MachineRegisterInfo &MRI) const {
2657 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
2658 assert(SrcTy.isFixedVector() && isPowerOf2_32(SrcTy.getNumElements()) &&
2659 "Expected a power of 2 elements");
2660
2661 // We must mutate types here as FPTrunc may be used on a IEEE floating point
2662 // or a brainfloat.
2663 LLT v2s16 = DstTy.changeElementCount(2);
2664 LLT v4s16 = DstTy.changeElementCount(4);
2665 LLT v2s32 = SrcTy.changeElementCount(2).changeElementSize(32);
2666 LLT v4s32 = SrcTy.changeElementCount(4).changeElementSize(32);
2667 LLT v2s64 = SrcTy.changeElementCount(2);
2668
2669 SmallVector<Register> RegsToUnmergeTo;
2670 SmallVector<Register> TruncOddDstRegs;
2671 SmallVector<Register> RegsToMerge;
2672
2673 unsigned ElemCount = SrcTy.getNumElements();
2674
2675 // Find the biggest size chunks we can work with
2676 int StepSize = ElemCount % 4 ? 2 : 4;
2677
2678 // If we have a power of 2 greater than 2, we need to first unmerge into
2679 // enough pieces
2680 if (ElemCount <= 2)
2681 RegsToUnmergeTo.push_back(Src);
2682 else {
2683 for (unsigned i = 0; i < ElemCount / 2; ++i)
2684 RegsToUnmergeTo.push_back(MRI.createGenericVirtualRegister(v2s64));
2685
2686 MIRBuilder.buildUnmerge(RegsToUnmergeTo, Src);
2687 }
2688
2689 // Create all of the round-to-odd instructions and store them
2690 for (auto SrcReg : RegsToUnmergeTo) {
2691 Register Mid =
2692 MIRBuilder.buildInstr(AArch64::G_FPTRUNC_ODD, {v2s32}, {SrcReg})
2693 .getReg(0);
2694 TruncOddDstRegs.push_back(Mid);
2695 }
2696
2697 // Truncate 4s32 to 4s16 if we can to reduce instruction count, otherwise
2698 // truncate 2s32 to 2s16.
2699 unsigned Index = 0;
2700 for (unsigned LoopIter = 0; LoopIter < ElemCount / StepSize; ++LoopIter) {
2701 if (StepSize == 4) {
2702 Register ConcatDst =
2703 MIRBuilder
2705 {v4s32}, {TruncOddDstRegs[Index++], TruncOddDstRegs[Index++]})
2706 .getReg(0);
2707
2708 RegsToMerge.push_back(
2709 MIRBuilder.buildFPTrunc(v4s16, ConcatDst).getReg(0));
2710 } else {
2711 RegsToMerge.push_back(
2712 MIRBuilder.buildFPTrunc(v2s16, TruncOddDstRegs[Index++]).getReg(0));
2713 }
2714 }
2715
2716 // If there is only one register, replace the destination
2717 if (RegsToMerge.size() == 1) {
2718 MRI.replaceRegWith(Dst, RegsToMerge.pop_back_val());
2719 MI.eraseFromParent();
2720 return true;
2721 }
2722
2723 // Merge the rest of the instructions & replace the register
2724 Register Fin = MIRBuilder.buildMergeLikeInstr(DstTy, RegsToMerge).getReg(0);
2725 MRI.replaceRegWith(Dst, Fin);
2726 MI.eraseFromParent();
2727 return true;
2728}
static void matchLDPSTPAddrMode(Register Root, Register &Base, int &Offset, MachineRegisterInfo &MRI)
This file declares the targeting of the MachineLegalizer class for AArch64.
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static Error unsupported(const char *Str, const Triple &T)
Definition MachO.cpp:71
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic operations.
IRTranslator LLVM IR MI
Interface for Targets to specify which operations they can successfully select and how the others sho...
#define I(x, y, z)
Definition MD5.cpp:57
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineIRBuilder class.
Promote Memory to Register
Definition Mem2Reg.cpp:110
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
ppc ctr loops verify
if(PassOpts->AAPipeline)
static constexpr MCPhysReg SPReg
This file contains some templates that are useful if you are working with the STL at all.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static constexpr int Concat[]
bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, LostDebugLocObserver &LocObserver) const override
Called for instructions with the Custom LegalizationAction.
bool legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const override
AArch64LegalizerInfo(const AArch64Subtarget &ST)
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1054
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1708
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_NE
not equal
Definition InstrTypes.h:698
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:763
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:728
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
static constexpr LLT float64()
Get a 64-bit IEEE double value.
LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
constexpr bool isScalableVector() const
Returns true if the LLT is a scalable vector.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
static constexpr LLT scalable_vector(unsigned MinNumElements, unsigned ScalarSizeInBits)
Get a low-level scalable vector of some number of elements and element width.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
LLT getScalarType() const
constexpr bool isPointerVector() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
static constexpr LLT float128()
Get a 128-bit IEEE quad value.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr ElementCount getElementCount() const
static constexpr LLT float16()
Get a 16-bit IEEE half value.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr bool isFixedVector() const
Returns true if the LLT is a fixed vector.
static LLT integer(unsigned SizeInBits)
static constexpr LLT bfloat16()
LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
static constexpr LLT float32()
Get a 32-bit IEEE float value.
LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
LLVM_ABI void computeTables()
Compute any ancillary tables needed to quickly decide how an operation should be handled.
LegalizeRuleSet & minScalar(unsigned TypeIdx, const LLT Ty)
Ensure the scalar is at least as wide as Ty.
LegalizeRuleSet & widenScalarOrEltToNextPow2OrMinSize(unsigned TypeIdx, unsigned MinSize=0)
Widen the scalar or vector element type to the next power of two that is at least MinSize.
LegalizeRuleSet & legalFor(std::initializer_list< LLT > Types)
The instruction is legal when type index 0 is any type in the given list.
LegalizeRuleSet & maxScalarEltSameAsIf(LegalityPredicate Predicate, unsigned TypeIdx, unsigned SmallTypeIdx)
Conditionally narrow the scalar or elt to match the size of another.
LegalizeRuleSet & unsupported()
The instruction is unsupported.
LegalizeRuleSet & scalarSameSizeAs(unsigned TypeIdx, unsigned SameSizeIdx)
Change the type TypeIdx to have the same scalar size as type SameSizeIdx.
LegalizeRuleSet & bitcastIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
The specified type index is coerced if predicate is true.
LegalizeRuleSet & libcallFor(std::initializer_list< LLT > Types)
LegalizeRuleSet & maxScalar(unsigned TypeIdx, const LLT Ty)
Ensure the scalar is at most as wide as Ty.
LegalizeRuleSet & minScalarOrElt(unsigned TypeIdx, const LLT Ty)
Ensure the scalar or element is at least as wide as Ty.
LegalizeRuleSet & clampMaxNumElements(unsigned TypeIdx, const LLT EltTy, unsigned MaxElements)
Limit the number of elements in EltTy vectors to at most MaxElements.
LegalizeRuleSet & clampMinNumElements(unsigned TypeIdx, const LLT EltTy, unsigned MinElements)
Limit the number of elements in EltTy vectors to at least MinElements.
LegalizeRuleSet & widenVectorEltsToVectorMinSize(unsigned TypeIdx, unsigned VectorSize)
Ensure the vector size is at least as wide as VectorSize by promoting the element.
LegalizeRuleSet & lowerIfMemSizeNotPow2()
Lower a memory operation if the memory size, rounded to bytes, is not a power of 2.
LegalizeRuleSet & minScalarEltSameAsIf(LegalityPredicate Predicate, unsigned TypeIdx, unsigned LargeTypeIdx)
Conditionally widen the scalar or elt to match the size of another.
LegalizeRuleSet & customForCartesianProduct(std::initializer_list< LLT > Types)
LegalizeRuleSet & lowerIfMemSizeNotByteSizePow2()
Lower a memory operation if the memory access size is not a round power of 2 byte size.
LegalizeRuleSet & moreElementsToNextPow2(unsigned TypeIdx)
Add more elements to the vector to reach the next power of two.
LegalizeRuleSet & narrowScalarIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
Narrow the scalar to the one selected by the mutation if the predicate is true.
LegalizeRuleSet & lower()
The instruction is lowered.
LegalizeRuleSet & moreElementsIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
Add more elements to reach the type selected by the mutation if the predicate is true.
LegalizeRuleSet & lowerFor(std::initializer_list< LLT > Types)
The instruction is lowered when type index 0 is any type in the given list.
LegalizeRuleSet & scalarizeIf(LegalityPredicate Predicate, unsigned TypeIdx)
LegalizeRuleSet & lowerIf(LegalityPredicate Predicate)
The instruction is lowered if predicate is true.
LegalizeRuleSet & clampScalar(unsigned TypeIdx, const LLT MinTy, const LLT MaxTy)
Limit the range of scalar sizes to MinTy and MaxTy.
LegalizeRuleSet & custom()
Unconditionally custom lower.
LegalizeRuleSet & minScalarSameAs(unsigned TypeIdx, unsigned LargeTypeIdx)
Widen the scalar to match the size of another.
LegalizeRuleSet & unsupportedIf(LegalityPredicate Predicate)
LegalizeRuleSet & minScalarOrEltIf(LegalityPredicate Predicate, unsigned TypeIdx, const LLT Ty)
Ensure the scalar or element is at least as wide as Ty.
LegalizeRuleSet & widenScalarIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
Widen the scalar to the one selected by the mutation if the predicate is true.
LegalizeRuleSet & alwaysLegal()
LegalizeRuleSet & clampNumElements(unsigned TypeIdx, const LLT MinTy, const LLT MaxTy)
Limit the number of elements for the given vectors to at least MinTy's number of elements and at most...
LegalizeRuleSet & maxScalarIf(LegalityPredicate Predicate, unsigned TypeIdx, const LLT Ty)
Conditionally limit the maximum size of the scalar.
LegalizeRuleSet & customIf(LegalityPredicate Predicate)
LegalizeRuleSet & widenScalarToNextPow2(unsigned TypeIdx, unsigned MinSize=0)
Widen the scalar to the next power of two that is at least MinSize.
LegalizeRuleSet & scalarize(unsigned TypeIdx)
LegalizeRuleSet & legalForCartesianProduct(std::initializer_list< LLT > Types)
The instruction is legal when type indexes 0 and 1 are both in the given list.
LegalizeRuleSet & legalForTypesWithMemDesc(std::initializer_list< LegalityPredicates::TypePairAndMemDesc > TypesAndMemDesc)
The instruction is legal when type indexes 0 and 1 along with the memory size and minimum alignment i...
unsigned immIdx(unsigned ImmIdx)
LegalizeRuleSet & widenScalarOrEltToNextPow2(unsigned TypeIdx, unsigned MinSize=0)
Widen the scalar or vector element type to the next power of two that is at least MinSize.
LegalizeRuleSet & legalIf(LegalityPredicate Predicate)
The instruction is legal if predicate is true.
LLVM_ABI LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBitCount(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LLVM_ABI LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
const TargetLowering & getTargetLowering() const
LLVM_ABI LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LLVM_ABI MachineInstrBuilder createStackStoreLoad(const DstOp &Res, const SrcOp &Val)
Create a store of Val to a stack temporary and return a load as the same type as Res.
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
LLVM_ABI Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LegalizeRuleSet & getActionDefinitionsBuilder(unsigned Opcode)
Get the action definition builder for the given opcode.
const LegacyLegalizerInfo & getLegacyLegalizerInfo() const
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Helper class to build MachineInstr.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildNot(const DstOp &Dst, const SrcOp &Src0)
Build and insert a bitwise not, NegOne = G_CONSTANT -1 Res = G_XOR Op0, NegOne.
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildExtract(const DstOp &Res, const SrcOp &Src, uint64_t Index)
Build and insert Res0, ... = G_EXTRACT Src, Idx0.
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
MachineBasicBlock::iterator getInsertPt()
Current insertion point for new instructions.
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildIntrinsic(Intrinsic::ID ID, ArrayRef< Register > Res, bool HasSideEffects, bool isConvergent)
Build and insert a G_INTRINSIC instruction.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Op0, Src0.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildBitReverse(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITREVERSE Src.
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildCTPOP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTPOP Op0, Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildExtOrTrunc(unsigned ExtOpc, const DstOp &Res, const SrcOp &Op)
Build and insert Res = ExtOpc, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes of...
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op0.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
MachineInstrBuilder buildMaskLowPtrBits(const DstOp &Res, const SrcOp &Op0, uint32_t NumBits)
Build and insert Res = G_PTRMASK Op0, G_CONSTANT (1 << NumBits) - 1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
Representation of each machine instruction.
const MachineOperand & getOperand(unsigned i) const
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
LLVM_ABI void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
LLVM_ABI Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
const TargetRegisterInfo * getTargetRegisterInfo() const
LLVM_ABI void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
Primary interface to the complete machine description for the target machine.
CodeModel::Model getCodeModel() const
Returns the code model.
Target - Wrapper for Target specific information.
LLVM Value Representation.
Definition Value.h:75
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:252
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_PREL
MO_PREL - Indicates that the bits of the symbol operand represented by MO_G0 etc are PC relative.
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_TAGGED
MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag in bits 56-63.
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
LLVM_ABI LegalityPredicate scalarOrEltWiderThan(unsigned TypeIdx, unsigned Size)
True iff the specified type index is a scalar or a vector with an element type that's wider than the ...
LLVM_ABI LegalityPredicate isPointerVector(unsigned TypeIdx)
True iff the specified type index is a vector of pointers (with any address space).
LLVM_ABI LegalityPredicate typeInSet(unsigned TypeIdx, std::initializer_list< LLT > TypesInit)
True iff the given type index is one of the specified types.
LLVM_ABI LegalityPredicate smallerThan(unsigned TypeIdx0, unsigned TypeIdx1)
True iff the first type index has a smaller total bit size than second type index.
LLVM_ABI LegalityPredicate atomicOrderingAtLeastOrStrongerThan(unsigned MMOIdx, AtomicOrdering Ordering)
True iff the specified MMO index has an atomic ordering at least as strong as Ordering.
Predicate any(Predicate P0, Predicate P1)
True iff P0 or P1 are true.
LLVM_ABI LegalityPredicate isVector(unsigned TypeIdx)
True iff the specified type index is a vector.
Predicate all(Predicate P0, Predicate P1)
True iff P0 and P1 are true.
LLVM_ABI LegalityPredicate typeIs(unsigned TypeIdx, LLT TypesInit)
True iff the given type index is the specified type.
LLVM_ABI LegalityPredicate scalarWiderThan(unsigned TypeIdx, unsigned Size)
True iff the specified type index is a scalar that's wider than the given size.
LLVM_ABI LegalityPredicate scalarNarrowerThan(unsigned TypeIdx, unsigned Size)
True iff the specified type index is a scalar that's narrower than the given size.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
LLVM_ABI LegalizeMutation moreElementsToNextPow2(unsigned TypeIdx, unsigned Min=0)
Add more elements to the type for the given type index to the next power of 2.
LLVM_ABI LegalizeMutation scalarize(unsigned TypeIdx)
Break up the vector type for the given type index into the element type.
LLVM_ABI LegalizeMutation changeElementTo(unsigned TypeIdx, unsigned FromTypeIdx)
Keep the same scalar or element type as the given type index.
LLVM_ABI LegalizeMutation widenScalarOrEltToNextPow2(unsigned TypeIdx, unsigned Min=0)
Widen the scalar type or vector element type for the given type index to the next power of 2.
LLVM_ABI LegalizeMutation changeTo(unsigned TypeIdx, LLT Ty)
Select this specific type for the given type index.
LLVM_ABI LegalizeMutation changeElementSizeTo(unsigned TypeIdx, unsigned FromTypeIdx)
Change the scalar size or element size to have the same scalar size as type index FromIndex.
operand_type_match m_Reg()
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
Invariant opcodes: All instruction sets have these as their low opcodes.
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI void constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...
Definition Utils.cpp:155
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition Utils.cpp:1506
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:385
std::function< bool(const LegalityQuery &)> LegalityPredicate
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Add
Sum of integers.
DWARFExpression::Operation Op
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:182
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:432
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
Definition Alignment.h:100
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
ArrayRef< MemDesc > MMODescrs
Operations which require memory can use this to place requirements on the memory type for each MMO.
ArrayRef< LLT > Types
This class contains a discriminated union of information about pointers in memory operands,...