LLVM 20.0.0git
AArch64LegalizerInfo.cpp
Go to the documentation of this file.
1//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the Machinelegalizer class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
16#include "AArch64Subtarget.h"
17#include "llvm/ADT/STLExtras.h"
29#include "llvm/IR/Intrinsics.h"
30#include "llvm/IR/IntrinsicsAArch64.h"
31#include "llvm/IR/Type.h"
33#include <initializer_list>
34
35#define DEBUG_TYPE "aarch64-legalinfo"
36
37using namespace llvm;
38using namespace LegalizeActions;
39using namespace LegalizeMutations;
40using namespace LegalityPredicates;
41using namespace MIPatternMatch;
42
44 : ST(&ST) {
45 using namespace TargetOpcode;
46 const LLT p0 = LLT::pointer(0, 64);
47 const LLT s8 = LLT::scalar(8);
48 const LLT s16 = LLT::scalar(16);
49 const LLT s32 = LLT::scalar(32);
50 const LLT s64 = LLT::scalar(64);
51 const LLT s128 = LLT::scalar(128);
52 const LLT v16s8 = LLT::fixed_vector(16, 8);
53 const LLT v8s8 = LLT::fixed_vector(8, 8);
54 const LLT v4s8 = LLT::fixed_vector(4, 8);
55 const LLT v2s8 = LLT::fixed_vector(2, 8);
56 const LLT v8s16 = LLT::fixed_vector(8, 16);
57 const LLT v4s16 = LLT::fixed_vector(4, 16);
58 const LLT v2s16 = LLT::fixed_vector(2, 16);
59 const LLT v2s32 = LLT::fixed_vector(2, 32);
60 const LLT v4s32 = LLT::fixed_vector(4, 32);
61 const LLT v2s64 = LLT::fixed_vector(2, 64);
62 const LLT v2p0 = LLT::fixed_vector(2, p0);
63
64 const LLT nxv16s8 = LLT::scalable_vector(16, s8);
65 const LLT nxv8s16 = LLT::scalable_vector(8, s16);
66 const LLT nxv4s32 = LLT::scalable_vector(4, s32);
67 const LLT nxv2s64 = LLT::scalable_vector(2, s64);
68
69 std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
70 v16s8, v8s16, v4s32,
71 v2s64, v2p0,
72 /* End 128bit types */
73 /* Begin 64bit types */
74 v8s8, v4s16, v2s32};
75 std::initializer_list<LLT> ScalarAndPtrTypesList = {s8, s16, s32, s64, p0};
76 SmallVector<LLT, 8> PackedVectorAllTypesVec(PackedVectorAllTypeList);
77 SmallVector<LLT, 8> ScalarAndPtrTypesVec(ScalarAndPtrTypesList);
78
79 const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();
80
81 // FIXME: support subtargets which have neon/fp-armv8 disabled.
82 if (!ST.hasNEON() || !ST.hasFPARMv8()) {
84 return;
85 }
86
87 // Some instructions only support s16 if the subtarget has full 16-bit FP
88 // support.
89 const bool HasFP16 = ST.hasFullFP16();
90 const LLT &MinFPScalar = HasFP16 ? s16 : s32;
91
92 const bool HasCSSC = ST.hasCSSC();
93 const bool HasRCPC3 = ST.hasRCPC3();
94
96 {G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
97 .legalFor({p0, s8, s16, s32, s64})
98 .legalFor(PackedVectorAllTypeList)
100 .clampScalar(0, s8, s64)
103 .clampNumElements(0, v8s8, v16s8)
104 .clampNumElements(0, v4s16, v8s16)
105 .clampNumElements(0, v2s32, v4s32)
106 .clampNumElements(0, v2s64, v2s64);
107
109 .legalFor({p0, s16, s32, s64})
110 .legalFor(PackedVectorAllTypeList)
112 .clampScalar(0, s16, s64)
113 // Maximum: sN * k = 128
114 .clampMaxNumElements(0, s8, 16)
115 .clampMaxNumElements(0, s16, 8)
116 .clampMaxNumElements(0, s32, 4)
117 .clampMaxNumElements(0, s64, 2)
118 .clampMaxNumElements(0, p0, 2);
119
121 .legalFor({s32, s64, v4s16, v8s16, v2s32, v4s32, v2s64})
123 .clampScalar(0, s32, s64)
124 .clampNumElements(0, v4s16, v8s16)
125 .clampNumElements(0, v2s32, v4s32)
126 .clampNumElements(0, v2s64, v2s64)
127 .moreElementsToNextPow2(0);
128
129 getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
130 .legalFor({s32, s64, v2s32, v2s64, v4s32, v4s16, v8s16, v16s8, v8s8})
131 .widenScalarToNextPow2(0)
132 .clampScalar(0, s32, s64)
133 .clampMaxNumElements(0, s8, 16)
134 .clampMaxNumElements(0, s16, 8)
135 .clampNumElements(0, v2s32, v4s32)
136 .clampNumElements(0, v2s64, v2s64)
138 [=](const LegalityQuery &Query) {
139 return Query.Types[0].getNumElements() <= 2;
140 },
141 0, s32)
142 .minScalarOrEltIf(
143 [=](const LegalityQuery &Query) {
144 return Query.Types[0].getNumElements() <= 4;
145 },
146 0, s16)
147 .minScalarOrEltIf(
148 [=](const LegalityQuery &Query) {
149 return Query.Types[0].getNumElements() <= 16;
150 },
151 0, s8)
153
154 getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
155 .customIf([=](const LegalityQuery &Query) {
156 const auto &SrcTy = Query.Types[0];
157 const auto &AmtTy = Query.Types[1];
158 return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
159 AmtTy.getSizeInBits() == 32;
160 })
161 .legalFor({
162 {s32, s32},
163 {s32, s64},
164 {s64, s64},
165 {v8s8, v8s8},
166 {v16s8, v16s8},
167 {v4s16, v4s16},
168 {v8s16, v8s16},
169 {v2s32, v2s32},
170 {v4s32, v4s32},
171 {v2s64, v2s64},
172 })
173 .widenScalarToNextPow2(0)
174 .clampScalar(1, s32, s64)
175 .clampScalar(0, s32, s64)
176 .clampNumElements(0, v8s8, v16s8)
177 .clampNumElements(0, v4s16, v8s16)
178 .clampNumElements(0, v2s32, v4s32)
179 .clampNumElements(0, v2s64, v2s64)
181 .minScalarSameAs(1, 0);
182
184 .legalFor({{p0, s64}, {v2p0, v2s64}})
185 .clampScalarOrElt(1, s64, s64)
186 .clampNumElements(0, v2p0, v2p0);
187
188 getActionDefinitionsBuilder(G_PTRMASK).legalFor({{p0, s64}});
189
190 getActionDefinitionsBuilder({G_SDIV, G_UDIV})
191 .legalFor({s32, s64})
192 .libcallFor({s128})
193 .clampScalar(0, s32, s64)
195 .scalarize(0);
196
197 getActionDefinitionsBuilder({G_SREM, G_UREM, G_SDIVREM, G_UDIVREM})
198 .lowerFor({s8, s16, s32, s64, v2s64, v4s32, v2s32})
200 .clampScalarOrElt(0, s32, s64)
201 .clampNumElements(0, v2s32, v4s32)
202 .clampNumElements(0, v2s64, v2s64)
203 .moreElementsToNextPow2(0);
204
205
206 getActionDefinitionsBuilder({G_SMULO, G_UMULO})
207 .widenScalarToNextPow2(0, /*Min = */ 32)
208 .clampScalar(0, s32, s64)
209 .lower();
210
211 getActionDefinitionsBuilder({G_SMULH, G_UMULH})
212 .legalFor({s64, v8s16, v16s8, v4s32})
213 .lower();
214
215 auto &MinMaxActions = getActionDefinitionsBuilder(
216 {G_SMIN, G_SMAX, G_UMIN, G_UMAX});
217 if (HasCSSC)
218 MinMaxActions
219 .legalFor({s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
220 // Making clamping conditional on CSSC extension as without legal types we
221 // lower to CMP which can fold one of the two sxtb's we'd otherwise need
222 // if we detect a type smaller than 32-bit.
223 .minScalar(0, s32);
224 else
225 MinMaxActions
226 .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32});
227 MinMaxActions
228 .clampNumElements(0, v8s8, v16s8)
229 .clampNumElements(0, v4s16, v8s16)
230 .clampNumElements(0, v2s32, v4s32)
231 // FIXME: This shouldn't be needed as v2s64 types are going to
232 // be expanded anyway, but G_ICMP doesn't support splitting vectors yet
233 .clampNumElements(0, v2s64, v2s64)
234 .lower();
235
237 {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
238 .legalFor({{s32, s32}, {s64, s32}})
239 .clampScalar(0, s32, s64)
240 .clampScalar(1, s32, s64)
242
243 getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FNEG,
244 G_FABS, G_FSQRT, G_FMAXNUM, G_FMINNUM,
245 G_FMAXIMUM, G_FMINIMUM, G_FCEIL, G_FFLOOR,
246 G_FRINT, G_FNEARBYINT, G_INTRINSIC_TRUNC,
247 G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
248 .legalFor({MinFPScalar, s32, s64, v2s32, v4s32, v2s64})
249 .legalIf([=](const LegalityQuery &Query) {
250 const auto &Ty = Query.Types[0];
251 return (Ty == v8s16 || Ty == v4s16) && HasFP16;
252 })
253 .libcallFor({s128})
254 .minScalarOrElt(0, MinFPScalar)
255 .clampNumElements(0, v4s16, v8s16)
256 .clampNumElements(0, v2s32, v4s32)
257 .clampNumElements(0, v2s64, v2s64)
259
261 .libcallFor({s32, s64})
262 .minScalar(0, s32)
263 .scalarize(0);
264
265 getActionDefinitionsBuilder({G_INTRINSIC_LRINT, G_INTRINSIC_LLRINT})
266 .legalFor({{s64, MinFPScalar}, {s64, s32}, {s64, s64}})
267 .libcallFor({{s64, s128}})
268 .minScalarOrElt(1, MinFPScalar);
269
271 {G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2, G_FLOG10, G_FTAN, G_FEXP,
272 G_FEXP2, G_FEXP10, G_FACOS, G_FASIN, G_FATAN, G_FCOSH, G_FSINH, G_FTANH})
273 // We need a call for these, so we always need to scalarize.
274 .scalarize(0)
275 // Regardless of FP16 support, widen 16-bit elements to 32-bits.
276 .minScalar(0, s32)
277 .libcallFor({s32, s64});
279 .scalarize(0)
280 .minScalar(0, s32)
281 .libcallFor({{s32, s32}, {s64, s32}});
282
284 .legalIf(all(typeInSet(0, {s32, s64, p0}),
285 typeInSet(1, {s8, s16, s32}), smallerThan(1, 0)))
287 .clampScalar(0, s32, s64)
289 .minScalar(1, s8)
290 .maxScalarIf(typeInSet(0, {s32}), 1, s16)
291 .maxScalarIf(typeInSet(0, {s64, p0}), 1, s32);
292
294 .legalIf(all(typeInSet(0, {s16, s32, s64, p0}),
295 typeInSet(1, {s32, s64, s128, p0}), smallerThan(0, 1)))
297 .clampScalar(1, s32, s128)
299 .minScalar(0, s16)
300 .maxScalarIf(typeInSet(1, {s32}), 0, s16)
301 .maxScalarIf(typeInSet(1, {s64, p0}), 0, s32)
302 .maxScalarIf(typeInSet(1, {s128}), 0, s64);
303
304
305 for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
306 auto &Actions = getActionDefinitionsBuilder(Op);
307
308 if (Op == G_SEXTLOAD)
310
311 // Atomics have zero extending behavior.
312 Actions
313 .legalForTypesWithMemDesc({{s32, p0, s8, 8},
314 {s32, p0, s16, 8},
315 {s32, p0, s32, 8},
316 {s64, p0, s8, 2},
317 {s64, p0, s16, 2},
318 {s64, p0, s32, 4},
319 {s64, p0, s64, 8},
320 {p0, p0, s64, 8},
321 {v2s32, p0, s64, 8}})
322 .widenScalarToNextPow2(0)
323 .clampScalar(0, s32, s64)
324 // TODO: We could support sum-of-pow2's but the lowering code doesn't know
325 // how to do that yet.
326 .unsupportedIfMemSizeNotPow2()
327 // Lower anything left over into G_*EXT and G_LOAD
328 .lower();
329 }
330
331 auto IsPtrVecPred = [=](const LegalityQuery &Query) {
332 const LLT &ValTy = Query.Types[0];
333 return ValTy.isPointerVector() && ValTy.getAddressSpace() == 0;
334 };
335
336 auto &LoadActions = getActionDefinitionsBuilder(G_LOAD);
337 auto &StoreActions = getActionDefinitionsBuilder(G_STORE);
338
339 if (ST.hasSVE()) {
340 LoadActions.legalForTypesWithMemDesc({
341 // 128 bit base sizes
342 {nxv16s8, p0, nxv16s8, 8},
343 {nxv8s16, p0, nxv8s16, 8},
344 {nxv4s32, p0, nxv4s32, 8},
345 {nxv2s64, p0, nxv2s64, 8},
346 });
347
348 // TODO: Add nxv2p0. Consider bitcastIf.
349 // See #92130
350 // https://github.com/llvm/llvm-project/pull/92130#discussion_r1616888461
351 StoreActions.legalForTypesWithMemDesc({
352 // 128 bit base sizes
353 {nxv16s8, p0, nxv16s8, 8},
354 {nxv8s16, p0, nxv8s16, 8},
355 {nxv4s32, p0, nxv4s32, 8},
356 {nxv2s64, p0, nxv2s64, 8},
357 });
358 }
359
360 LoadActions
361 .customIf([=](const LegalityQuery &Query) {
362 return HasRCPC3 && Query.Types[0] == s128 &&
363 Query.MMODescrs[0].Ordering == AtomicOrdering::Acquire;
364 })
365 .customIf([=](const LegalityQuery &Query) {
366 return Query.Types[0] == s128 &&
367 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
368 })
369 .legalForTypesWithMemDesc({{s8, p0, s8, 8},
370 {s16, p0, s16, 8},
371 {s32, p0, s32, 8},
372 {s64, p0, s64, 8},
373 {p0, p0, s64, 8},
374 {s128, p0, s128, 8},
375 {v8s8, p0, s64, 8},
376 {v16s8, p0, s128, 8},
377 {v4s16, p0, s64, 8},
378 {v8s16, p0, s128, 8},
379 {v2s32, p0, s64, 8},
380 {v4s32, p0, s128, 8},
381 {v2s64, p0, s128, 8}})
382 // These extends are also legal
383 .legalForTypesWithMemDesc(
384 {{s32, p0, s8, 8}, {s32, p0, s16, 8}, {s64, p0, s32, 8}})
385 .widenScalarToNextPow2(0, /* MinSize = */ 8)
386 .clampMaxNumElements(0, s8, 16)
387 .clampMaxNumElements(0, s16, 8)
388 .clampMaxNumElements(0, s32, 4)
389 .clampMaxNumElements(0, s64, 2)
390 .clampMaxNumElements(0, p0, 2)
391 .lowerIfMemSizeNotByteSizePow2()
392 .clampScalar(0, s8, s64)
393 .narrowScalarIf(
394 [=](const LegalityQuery &Query) {
395 // Clamp extending load results to 32-bits.
396 return Query.Types[0].isScalar() &&
397 Query.Types[0] != Query.MMODescrs[0].MemoryTy &&
398 Query.Types[0].getSizeInBits() > 32;
399 },
400 changeTo(0, s32))
401 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
402 .bitcastIf(typeInSet(0, {v4s8}),
403 [=](const LegalityQuery &Query) {
404 const LLT VecTy = Query.Types[0];
405 return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
406 })
407 .customIf(IsPtrVecPred)
408 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0);
409
410 StoreActions
411 .customIf([=](const LegalityQuery &Query) {
412 return HasRCPC3 && Query.Types[0] == s128 &&
413 Query.MMODescrs[0].Ordering == AtomicOrdering::Release;
414 })
415 .customIf([=](const LegalityQuery &Query) {
416 return Query.Types[0] == s128 &&
417 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
418 })
419 .legalForTypesWithMemDesc(
420 {{s8, p0, s8, 8}, {s16, p0, s8, 8}, // truncstorei8 from s16
421 {s32, p0, s8, 8}, // truncstorei8 from s32
422 {s64, p0, s8, 8}, // truncstorei8 from s64
423 {s16, p0, s16, 8}, {s32, p0, s16, 8}, // truncstorei16 from s32
424 {s64, p0, s16, 8}, // truncstorei16 from s64
425 {s32, p0, s8, 8}, {s32, p0, s16, 8}, {s32, p0, s32, 8},
426 {s64, p0, s64, 8}, {s64, p0, s32, 8}, // truncstorei32 from s64
427 {p0, p0, s64, 8}, {s128, p0, s128, 8}, {v16s8, p0, s128, 8},
428 {v8s8, p0, s64, 8}, {v4s16, p0, s64, 8}, {v8s16, p0, s128, 8},
429 {v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})
430 .clampScalar(0, s8, s64)
431 .lowerIf([=](const LegalityQuery &Query) {
432 return Query.Types[0].isScalar() &&
433 Query.Types[0] != Query.MMODescrs[0].MemoryTy;
434 })
435 // Maximum: sN * k = 128
436 .clampMaxNumElements(0, s8, 16)
437 .clampMaxNumElements(0, s16, 8)
438 .clampMaxNumElements(0, s32, 4)
439 .clampMaxNumElements(0, s64, 2)
440 .clampMaxNumElements(0, p0, 2)
441 .lowerIfMemSizeNotPow2()
442 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
443 .bitcastIf(typeInSet(0, {v4s8}),
444 [=](const LegalityQuery &Query) {
445 const LLT VecTy = Query.Types[0];
446 return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
447 })
448 .customIf(IsPtrVecPred)
449 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0);
450
451 getActionDefinitionsBuilder(G_INDEXED_STORE)
452 // Idx 0 == Ptr, Idx 1 == Val
453 // TODO: we can implement legalizations but as of now these are
454 // generated in a very specific way.
456 {p0, s8, s8, 8},
457 {p0, s16, s16, 8},
458 {p0, s32, s8, 8},
459 {p0, s32, s16, 8},
460 {p0, s32, s32, 8},
461 {p0, s64, s64, 8},
462 {p0, p0, p0, 8},
463 {p0, v8s8, v8s8, 8},
464 {p0, v16s8, v16s8, 8},
465 {p0, v4s16, v4s16, 8},
466 {p0, v8s16, v8s16, 8},
467 {p0, v2s32, v2s32, 8},
468 {p0, v4s32, v4s32, 8},
469 {p0, v2s64, v2s64, 8},
470 {p0, v2p0, v2p0, 8},
471 {p0, s128, s128, 8},
472 })
473 .unsupported();
474
475 auto IndexedLoadBasicPred = [=](const LegalityQuery &Query) {
476 LLT LdTy = Query.Types[0];
477 LLT PtrTy = Query.Types[1];
478 if (!llvm::is_contained(PackedVectorAllTypesVec, LdTy) &&
479 !llvm::is_contained(ScalarAndPtrTypesVec, LdTy) && LdTy != s128)
480 return false;
481 if (PtrTy != p0)
482 return false;
483 return true;
484 };
485 getActionDefinitionsBuilder(G_INDEXED_LOAD)
488 .legalIf(IndexedLoadBasicPred)
489 .unsupported();
490 getActionDefinitionsBuilder({G_INDEXED_SEXTLOAD, G_INDEXED_ZEXTLOAD})
491 .unsupportedIf(
493 .legalIf(all(typeInSet(0, {s16, s32, s64}),
494 LegalityPredicate([=](const LegalityQuery &Q) {
495 LLT LdTy = Q.Types[0];
496 LLT PtrTy = Q.Types[1];
497 LLT MemTy = Q.MMODescrs[0].MemoryTy;
498 if (PtrTy != p0)
499 return false;
500 if (LdTy == s16)
501 return MemTy == s8;
502 if (LdTy == s32)
503 return MemTy == s8 || MemTy == s16;
504 if (LdTy == s64)
505 return MemTy == s8 || MemTy == s16 || MemTy == s32;
506 return false;
507 })))
508 .unsupported();
509
510 // Constants
512 .legalFor({p0, s8, s16, s32, s64})
513 .widenScalarToNextPow2(0)
514 .clampScalar(0, s8, s64);
515 getActionDefinitionsBuilder(G_FCONSTANT)
516 .legalIf([=](const LegalityQuery &Query) {
517 const auto &Ty = Query.Types[0];
518 if (HasFP16 && Ty == s16)
519 return true;
520 return Ty == s32 || Ty == s64 || Ty == s128;
521 })
522 .clampScalar(0, MinFPScalar, s128);
523
524 // FIXME: fix moreElementsToNextPow2
526 .legalFor({{s32, s32}, {s32, s64}, {s32, p0}})
528 .clampScalar(1, s32, s64)
529 .clampScalar(0, s32, s32)
530 .minScalarEltSameAsIf(
531 [=](const LegalityQuery &Query) {
532 const LLT &Ty = Query.Types[0];
533 const LLT &SrcTy = Query.Types[1];
534 return Ty.isVector() && !SrcTy.isPointerVector() &&
535 Ty.getElementType() != SrcTy.getElementType();
536 },
537 0, 1)
538 .minScalarOrEltIf(
539 [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
540 1, s32)
541 .minScalarOrEltIf(
542 [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0,
543 s64)
545 .clampNumElements(1, v8s8, v16s8)
546 .clampNumElements(1, v4s16, v8s16)
547 .clampNumElements(1, v2s32, v4s32)
548 .clampNumElements(1, v2s64, v2s64)
549 .customIf(isVector(0));
550
552 .legalFor({{s32, MinFPScalar},
553 {s32, s32},
554 {s32, s64},
555 {v4s32, v4s32},
556 {v2s32, v2s32},
557 {v2s64, v2s64}})
558 .legalIf([=](const LegalityQuery &Query) {
559 const auto &Ty = Query.Types[1];
560 return (Ty == v8s16 || Ty == v4s16) && Ty == Query.Types[0] && HasFP16;
561 })
563 .clampScalar(0, s32, s32)
564 .minScalarOrElt(1, MinFPScalar)
565 .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
566 .minScalarEltSameAsIf(
567 [=](const LegalityQuery &Query) {
568 const LLT &Ty = Query.Types[0];
569 const LLT &SrcTy = Query.Types[1];
570 return Ty.isVector() && !SrcTy.isPointerVector() &&
571 Ty.getElementType() != SrcTy.getElementType();
572 },
573 0, 1)
574 .clampNumElements(1, v4s16, v8s16)
575 .clampNumElements(1, v2s32, v4s32)
576 .clampMaxNumElements(1, s64, 2)
577 .moreElementsToNextPow2(1)
578 .libcallFor({{s32, s128}});
579
580 // Extensions
581 auto ExtLegalFunc = [=](const LegalityQuery &Query) {
582 unsigned DstSize = Query.Types[0].getSizeInBits();
583
584 // Handle legal vectors using legalFor
585 if (Query.Types[0].isVector())
586 return false;
587
588 if (DstSize < 8 || DstSize >= 128 || !isPowerOf2_32(DstSize))
589 return false; // Extending to a scalar s128 needs narrowing.
590
591 const LLT &SrcTy = Query.Types[1];
592
593 // Make sure we fit in a register otherwise. Don't bother checking that
594 // the source type is below 128 bits. We shouldn't be allowing anything
595 // through which is wider than the destination in the first place.
596 unsigned SrcSize = SrcTy.getSizeInBits();
597 if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
598 return false;
599
600 return true;
601 };
602 getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
603 .legalIf(ExtLegalFunc)
604 .legalFor({{v2s64, v2s32}, {v4s32, v4s16}, {v8s16, v8s8}})
605 .clampScalar(0, s64, s64) // Just for s128, others are handled above.
607 .clampMaxNumElements(1, s8, 8)
608 .clampMaxNumElements(1, s16, 4)
609 .clampMaxNumElements(1, s32, 2)
610 // Tries to convert a large EXTEND into two smaller EXTENDs
611 .lowerIf([=](const LegalityQuery &Query) {
612 return (Query.Types[0].getScalarSizeInBits() >
613 Query.Types[1].getScalarSizeInBits() * 2) &&
614 Query.Types[0].isVector() &&
615 (Query.Types[1].getScalarSizeInBits() == 8 ||
616 Query.Types[1].getScalarSizeInBits() == 16);
617 })
618 .clampMinNumElements(1, s8, 8)
619 .clampMinNumElements(1, s16, 4);
620
622 .legalFor({{v2s32, v2s64}, {v4s16, v4s32}, {v8s8, v8s16}})
624 .clampMaxNumElements(0, s8, 8)
625 .clampMaxNumElements(0, s16, 4)
626 .clampMaxNumElements(0, s32, 2)
627 .minScalarOrEltIf(
628 [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
629 0, s8)
630 .lowerIf([=](const LegalityQuery &Query) {
631 LLT DstTy = Query.Types[0];
632 LLT SrcTy = Query.Types[1];
633 return DstTy.isVector() && SrcTy.getSizeInBits() > 128 &&
634 DstTy.getScalarSizeInBits() * 2 <= SrcTy.getScalarSizeInBits();
635 })
636 .clampMinNumElements(0, s8, 8)
637 .clampMinNumElements(0, s16, 4)
638 .alwaysLegal();
639
640 getActionDefinitionsBuilder(G_SEXT_INREG)
641 .legalFor({s32, s64})
642 .legalFor(PackedVectorAllTypeList)
643 .maxScalar(0, s64)
644 .clampNumElements(0, v8s8, v16s8)
645 .clampNumElements(0, v4s16, v8s16)
646 .clampNumElements(0, v2s32, v4s32)
647 .clampMaxNumElements(0, s64, 2)
648 .lower();
649
650 // FP conversions
652 .legalFor(
653 {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
654 .libcallFor({{s16, s128}, {s32, s128}, {s64, s128}})
655 .clampNumElements(0, v4s16, v4s16)
656 .clampNumElements(0, v2s32, v2s32)
657 .scalarize(0);
658
660 .legalFor(
661 {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
662 .libcallFor({{s128, s64}, {s128, s32}, {s128, s16}})
663 .clampNumElements(0, v4s32, v4s32)
664 .clampNumElements(0, v2s64, v2s64)
665 .scalarize(0);
666
667 // Conversions
668 getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
669 .legalFor({{s32, s32},
670 {s64, s32},
671 {s32, s64},
672 {s64, s64},
673 {v2s64, v2s64},
674 {v4s32, v4s32},
675 {v2s32, v2s32}})
676 .legalIf([=](const LegalityQuery &Query) {
677 return HasFP16 &&
678 (Query.Types[1] == s16 || Query.Types[1] == v4s16 ||
679 Query.Types[1] == v8s16) &&
680 (Query.Types[0] == s32 || Query.Types[0] == s64 ||
681 Query.Types[0] == v4s16 || Query.Types[0] == v8s16);
682 })
683 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
684 .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
685 // The range of a fp16 value fits into an i17, so we can lower the width
686 // to i64.
687 .narrowScalarIf(
688 [=](const LegalityQuery &Query) {
689 return Query.Types[1] == s16 && Query.Types[0].getSizeInBits() > 64;
690 },
691 changeTo(0, s64))
693 .widenScalarOrEltToNextPow2OrMinSize(0)
694 .minScalar(0, s32)
695 .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
696 .widenScalarIf(
697 [=](const LegalityQuery &Query) {
698 return Query.Types[0].getScalarSizeInBits() <= 64 &&
699 Query.Types[0].getScalarSizeInBits() >
700 Query.Types[1].getScalarSizeInBits();
701 },
703 .widenScalarIf(
704 [=](const LegalityQuery &Query) {
705 return Query.Types[1].getScalarSizeInBits() <= 64 &&
706 Query.Types[0].getScalarSizeInBits() <
707 Query.Types[1].getScalarSizeInBits();
708 },
710 .clampNumElements(0, v4s16, v8s16)
711 .clampNumElements(0, v2s32, v4s32)
712 .clampMaxNumElements(0, s64, 2)
713 .libcallFor(
714 {{s32, s128}, {s64, s128}, {s128, s128}, {s128, s32}, {s128, s64}});
715
716 getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
717 .legalFor({{s32, s32},
718 {s64, s32},
719 {s32, s64},
720 {s64, s64},
721 {v2s64, v2s64},
722 {v4s32, v4s32},
723 {v2s32, v2s32}})
724 .legalIf([=](const LegalityQuery &Query) {
725 return HasFP16 &&
726 (Query.Types[0] == s16 || Query.Types[0] == v4s16 ||
727 Query.Types[0] == v8s16) &&
728 (Query.Types[1] == s32 || Query.Types[1] == s64 ||
729 Query.Types[1] == v4s16 || Query.Types[1] == v8s16);
730 })
731 .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
732 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
734 .widenScalarOrEltToNextPow2OrMinSize(1)
735 .minScalar(1, s32)
736 .widenScalarOrEltToNextPow2OrMinSize(0, /*MinSize=*/HasFP16 ? 16 : 32)
737 .widenScalarIf(
738 [=](const LegalityQuery &Query) {
739 return Query.Types[1].getScalarSizeInBits() <= 64 &&
740 Query.Types[0].getScalarSizeInBits() <
741 Query.Types[1].getScalarSizeInBits();
742 },
744 .widenScalarIf(
745 [=](const LegalityQuery &Query) {
746 return Query.Types[0].getScalarSizeInBits() <= 64 &&
747 Query.Types[0].getScalarSizeInBits() >
748 Query.Types[1].getScalarSizeInBits();
749 },
751 .clampNumElements(0, v4s16, v8s16)
752 .clampNumElements(0, v2s32, v4s32)
753 .clampMaxNumElements(0, s64, 2)
754 .libcallFor({{s16, s128},
755 {s32, s128},
756 {s64, s128},
757 {s128, s128},
758 {s128, s32},
759 {s128, s64}});
760
761 // Control-flow
763 .legalFor({s32})
764 .clampScalar(0, s32, s32);
765 getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
766
768 .legalFor({{s32, s32}, {s64, s32}, {p0, s32}})
769 .widenScalarToNextPow2(0)
770 .clampScalar(0, s32, s64)
771 .clampScalar(1, s32, s32)
773 .lowerIf(isVector(0));
774
775 // Pointer-handling
776 getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
777
778 if (TM.getCodeModel() == CodeModel::Small)
779 getActionDefinitionsBuilder(G_GLOBAL_VALUE).custom();
780 else
781 getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
782
783 getActionDefinitionsBuilder(G_PTRAUTH_GLOBAL_VALUE)
784 .legalIf(all(typeIs(0, p0), typeIs(1, p0)));
785
787 .legalFor({{s64, p0}, {v2s64, v2p0}})
788 .widenScalarToNextPow2(0, 64)
789 .clampScalar(0, s64, s64);
790
792 .unsupportedIf([&](const LegalityQuery &Query) {
793 return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
794 })
795 .legalFor({{p0, s64}, {v2p0, v2s64}});
796
797 // Casts for 32- and 64-bit wide types are just copies.
798 // Same for 128-bit wide types, except they are on the FPR bank.
800 // Keeping 32-bit instructions legal to prevent regression in some tests
801 .legalForCartesianProduct({s32, v2s16, v4s8})
802 .legalForCartesianProduct({s64, v8s8, v4s16, v2s32})
803 .legalForCartesianProduct({s128, v16s8, v8s16, v4s32, v2s64, v2p0})
804 .lowerIf([=](const LegalityQuery &Query) {
805 return Query.Types[0].isVector() != Query.Types[1].isVector();
806 })
808 .clampNumElements(0, v8s8, v16s8)
809 .clampNumElements(0, v4s16, v8s16)
810 .clampNumElements(0, v2s32, v4s32)
811 .lower();
812
813 getActionDefinitionsBuilder(G_VASTART).legalFor({p0});
814
815 // va_list must be a pointer, but most sized types are pretty easy to handle
816 // as the destination.
818 .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
819 .clampScalar(0, s8, s64)
820 .widenScalarToNextPow2(0, /*Min*/ 8);
821
822 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
823 .lowerIf(
824 all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0)));
825
826 LegalityPredicate UseOutlineAtomics = [&ST](const LegalityQuery &Query) {
827 return ST.outlineAtomics() && !ST.hasLSE();
828 };
829
830 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
831 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0),
832 predNot(UseOutlineAtomics)))
833 .customIf(all(typeIs(0, s128), predNot(UseOutlineAtomics)))
834 .customIf([UseOutlineAtomics](const LegalityQuery &Query) {
835 return Query.Types[0].getSizeInBits() == 128 &&
836 !UseOutlineAtomics(Query);
837 })
838 .libcallIf(all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(1, p0),
839 UseOutlineAtomics))
840 .clampScalar(0, s32, s64);
841
842 getActionDefinitionsBuilder({G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD,
843 G_ATOMICRMW_SUB, G_ATOMICRMW_AND, G_ATOMICRMW_OR,
844 G_ATOMICRMW_XOR})
845 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0),
846 predNot(UseOutlineAtomics)))
847 .libcallIf(all(typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0),
848 UseOutlineAtomics))
849 .clampScalar(0, s32, s64);
850
851 // Do not outline these atomic operations, as per comment in
852 // AArch64ISelLowering.cpp's shouldExpandAtomicRMWInIR().
854 {G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
855 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)))
856 .clampScalar(0, s32, s64);
857
858 getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});
859
860 // Merge/Unmerge
861 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
862 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
863 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
865 .widenScalarToNextPow2(LitTyIdx, 8)
866 .widenScalarToNextPow2(BigTyIdx, 32)
867 .clampScalar(LitTyIdx, s8, s64)
868 .clampScalar(BigTyIdx, s32, s128)
869 .legalIf([=](const LegalityQuery &Q) {
870 switch (Q.Types[BigTyIdx].getSizeInBits()) {
871 case 32:
872 case 64:
873 case 128:
874 break;
875 default:
876 return false;
877 }
878 switch (Q.Types[LitTyIdx].getSizeInBits()) {
879 case 8:
880 case 16:
881 case 32:
882 case 64:
883 return true;
884 default:
885 return false;
886 }
887 });
888 }
889
890 getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
891 .unsupportedIf([=](const LegalityQuery &Query) {
892 const LLT &EltTy = Query.Types[1].getElementType();
893 return Query.Types[0] != EltTy;
894 })
895 .minScalar(2, s64)
896 .customIf([=](const LegalityQuery &Query) {
897 const LLT &VecTy = Query.Types[1];
898 return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
899 VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32 ||
900 VecTy == v8s8 || VecTy == v16s8 || VecTy == v2p0;
901 })
902 .minScalarOrEltIf(
903 [=](const LegalityQuery &Query) {
904 // We want to promote <M x s1> to <M x s64> if that wouldn't
905 // cause the total vec size to be > 128b.
906 return Query.Types[1].getNumElements() <= 2;
907 },
908 0, s64)
909 .minScalarOrEltIf(
910 [=](const LegalityQuery &Query) {
911 return Query.Types[1].getNumElements() <= 4;
912 },
913 0, s32)
914 .minScalarOrEltIf(
915 [=](const LegalityQuery &Query) {
916 return Query.Types[1].getNumElements() <= 8;
917 },
918 0, s16)
919 .minScalarOrEltIf(
920 [=](const LegalityQuery &Query) {
921 return Query.Types[1].getNumElements() <= 16;
922 },
923 0, s8)
924 .minScalarOrElt(0, s8) // Worst case, we need at least s8.
926 .clampMaxNumElements(1, s64, 2)
927 .clampMaxNumElements(1, s32, 4)
928 .clampMaxNumElements(1, s16, 8)
929 .clampMaxNumElements(1, s8, 16)
930 .clampMaxNumElements(1, p0, 2);
931
932 getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
933 .legalIf(
934 typeInSet(0, {v16s8, v8s8, v8s16, v4s16, v4s32, v2s32, v2s64, v2p0}))
937 .clampNumElements(0, v8s8, v16s8)
938 .clampNumElements(0, v4s16, v8s16)
939 .clampNumElements(0, v2s32, v4s32)
940 .clampMaxNumElements(0, s64, 2)
941 .clampMaxNumElements(0, p0, 2);
942
943 getActionDefinitionsBuilder(G_BUILD_VECTOR)
944 .legalFor({{v8s8, s8},
945 {v16s8, s8},
946 {v4s16, s16},
947 {v8s16, s16},
948 {v2s32, s32},
949 {v4s32, s32},
950 {v2p0, p0},
951 {v2s64, s64}})
952 .clampNumElements(0, v4s32, v4s32)
953 .clampNumElements(0, v2s64, v2s64)
954 .minScalarOrElt(0, s8)
956 .minScalarSameAs(1, 0);
957
958 getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).lower();
959
962 {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
963 .scalarize(1)
964 .widenScalarToNextPow2(1, /*Min=*/32)
965 .clampScalar(1, s32, s64)
966 .scalarSameSizeAs(0, 1);
967 getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).lower();
968
969 // TODO: Custom lowering for v2s32, v4s32, v2s64.
970 getActionDefinitionsBuilder(G_BITREVERSE)
971 .legalFor({s32, s64, v8s8, v16s8})
972 .widenScalarToNextPow2(0, /*Min = */ 32)
973 .clampScalar(0, s32, s64);
974
975 getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF).lower();
976
978 .lowerIf(isVector(0))
979 .widenScalarToNextPow2(1, /*Min=*/32)
980 .clampScalar(1, s32, s64)
981 .scalarSameSizeAs(0, 1)
982 .legalIf([=](const LegalityQuery &Query) {
983 return (HasCSSC && typeInSet(0, {s32, s64})(Query));
984 })
985 .customIf([=](const LegalityQuery &Query) {
986 return (!HasCSSC && typeInSet(0, {s32, s64})(Query));
987 });
988
989 getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
990 .legalIf([=](const LegalityQuery &Query) {
991 const LLT &DstTy = Query.Types[0];
992 const LLT &SrcTy = Query.Types[1];
993 // For now just support the TBL2 variant which needs the source vectors
994 // to be the same size as the dest.
995 if (DstTy != SrcTy)
996 return false;
997 return llvm::is_contained(
998 {v2s64, v2p0, v2s32, v4s32, v4s16, v16s8, v8s8, v8s16}, DstTy);
999 })
1000 // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), we
1001 // just want those lowered into G_BUILD_VECTOR
1002 .lowerIf([=](const LegalityQuery &Query) {
1003 return !Query.Types[1].isVector();
1004 })
1005 .moreElementsIf(
1006 [](const LegalityQuery &Query) {
1007 return Query.Types[0].isVector() && Query.Types[1].isVector() &&
1008 Query.Types[0].getNumElements() >
1009 Query.Types[1].getNumElements();
1010 },
1011 changeTo(1, 0))
1013 .moreElementsIf(
1014 [](const LegalityQuery &Query) {
1015 return Query.Types[0].isVector() && Query.Types[1].isVector() &&
1016 Query.Types[0].getNumElements() <
1017 Query.Types[1].getNumElements();
1018 },
1019 changeTo(0, 1))
1020 .widenScalarOrEltToNextPow2OrMinSize(0, 8)
1021 .clampNumElements(0, v8s8, v16s8)
1022 .clampNumElements(0, v4s16, v8s16)
1023 .clampNumElements(0, v4s32, v4s32)
1024 .clampNumElements(0, v2s64, v2s64);
1025
1026 getActionDefinitionsBuilder(G_CONCAT_VECTORS)
1027 .legalFor({{v4s32, v2s32}, {v8s16, v4s16}, {v16s8, v8s8}})
1028 .bitcastIf(
1029 [=](const LegalityQuery &Query) {
1030 return Query.Types[0].getSizeInBits() <= 128 &&
1031 Query.Types[1].getSizeInBits() <= 64;
1032 },
1033 [=](const LegalityQuery &Query) {
1034 const LLT DstTy = Query.Types[0];
1035 const LLT SrcTy = Query.Types[1];
1036 return std::pair(
1037 0, DstTy.changeElementSize(SrcTy.getSizeInBits())
1040 SrcTy.getNumElements())));
1041 });
1042
1043 getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({p0});
1044
1045 getActionDefinitionsBuilder(G_BRJT).legalFor({{p0, s64}});
1046
1047 getActionDefinitionsBuilder(G_DYN_STACKALLOC).custom();
1048
1049 getActionDefinitionsBuilder({G_STACKSAVE, G_STACKRESTORE}).lower();
1050
1051 if (ST.hasMOPS()) {
1052 // G_BZERO is not supported. Currently it is only emitted by
1053 // PreLegalizerCombiner for G_MEMSET with zero constant.
1055
1057 .legalForCartesianProduct({p0}, {s64}, {s64})
1058 .customForCartesianProduct({p0}, {s8}, {s64})
1059 .immIdx(0); // Inform verifier imm idx 0 is handled.
1060
1061 getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE})
1062 .legalForCartesianProduct({p0}, {p0}, {s64})
1063 .immIdx(0); // Inform verifier imm idx 0 is handled.
1064
1065 // G_MEMCPY_INLINE does not have a tailcall immediate
1066 getActionDefinitionsBuilder(G_MEMCPY_INLINE)
1067 .legalForCartesianProduct({p0}, {p0}, {s64});
1068
1069 } else {
1070 getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
1071 .libcall();
1072 }
1073
1074 // FIXME: Legal vector types are only legal with NEON.
1075 auto &ABSActions = getActionDefinitionsBuilder(G_ABS);
1076 if (HasCSSC)
1077 ABSActions
1078 .legalFor({s32, s64});
1079 ABSActions.legalFor(PackedVectorAllTypeList)
1080 .customIf([=](const LegalityQuery &Q) {
1081 // TODO: Fix suboptimal codegen for 128+ bit types.
1082 LLT SrcTy = Q.Types[0];
1083 return SrcTy.isScalar() && SrcTy.getSizeInBits() < 128;
1084 })
1085 .widenScalarIf(
1086 [=](const LegalityQuery &Query) { return Query.Types[0] == v4s8; },
1087 [=](const LegalityQuery &Query) { return std::make_pair(0, v4s16); })
1088 .widenScalarIf(
1089 [=](const LegalityQuery &Query) { return Query.Types[0] == v2s16; },
1090 [=](const LegalityQuery &Query) { return std::make_pair(0, v2s32); })
1091 .clampNumElements(0, v8s8, v16s8)
1092 .clampNumElements(0, v4s16, v8s16)
1093 .clampNumElements(0, v2s32, v4s32)
1094 .clampNumElements(0, v2s64, v2s64)
1095 .moreElementsToNextPow2(0)
1096 .lower();
1097
1098 // For fadd reductions we have pairwise operations available. We treat the
1099 // usual legal types as legal and handle the lowering to pairwise instructions
1100 // later.
1101 getActionDefinitionsBuilder(G_VECREDUCE_FADD)
1102 .legalFor({{s32, v2s32}, {s32, v4s32}, {s64, v2s64}})
1103 .legalIf([=](const LegalityQuery &Query) {
1104 const auto &Ty = Query.Types[1];
1105 return (Ty == v4s16 || Ty == v8s16) && HasFP16;
1106 })
1107 .minScalarOrElt(0, MinFPScalar)
1108 .clampMaxNumElements(1, s64, 2)
1109 .clampMaxNumElements(1, s32, 4)
1110 .clampMaxNumElements(1, s16, 8)
1111 .lower();
1112
1113 // For fmul reductions we need to split up into individual operations. We
1114 // clamp to 128 bit vectors then to 64bit vectors to produce a cascade of
1115 // smaller types, followed by scalarizing what remains.
1116 getActionDefinitionsBuilder(G_VECREDUCE_FMUL)
1117 .minScalarOrElt(0, MinFPScalar)
1118 .clampMaxNumElements(1, s64, 2)
1119 .clampMaxNumElements(1, s32, 4)
1120 .clampMaxNumElements(1, s16, 8)
1121 .clampMaxNumElements(1, s32, 2)
1122 .clampMaxNumElements(1, s16, 4)
1123 .scalarize(1)
1124 .lower();
1125
1126 getActionDefinitionsBuilder({G_VECREDUCE_SEQ_FADD, G_VECREDUCE_SEQ_FMUL})
1127 .scalarize(2)
1128 .lower();
1129
1130 getActionDefinitionsBuilder(G_VECREDUCE_ADD)
1131 .legalFor({{s8, v16s8},
1132 {s8, v8s8},
1133 {s16, v8s16},
1134 {s16, v4s16},
1135 {s32, v4s32},
1136 {s32, v2s32},
1137 {s64, v2s64}})
1138 .clampMaxNumElements(1, s64, 2)
1139 .clampMaxNumElements(1, s32, 4)
1140 .clampMaxNumElements(1, s16, 8)
1141 .clampMaxNumElements(1, s8, 16)
1142 .lower();
1143
1144 getActionDefinitionsBuilder({G_VECREDUCE_FMIN, G_VECREDUCE_FMAX,
1145 G_VECREDUCE_FMINIMUM, G_VECREDUCE_FMAXIMUM})
1146 .legalFor({{s32, v4s32}, {s32, v2s32}, {s64, v2s64}})
1147 .legalIf([=](const LegalityQuery &Query) {
1148 const auto &Ty = Query.Types[1];
1149 return Query.Types[0] == s16 && (Ty == v8s16 || Ty == v4s16) && HasFP16;
1150 })
1151 .minScalarOrElt(0, MinFPScalar)
1152 .clampMaxNumElements(1, s64, 2)
1153 .clampMaxNumElements(1, s32, 4)
1154 .clampMaxNumElements(1, s16, 8)
1155 .lower();
1156
1157 getActionDefinitionsBuilder(G_VECREDUCE_MUL)
1158 .clampMaxNumElements(1, s32, 2)
1159 .clampMaxNumElements(1, s16, 4)
1160 .clampMaxNumElements(1, s8, 8)
1161 .scalarize(1)
1162 .lower();
1163
1165 {G_VECREDUCE_SMIN, G_VECREDUCE_SMAX, G_VECREDUCE_UMIN, G_VECREDUCE_UMAX})
1166 .legalFor({{s8, v8s8},
1167 {s8, v16s8},
1168 {s16, v4s16},
1169 {s16, v8s16},
1170 {s32, v2s32},
1171 {s32, v4s32}})
1172 .moreElementsIf(
1173 [=](const LegalityQuery &Query) {
1174 return Query.Types[1].isVector() &&
1175 Query.Types[1].getElementType() != s8 &&
1176 Query.Types[1].getNumElements() & 1;
1177 },
1179 .clampMaxNumElements(1, s64, 2)
1180 .clampMaxNumElements(1, s32, 4)
1181 .clampMaxNumElements(1, s16, 8)
1182 .clampMaxNumElements(1, s8, 16)
1183 .scalarize(1)
1184 .lower();
1185
1187 {G_VECREDUCE_OR, G_VECREDUCE_AND, G_VECREDUCE_XOR})
1188 // Try to break down into smaller vectors as long as they're at least 64
1189 // bits. This lets us use vector operations for some parts of the
1190 // reduction.
1191 .fewerElementsIf(
1192 [=](const LegalityQuery &Q) {
1193 LLT SrcTy = Q.Types[1];
1194 if (SrcTy.isScalar())
1195 return false;
1196 if (!isPowerOf2_32(SrcTy.getNumElements()))
1197 return false;
1198 // We can usually perform 64b vector operations.
1199 return SrcTy.getSizeInBits() > 64;
1200 },
1201 [=](const LegalityQuery &Q) {
1202 LLT SrcTy = Q.Types[1];
1203 return std::make_pair(1, SrcTy.divide(2));
1204 })
1205 .scalarize(1)
1206 .lower();
1207
1208 // TODO: Update this to correct handling when adding AArch64/SVE support.
1209 getActionDefinitionsBuilder(G_VECTOR_COMPRESS).lower();
1210
1211 getActionDefinitionsBuilder({G_FSHL, G_FSHR})
1212 .customFor({{s32, s32}, {s32, s64}, {s64, s64}})
1213 .lower();
1214
1216 .legalFor({{s32, s64}, {s64, s64}})
1217 .customIf([=](const LegalityQuery &Q) {
1218 return Q.Types[0].isScalar() && Q.Types[1].getScalarSizeInBits() < 64;
1219 })
1220 .lower();
1222
1223 getActionDefinitionsBuilder({G_SBFX, G_UBFX})
1224 .customFor({{s32, s32}, {s64, s64}});
1225
1226 auto always = [=](const LegalityQuery &Q) { return true; };
1227 auto &CTPOPActions = getActionDefinitionsBuilder(G_CTPOP);
1228 if (HasCSSC)
1229 CTPOPActions
1230 .legalFor({{s32, s32},
1231 {s64, s64},
1232 {v8s8, v8s8},
1233 {v16s8, v16s8}})
1234 .customFor({{s128, s128},
1235 {v2s64, v2s64},
1236 {v2s32, v2s32},
1237 {v4s32, v4s32},
1238 {v4s16, v4s16},
1239 {v8s16, v8s16}});
1240 else
1241 CTPOPActions
1242 .legalFor({{v8s8, v8s8},
1243 {v16s8, v16s8}})
1244 .customFor({{s32, s32},
1245 {s64, s64},
1246 {s128, s128},
1247 {v2s64, v2s64},
1248 {v2s32, v2s32},
1249 {v4s32, v4s32},
1250 {v4s16, v4s16},
1251 {v8s16, v8s16}});
1252 CTPOPActions
1253 .clampScalar(0, s32, s128)
1254 .widenScalarToNextPow2(0)
1255 .minScalarEltSameAsIf(always, 1, 0)
1256 .maxScalarEltSameAsIf(always, 1, 0);
1257
1258 getActionDefinitionsBuilder({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
1259 .legalFor({v2s64, v2s32, v4s32, v4s16, v8s16, v8s8, v16s8})
1260 .clampNumElements(0, v8s8, v16s8)
1261 .clampNumElements(0, v4s16, v8s16)
1262 .clampNumElements(0, v2s32, v4s32)
1263 .clampMaxNumElements(0, s64, 2)
1265 .lower();
1266
1267 // TODO: Libcall support for s128.
1268 // TODO: s16 should be legal with full FP16 support.
1269 getActionDefinitionsBuilder({G_LROUND, G_LLROUND})
1270 .legalFor({{s64, s32}, {s64, s64}});
1271
1272 // TODO: Custom legalization for mismatched types.
1273 getActionDefinitionsBuilder(G_FCOPYSIGN)
1275 [](const LegalityQuery &Query) { return Query.Types[0].isScalar(); },
1276 [=](const LegalityQuery &Query) {
1277 const LLT Ty = Query.Types[0];
1278 return std::pair(0, LLT::fixed_vector(Ty == s16 ? 4 : 2, Ty));
1279 })
1280 .lower();
1281
1283
1284 // Access to floating-point environment.
1285 getActionDefinitionsBuilder({G_GET_FPENV, G_SET_FPENV, G_RESET_FPENV,
1286 G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
1287 .libcall();
1288
1289 getActionDefinitionsBuilder(G_IS_FPCLASS).lower();
1290
1291 getActionDefinitionsBuilder(G_PREFETCH).custom();
1292
1293 getActionDefinitionsBuilder({G_SCMP, G_UCMP}).lower();
1294
1296 verify(*ST.getInstrInfo());
1297}
1298
1301 LostDebugLocObserver &LocObserver) const {
     // Entry point for custom legalization: dispatch on MI's opcode to the
     // matching legalize* helper and return that helper's boolean result
     // unchanged. Opcodes without a case here are reported as not legalizable.
1302 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1303 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1304 GISelChangeObserver &Observer = Helper.Observer;
1305 switch (MI.getOpcode()) {
1306 default:
1307 // No idea what to do.
1308 return false;
1309 case TargetOpcode::G_VAARG:
1310 return legalizeVaArg(MI, MRI, MIRBuilder);
1311 case TargetOpcode::G_LOAD:
1312 case TargetOpcode::G_STORE:
1313 return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
1314 case TargetOpcode::G_SHL:
1315 case TargetOpcode::G_ASHR:
1316 case TargetOpcode::G_LSHR:
1317 return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
1318 case TargetOpcode::G_GLOBAL_VALUE:
1319 return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
1320 case TargetOpcode::G_SBFX:
1321 case TargetOpcode::G_UBFX:
1322 return legalizeBitfieldExtract(MI, MRI, Helper);
1323 case TargetOpcode::G_FSHL:
1324 case TargetOpcode::G_FSHR:
1325 return legalizeFunnelShift(MI, MRI, MIRBuilder, Observer, Helper);
1326 case TargetOpcode::G_ROTR:
1327 return legalizeRotate(MI, MRI, Helper);
1328 case TargetOpcode::G_CTPOP:
1329 return legalizeCTPOP(MI, MRI, Helper);
1330 case TargetOpcode::G_ATOMIC_CMPXCHG:
1331 return legalizeAtomicCmpxchg128(MI, MRI, Helper);
1332 case TargetOpcode::G_CTTZ:
1333 return legalizeCTTZ(MI, Helper);
1334 case TargetOpcode::G_BZERO:
1335 case TargetOpcode::G_MEMCPY:
1336 case TargetOpcode::G_MEMMOVE:
1337 case TargetOpcode::G_MEMSET:
1338 return legalizeMemOps(MI, Helper);
1339 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1340 return legalizeExtractVectorElt(MI, MRI, Helper);
1341 case TargetOpcode::G_DYN_STACKALLOC:
1342 return legalizeDynStackAlloc(MI, Helper);
1343 case TargetOpcode::G_PREFETCH:
1344 return legalizePrefetch(MI, Helper);
1345 case TargetOpcode::G_ABS:
     // G_ABS has no member helper of its own; it is delegated to the generic
     // LegalizerHelper.
1346 return Helper.lowerAbsToCNeg(MI);
1347 case TargetOpcode::G_ICMP:
1348 return legalizeICMP(MI, MRI, MIRBuilder);
1349 }
1350
     // Every switch arm above returns, so falling out of the switch is a bug.
1351 llvm_unreachable("expected switch to return");
1352}
1353
/// Custom legalization for G_FSHL / G_FSHR.
///
/// A shift amount that is not a (looked-through) integer constant, or that is
/// a multiple of the operation's bit width, is handed to
/// Helper.lowerFunnelShiftAsShifts. Otherwise the instruction ends up as a
/// G_FSHR whose amount operand is a 64-bit G_CONSTANT: an existing G_FSHR is
/// updated in place, while a G_FSHL is erased and replaced by a new G_FSHR
/// shifting by (BitWidth - Amount).
///
/// \returns true when MI was left legal or rewritten as described above. On
/// the generic-lowering path the result depends on the helper's return value.
/// NOTE(review): the right-hand side of that comparison is not visible in this
/// listing.
1354bool AArch64LegalizerInfo::legalizeFunnelShift(MachineInstr &MI,
1356 MachineIRBuilder &MIRBuilder,
1357 GISelChangeObserver &Observer,
1358 LegalizerHelper &Helper) const {
1359 assert(MI.getOpcode() == TargetOpcode::G_FSHL ||
1360 MI.getOpcode() == TargetOpcode::G_FSHR);
1361
1362 // Keep as G_FSHR if shift amount is a G_CONSTANT, else use generic
1363 // lowering
1364 Register ShiftNo = MI.getOperand(3).getReg();
1365 LLT ShiftTy = MRI.getType(ShiftNo);
1366 auto VRegAndVal = getIConstantVRegValWithLookThrough(ShiftNo, MRI);
1367
1368 // Adjust shift amount according to Opcode (FSHL/FSHR)
1369 // Convert FSHL to FSHR
1370 LLT OperationTy = MRI.getType(MI.getOperand(0).getReg());
     // BitWidth is the bit width of the shifted value, stored in an APInt as
     // wide as the shift-amount type so it can be combined with the constant.
1371 APInt BitWidth(ShiftTy.getSizeInBits(), OperationTy.getSizeInBits(), false);
1372
1373 // Lower non-constant shifts and leave zero shifts to the optimizer.
1374 if (!VRegAndVal || VRegAndVal->Value.urem(BitWidth) == 0)
1375 return (Helper.lowerFunnelShiftAsShifts(MI) ==
1377
     // Reduce the constant modulo the bit width; it is non-zero here because
     // the zero case returned above.
1378 APInt Amount = VRegAndVal->Value.urem(BitWidth);
1379
     // A G_FSHL by Amount is re-expressed as a G_FSHR by (BitWidth - Amount).
1380 Amount = MI.getOpcode() == TargetOpcode::G_FSHL ? BitWidth - Amount : Amount;
1381
1382 // If the instruction is G_FSHR, has a 64-bit G_CONSTANT for shift amount
1383 // in the range of 0 <-> BitWidth, it is legal
1384 if (ShiftTy.getSizeInBits() == 64 && MI.getOpcode() == TargetOpcode::G_FSHR &&
1385 VRegAndVal->Value.ult(BitWidth))
1386 return true;
1387
1388 // Cast the ShiftNumber to a 64-bit type
1389 auto Cast64 = MIRBuilder.buildConstant(LLT::scalar(64), Amount.zext(64));
1390
1391 if (MI.getOpcode() == TargetOpcode::G_FSHR) {
     // In-place operand change, so the observer is notified before and after.
1392 Observer.changingInstr(MI);
1393 MI.getOperand(3).setReg(Cast64.getReg(0));
1394 Observer.changedInstr(MI);
1395 }
1396 // If Opcode is FSHL, remove the FSHL instruction and create a FSHR
1397 // instruction
1398 else if (MI.getOpcode() == TargetOpcode::G_FSHL) {
1399 MIRBuilder.buildInstr(TargetOpcode::G_FSHR, {MI.getOperand(0).getReg()},
1400 {MI.getOperand(1).getReg(), MI.getOperand(2).getReg(),
1401 Cast64.getReg(0)});
1402 MI.eraseFromParent();
1403 }
1404 return true;
1405}
1406
/// Custom legalization for G_ICMP.
///
/// Accepts only compares whose destination and source types have the same
/// scalar size and element count and whose destination is 64 or 128 bits wide;
/// anything else returns false. For accepted compares, every predicate other
/// than ICMP_NE is left untouched. An ICMP_NE is replaced by an ICMP_EQ
/// followed by a G_NOT-style inversion written to the original destination,
/// and the original instruction is erased.
///
/// \returns false if the type check fails, true otherwise.
1407bool AArch64LegalizerInfo::legalizeICMP(MachineInstr &MI,
1409 MachineIRBuilder &MIRBuilder) const {
1410 Register DstReg = MI.getOperand(0).getReg();
     // Operand 1 is the predicate (read below); operands 2 and 3 are the two
     // values being compared.
1411 Register SrcReg1 = MI.getOperand(2).getReg();
1412 Register SrcReg2 = MI.getOperand(3).getReg();
1413 LLT DstTy = MRI.getType(DstReg);
1414 LLT SrcTy = MRI.getType(SrcReg1);
1415
1416 // Check the vector types are legal
1417 if (DstTy.getScalarSizeInBits() != SrcTy.getScalarSizeInBits() ||
1418 DstTy.getNumElements() != SrcTy.getNumElements() ||
1419 (DstTy.getSizeInBits() != 64 && DstTy.getSizeInBits() != 128))
1420 return false;
1421
1422 // Lowers G_ICMP NE => G_ICMP EQ to allow better pattern matching for
1423 // following passes
1424 CmpInst::Predicate Pred = (CmpInst::Predicate)MI.getOperand(1).getPredicate();
1425 if (Pred != CmpInst::ICMP_NE)
1426 return true;
     // NE(a, b) == NOT(EQ(a, b)): build the EQ into a fresh register of the
     // destination type, then invert it into DstReg.
1427 Register CmpReg =
1428 MIRBuilder
1429 .buildICmp(CmpInst::ICMP_EQ, MRI.getType(DstReg), SrcReg1, SrcReg2)
1430 .getReg(0);
1431 MIRBuilder.buildNot(DstReg, CmpReg);
1432
1433 MI.eraseFromParent();
1434 return true;
1435}
1436
/// Custom legalization for a rotate whose amount is narrower than 64 bits.
///
/// Zero-extends the rotate amount (operand 2) to s64 and rewrites MI in place
/// to use the extended register, notifying the change observer. The amount is
/// asserted to be a scalar narrower than 64 bits.
///
/// \returns true always.
1437bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,
1439 LegalizerHelper &Helper) const {
1440 // To allow for imported patterns to match, we ensure that the rotate amount
1441 // is 64b with an extension.
1442 Register AmtReg = MI.getOperand(2).getReg();
1443 LLT AmtTy = MRI.getType(AmtReg);
     // AmtTy is only read by the asserts below; the void cast keeps builds
     // without assertions free of an unused-variable warning.
1444 (void)AmtTy;
1445 assert(AmtTy.isScalar() && "Expected a scalar rotate");
1446 assert(AmtTy.getSizeInBits() < 64 && "Expected this rotate to be legal");
1447 auto NewAmt = Helper.MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);
1448 Helper.Observer.changingInstr(MI);
1449 MI.getOperand(2).setReg(NewAmt.getReg(0));
1450 Helper.Observer.changedInstr(MI);
1451 return true;
1452}
1453
/// Custom legalization for G_GLOBAL_VALUE.
///
/// Replaces the instruction with an ADRP followed by a G_ADD_LOW that defines
/// the original destination register. When the reference is classified as
/// MO_TAGGED, a MOVKXi is inserted between the two. The instruction is left
/// untouched (and reported as legal) when its operand is a symbol, when the
/// global is thread-local, or when the reference is classified as MO_GOT.
///
/// \returns true always.
1454bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
1456 GISelChangeObserver &Observer) const {
1457 assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
1458 // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP +
1459 // G_ADD_LOW instructions.
1460 // By splitting this here, we can optimize accesses in the small code model by
1461 // folding in the G_ADD_LOW into the load/store offset.
1462 auto &GlobalOp = MI.getOperand(1);
1463 // Don't modify an intrinsic call.
1464 if (GlobalOp.isSymbol())
1465 return true;
1466 const auto* GV = GlobalOp.getGlobal();
1467 if (GV->isThreadLocal())
1468 return true; // Don't want to modify TLS vars.
1469
1470 auto &TM = ST->getTargetLowering()->getTargetMachine();
1471 unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);
1472
     // References that go through the GOT are left as G_GLOBAL_VALUE.
1473 if (OpFlags & AArch64II::MO_GOT)
1474 return true;
1475
1476 auto Offset = GlobalOp.getOffset();
1477 Register DstReg = MI.getOperand(0).getReg();
1478 auto ADRP = MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {})
1479 .addGlobalAddress(GV, Offset, OpFlags | AArch64II::MO_PAGE);
1480 // Set the regclass on the dest reg too.
1481 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1482
1483 // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so
1484 // by creating a MOVK that sets bits 48-63 of the register to (global address
1485 // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to
1486 // prevent an incorrect tag being generated during relocation when the
1487 // global appears before the code section. Without the offset, a global at
1488 // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced
1489 // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 =
1490 // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe`
1491 // instead of `0xf`.
1492 // This assumes that we're in the small code model so we can assume a binary
1493 // size of <= 4GB, which makes the untagged PC relative offset positive. The
1494 // binary must also be loaded into address range [0, 2^48). Both of these
1495 // properties need to be ensured at runtime when using tagged addresses.
1496 if (OpFlags & AArch64II::MO_TAGGED) {
1497 assert(!Offset &&
1498 "Should not have folded in an offset for a tagged global!");
     // ADRP is rebound to the MOVK result so the G_ADD_LOW below consumes the
     // tagged value. NOTE(review): the target-flags argument of this
     // addGlobalAddress call is not visible in this listing.
1499 ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP})
1500 .addGlobalAddress(GV, 0x100000000,
1502 .addImm(48);
1503 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1504 }
1505
     // G_ADD_LOW defines the original destination register, so existing users
     // of the G_GLOBAL_VALUE result are unaffected when MI is erased.
1506 MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})
1507 .addGlobalAddress(GV, Offset,
1509 MI.eraseFromParent();
1510 return true;
1511}
1512
1514 MachineInstr &MI) const {
     // Legalize target-relevant intrinsics by ID. Intrinsics without a case in
     // the switch fall through to the final `return true` and are left as-is.
1515 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
1516 switch (IntrinsicID) {
1517 case Intrinsic::vacopy: {
     // Copy the whole va_list as one scalar load + store. Sizes are in bytes:
     // a single pointer on Darwin and Windows targets, otherwise 20 bytes for
     // ILP32 and 32 bytes for the default case.
1518 unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;
1519 unsigned VaListSize =
1520 (ST->isTargetDarwin() || ST->isTargetWindows())
1521 ? PtrSize
1522 : ST->isTargetILP32() ? 20 : 32;
1523
1524 MachineFunction &MF = *MI.getMF();
1526 LLT::scalar(VaListSize * 8));
1527 MachineIRBuilder MIB(MI);
     // Operand 2 is the address loaded from, operand 1 the address stored to.
1528 MIB.buildLoad(Val, MI.getOperand(2),
1531 VaListSize, Align(PtrSize)));
1532 MIB.buildStore(Val, MI.getOperand(1),
1535 VaListSize, Align(PtrSize)));
1536 MI.eraseFromParent();
1537 return true;
1538 }
1539 case Intrinsic::get_dynamic_area_offset: {
     // The result is replaced by the constant 0.
1540 MachineIRBuilder &MIB = Helper.MIRBuilder;
1541 MIB.buildConstant(MI.getOperand(0).getReg(), 0);
1542 MI.eraseFromParent();
1543 return true;
1544 }
1545 case Intrinsic::aarch64_mops_memset_tag: {
1546 assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1547 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
1548 // the instruction).
1549 MachineIRBuilder MIB(MI);
1550 auto &Value = MI.getOperand(3);
1551 Register ExtValueReg = MIB.buildAnyExt(LLT::scalar(64), Value).getReg(0);
     // The intrinsic is kept; only its value operand is redirected to the
     // extended register.
1552 Value.setReg(ExtValueReg);
1553 return true;
1554 }
1555 case Intrinsic::aarch64_prefetch: {
1556 MachineIRBuilder MIB(MI);
1557 auto &AddrVal = MI.getOperand(1);
1558
1559 int64_t IsWrite = MI.getOperand(2).getImm();
1560 int64_t Target = MI.getOperand(3).getImm();
1561 int64_t IsStream = MI.getOperand(4).getImm();
1562 int64_t IsData = MI.getOperand(5).getImm();
1563
     // Pack the four immediates into one prefetch-operation immediate; note
     // that the IsData flag is inverted before being placed in bit 3.
1564 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
1565 (!IsData << 3) | // IsDataCache bit
1566 (Target << 1) | // Cache level bits
1567 (unsigned)IsStream; // Stream bit
1568
1569 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
1570 MI.eraseFromParent();
1571 return true;
1572 }
1573 case Intrinsic::aarch64_neon_uaddv:
1574 case Intrinsic::aarch64_neon_saddv:
1575 case Intrinsic::aarch64_neon_umaxv:
1576 case Intrinsic::aarch64_neon_smaxv:
1577 case Intrinsic::aarch64_neon_uminv:
1578 case Intrinsic::aarch64_neon_sminv: {
     // Make the intrinsic's result type equal to the source vector's element
     // type. If they differ, the intrinsic is retargeted to a fresh register of
     // the element type and the original destination is produced from it by a
     // sign- or zero-based extend/truncate inserted immediately after MI.
1579 MachineIRBuilder MIB(MI);
1580 MachineRegisterInfo &MRI = *MIB.getMRI();
1581 bool IsSigned = IntrinsicID == Intrinsic::aarch64_neon_saddv ||
1582 IntrinsicID == Intrinsic::aarch64_neon_smaxv ||
1583 IntrinsicID == Intrinsic::aarch64_neon_sminv;
1584
1585 auto OldDst = MI.getOperand(0).getReg();
1586 auto OldDstTy = MRI.getType(OldDst);
1587 LLT NewDstTy = MRI.getType(MI.getOperand(2).getReg()).getElementType();
1588 if (OldDstTy == NewDstTy)
1589 return true;
1590
1591 auto NewDst = MRI.createGenericVirtualRegister(NewDstTy);
1592
1593 Helper.Observer.changingInstr(MI);
1594 MI.getOperand(0).setReg(NewDst);
1595 Helper.Observer.changedInstr(MI);
1596
     // Advance the insertion point past MI so the conversion reads NewDst
     // after it has been defined.
1597 MIB.setInsertPt(MIB.getMBB(), ++MIB.getInsertPt());
1598 MIB.buildExtOrTrunc(IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT,
1599 OldDst, NewDst);
1600
1601 return true;
1602 }
1603 case Intrinsic::aarch64_neon_uaddlp:
1604 case Intrinsic::aarch64_neon_saddlp: {
     // Replace the intrinsic with the matching G_UADDLP / G_SADDLP opcode,
     // keeping the same destination and source operands.
1605 MachineIRBuilder MIB(MI);
1606
1607 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlp
1608 ? AArch64::G_UADDLP
1609 : AArch64::G_SADDLP;
1610 MIB.buildInstr(Opc, {MI.getOperand(0)}, {MI.getOperand(2)});
1611 MI.eraseFromParent();
1612
1613 return true;
1614 }
1615 case Intrinsic::aarch64_neon_uaddlv:
1616 case Intrinsic::aarch64_neon_saddlv: {
     // Replace the intrinsic with G_UADDLV / G_SADDLV producing a vector,
     // extract lane 0 of that vector, then truncate or copy it into the
     // original destination.
1617 MachineIRBuilder MIB(MI);
1618 MachineRegisterInfo &MRI = *MIB.getMRI();
1619
1620 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlv
1621 ? AArch64::G_UADDLV
1622 : AArch64::G_SADDLV;
1623 Register DstReg = MI.getOperand(0).getReg();
1624 Register SrcReg = MI.getOperand(2).getReg();
1625 LLT DstTy = MRI.getType(DstReg);
1626
     // Scalar results of at most 32 bits go through a v4s32 intermediate with
     // an s32 extract; everything else uses v2s64 with an s64 extract.
1627 LLT MidTy, ExtTy;
1628 if (DstTy.isScalar() && DstTy.getScalarSizeInBits() <= 32) {
1629 MidTy = LLT::fixed_vector(4, 32);
1630 ExtTy = LLT::scalar(32);
1631 } else {
1632 MidTy = LLT::fixed_vector(2, 64);
1633 ExtTy = LLT::scalar(64);
1634 }
1635
1636 Register MidReg =
1637 MIB.buildInstr(Opc, {MidTy}, {SrcReg})->getOperand(0).getReg();
1638 Register ZeroReg =
1639 MIB.buildConstant(LLT::scalar(64), 0)->getOperand(0).getReg();
1640 Register ExtReg = MIB.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT, {ExtTy},
1641 {MidReg, ZeroReg})
1642 .getReg(0);
1643
     // Destinations narrower than 32 bits need a truncate from the s32
     // extract; otherwise the extract is copied to the destination.
1644 if (DstTy.getScalarSizeInBits() < 32)
1645 MIB.buildTrunc(DstReg, ExtReg);
1646 else
1647 MIB.buildCopy(DstReg, ExtReg);
1648
1649 MI.eraseFromParent();
1650
1651 return true;
1652 }
1653 case Intrinsic::aarch64_neon_smax:
1654 case Intrinsic::aarch64_neon_smin:
1655 case Intrinsic::aarch64_neon_umax:
1656 case Intrinsic::aarch64_neon_umin:
1657 case Intrinsic::aarch64_neon_fmax:
1658 case Intrinsic::aarch64_neon_fmin:
1659 case Intrinsic::aarch64_neon_fmaxnm:
1660 case Intrinsic::aarch64_neon_fminnm: {
     // Map each NEON min/max intrinsic onto a generic opcode: smax/smin/
     // umax/umin -> G_SMAX/G_SMIN/G_UMAX/G_UMIN builders, fmax/fmin ->
     // G_FMAXIMUM/G_FMINIMUM, fmaxnm/fminnm -> G_FMAXNUM/G_FMINNUM.
1661 MachineIRBuilder MIB(MI);
1662 if (IntrinsicID == Intrinsic::aarch64_neon_smax)
1663 MIB.buildSMax(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
1664 else if (IntrinsicID == Intrinsic::aarch64_neon_smin)
1665 MIB.buildSMin(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
1666 else if (IntrinsicID == Intrinsic::aarch64_neon_umax)
1667 MIB.buildUMax(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
1668 else if (IntrinsicID == Intrinsic::aarch64_neon_umin)
1669 MIB.buildUMin(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
1670 else if (IntrinsicID == Intrinsic::aarch64_neon_fmax)
1671 MIB.buildInstr(TargetOpcode::G_FMAXIMUM, {MI.getOperand(0)},
1672 {MI.getOperand(2), MI.getOperand(3)});
1673 else if (IntrinsicID == Intrinsic::aarch64_neon_fmin)
1674 MIB.buildInstr(TargetOpcode::G_FMINIMUM, {MI.getOperand(0)},
1675 {MI.getOperand(2), MI.getOperand(3)});
1676 else if (IntrinsicID == Intrinsic::aarch64_neon_fmaxnm)
1677 MIB.buildInstr(TargetOpcode::G_FMAXNUM, {MI.getOperand(0)},
1678 {MI.getOperand(2), MI.getOperand(3)});
1679 else if (IntrinsicID == Intrinsic::aarch64_neon_fminnm)
1680 MIB.buildInstr(TargetOpcode::G_FMINNUM, {MI.getOperand(0)},
1681 {MI.getOperand(2), MI.getOperand(3)});
1682 MI.eraseFromParent();
1683 return true;
1684 }
1685 case Intrinsic::vector_reverse:
1686 // TODO: Add support for vector_reverse
1687 return false;
1688 }
1689
     // Any intrinsic not handled above is accepted unchanged.
1690 return true;
1691}
1692
/// Custom legalization for G_SHL / G_ASHR / G_LSHR.
///
/// When the shift amount (operand 2) is a looked-through integer constant no
/// greater than 31, MI is rewritten in place to take its amount from a new
/// 64-bit G_CONSTANT with the same value. Non-constant amounts and constants
/// above 31 leave MI unchanged.
///
/// \returns true always; the instruction is considered legal either way.
1693bool AArch64LegalizerInfo::legalizeShlAshrLshr(
1695 GISelChangeObserver &Observer) const {
1696 assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
1697 MI.getOpcode() == TargetOpcode::G_LSHR ||
1698 MI.getOpcode() == TargetOpcode::G_SHL);
1699 // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
1700 // imported patterns can select it later. Either way, it will be legal.
1701 Register AmtReg = MI.getOperand(2).getReg();
1702 auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI);
1703 if (!VRegAndVal)
1704 return true;
1705 // Check the shift amount is in range for an immediate form.
     // NOTE(review): the value is read sign-extended and only the upper bound
     // is checked, so a negative constant would also be promoted — confirm
     // this is intended.
1706 int64_t Amount = VRegAndVal->Value.getSExtValue();
1707 if (Amount > 31)
1708 return true; // This will have to remain a register variant.
1709 auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount);
1710 Observer.changingInstr(MI);
1711 MI.getOperand(2).setReg(ExtCst.getReg(0));
1712 Observer.changedInstr(MI);
1713 return true;
1714}
1715
// Default result: address Root directly with a zero offset.
1718 Base = Root;
1719 Offset = 0;
1720
     // If Root is a G_PTR_ADD of a register and an integer constant that
     // passes isShiftedInt<7, 3>, split it into that base and offset instead.
     // Presumably this is the signed, 8-byte-scaled immediate range of
     // LDP/STP; the caller divides the returned offset by 8 — TODO confirm
     // against the instruction encoding.
1721 Register NewBase;
1722 int64_t NewOffset;
1723 if (mi_match(Root, MRI, m_GPtrAdd(m_Reg(NewBase), m_ICst(NewOffset))) &&
1724 isShiftedInt<7, 3>(NewOffset)) {
1725 Base = NewBase;
1726 Offset = NewOffset;
1727 }
1728}
1729
1730// FIXME: This should be removed and replaced with the generic bitcast legalize
1731// action.
//
// Custom legalization for G_LOAD / G_STORE. Two value types are handled:
//  * s128: the access is replaced by a paired 64-bit target instruction
//    (LDIAPPX/STILPX or LDPXi/STPXi) plus a merge (load) or unmerge (store).
//  * vectors of address-space-0 pointers: the access is rebuilt on a vector of
//    same-sized integer elements with a G_BITCAST to/from the pointer vector.
// Any other value type returns false. On success the original instruction is
// erased and true is returned.
1732bool AArch64LegalizerInfo::legalizeLoadStore(
1734 GISelChangeObserver &Observer) const {
1735 assert(MI.getOpcode() == TargetOpcode::G_STORE ||
1736 MI.getOpcode() == TargetOpcode::G_LOAD);
1737 // Here we just try to handle vector loads/stores where our value type might
1738 // have pointer elements, which the SelectionDAG importer can't handle. To
1739 // allow the existing patterns for s64 to fire for p0, we just try to bitcast
1740 // the value to use s64 types.
1741
1742 // Custom legalization requires the instruction, if not deleted, must be fully
1743 // legalized. In order to allow further legalization of the inst, we create
1744 // a new instruction and erase the existing one.
1745
1746 Register ValReg = MI.getOperand(0).getReg();
1747 const LLT ValTy = MRI.getType(ValReg);
1748
1749 if (ValTy == LLT::scalar(128)) {
1750
     // The acquire/release pair instructions are used only when both LSE2 and
     // RCPC3 are available and the access is a load-acquire or store-release.
1751 AtomicOrdering Ordering = (*MI.memoperands_begin())->getSuccessOrdering();
1752 bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
1753 bool IsLoadAcquire = IsLoad && Ordering == AtomicOrdering::Acquire;
1754 bool IsStoreRelease = !IsLoad && Ordering == AtomicOrdering::Release;
1755 bool IsRcpC3 =
1756 ST->hasLSE2() && ST->hasRCPC3() && (IsLoadAcquire || IsStoreRelease);
1757
1758 LLT s64 = LLT::scalar(64);
1759
1760 unsigned Opcode;
1761 if (IsRcpC3) {
1762 Opcode = IsLoad ? AArch64::LDIAPPX : AArch64::STILPX;
1763 } else {
1764 // For LSE2, loads/stores should have been converted to monotonic and had
1765 // a fence inserted after them.
1766 assert(Ordering == AtomicOrdering::Monotonic ||
1767 Ordering == AtomicOrdering::Unordered);
1768 assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");
1769
1770 Opcode = IsLoad ? AArch64::LDPXi : AArch64::STPXi;
1771 }
1772
     // Loads define two s64 halves that are merged into the s128 result;
     // stores first split the s128 value into two s64 halves.
1774 if (IsLoad) {
1775 NewI = MIRBuilder.buildInstr(Opcode, {s64, s64}, {});
1776 MIRBuilder.buildMergeLikeInstr(
1777 ValReg, {NewI->getOperand(0), NewI->getOperand(1)});
1778 } else {
1779 auto Split = MIRBuilder.buildUnmerge(s64, MI.getOperand(0));
1780 NewI = MIRBuilder.buildInstr(
1781 Opcode, {}, {Split->getOperand(0), Split->getOperand(1)});
1782 }
1783
     // The RCPC3 forms take only the address register; the LDP/STP forms take
     // a base register plus an immediate expressed in units of 8 bytes.
1784 if (IsRcpC3) {
1785 NewI.addUse(MI.getOperand(1).getReg());
1786 } else {
1787 Register Base;
1788 int Offset;
1789 matchLDPSTPAddrMode(MI.getOperand(1).getReg(), Base, Offset, MRI);
1790 NewI.addUse(Base);
1791 NewI.addImm(Offset / 8);
1792 }
1793
1794 NewI.cloneMemRefs(MI);
1796 *MRI.getTargetRegisterInfo(),
1797 *ST->getRegBankInfo());
1798 MI.eraseFromParent();
1799 return true;
1800 }
1801
     // Beyond s128, only vectors of address-space-0 pointers are handled.
1802 if (!ValTy.isPointerVector() ||
1803 ValTy.getElementType().getAddressSpace() != 0) {
1804 LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
1805 return false;
1806 }
1807
     // NewTy has the same element count as ValTy with integer elements of the
     // pointer's size; the existing memory operand is retyped to match and
     // then reused for the replacement instruction.
1808 unsigned PtrSize = ValTy.getElementType().getSizeInBits();
1809 const LLT NewTy = LLT::vector(ValTy.getElementCount(), PtrSize);
1810 auto &MMO = **MI.memoperands_begin();
1811 MMO.setType(NewTy);
1812
1813 if (MI.getOpcode() == TargetOpcode::G_STORE) {
1814 auto Bitcast = MIRBuilder.buildBitcast(NewTy, ValReg);
1815 MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1), MMO);
1816 } else {
1817 auto NewLoad = MIRBuilder.buildLoad(NewTy, MI.getOperand(1), MMO);
1818 MIRBuilder.buildBitcast(ValReg, NewLoad);
1819 }
1820 MI.eraseFromParent();
1821 return true;
1822}
1823
/// Custom legalization for G_VAARG.
///
/// Expands the instruction into: a load of the current argument pointer from
/// the list, an optional realignment of that pointer, a load of the value
/// into the original destination, and a store of the advanced pointer back to
/// the list. The original instruction is erased.
///
/// \returns true always.
1824bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
1826 MachineIRBuilder &MIRBuilder) const {
1827 MachineFunction &MF = MIRBuilder.getMF();
     // Operand 0 is the result, operand 1 the pointer to the list, and
     // operand 2 the required alignment as an immediate.
1828 Align Alignment(MI.getOperand(2).getImm());
1829 Register Dst = MI.getOperand(0).getReg();
1830 Register ListPtr = MI.getOperand(1).getReg();
1831
1832 LLT PtrTy = MRI.getType(ListPtr);
1833 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
1834
1835 const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
1836 const Align PtrAlign = Align(PtrSize);
     // Load the current argument pointer stored at ListPtr.
1837 auto List = MIRBuilder.buildLoad(
1838 PtrTy, ListPtr,
1840 PtrTy, PtrAlign));
1841
1842 MachineInstrBuilder DstPtr;
1843 if (Alignment > PtrAlign) {
1844 // Realign the list to the actual required alignment.
     // Round up: add (Alignment - 1), then clear the low Log2(Alignment) bits.
1845 auto AlignMinus1 =
1846 MIRBuilder.buildConstant(IntPtrTy, Alignment.value() - 1);
1847 auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0));
1848 DstPtr = MIRBuilder.buildMaskLowPtrBits(PtrTy, ListTmp, Log2(Alignment));
1849 } else
1850 DstPtr = List;
1851
     // Load the argument itself from the (possibly realigned) pointer.
1852 LLT ValTy = MRI.getType(Dst);
1853 uint64_t ValSize = ValTy.getSizeInBits() / 8;
1854 MIRBuilder.buildLoad(
1855 Dst, DstPtr,
1857 ValTy, std::max(Alignment, PtrAlign)));
1858
     // Advance past the argument by its size rounded up to pointer alignment.
1859 auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrAlign));
1860
1861 auto NewList = MIRBuilder.buildPtrAdd(PtrTy, DstPtr, Size.getReg(0));
1862
     // Write the advanced pointer back to ListPtr.
1863 MIRBuilder.buildStore(NewList, ListPtr,
1866 PtrTy, PtrAlign));
1867
1868 MI.eraseFromParent();
1869 return true;
1870}
1871
/// Custom legalization check for G_SBFX / G_UBFX.
///
/// The instruction is not modified. It is accepted only when both operand 2
/// and operand 3 resolve to integer constants (looking through intermediate
/// instructions).
///
/// \returns true if both operands are constants, false otherwise.
1872bool AArch64LegalizerInfo::legalizeBitfieldExtract(
1874 // Only legal if we can select immediate forms.
1875 // TODO: Lower this otherwise.
1876 return getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI) &&
1877 getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
1878}
1879
// Custom legalization of a population-count instruction whose source
// (operand 1) and destination (operand 0) share the same type.
//
// Strategy, in the order the code tries it:
//   1. CSSC available and the value is a 128-bit scalar: split into two s64
//      halves, popcount each, add, zero-extend into Dst.
//   2. No NEON, or the function is marked NoImplicitFloat: fall back to
//      Helper.lowerBitCount, and only for 32/64-bit scalars.
//   3. Otherwise bitcast to a byte vector, popcount per byte, then sum the
//      byte counts either with G_UDOT (dot-product subtargets, non-16-bit
//      element vectors) or with a chain of uaddlv/uaddlp intrinsics.
// MI is erased on every successful path.
// NOTE(review): this listing omits one signature line (the body uses `MRI`),
// the tail of the generic-lowering return expression, and the declarations of
// `Sum` and `UADD` -- confirm against the upstream file.
1880bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
1882 LegalizerHelper &Helper) const {
1883 // When there is no integer popcount instruction (FEAT_CSSC isn't available),
1884 // it can be more efficiently lowered to the following sequence that uses
1885 // AdvSIMD registers/instructions as long as the copies to/from the AdvSIMD
1886 // registers are cheap.
1887 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
1888 // CNT V0.8B, V0.8B // 8xbyte pop-counts
1889 // ADDV B0, V0.8B // sum 8xbyte pop-counts
1890 // UMOV X0, V0.B[0] // copy byte result back to integer reg
1891 //
1892 // For 128 bit vector popcounts, we lower to the following sequence:
1893 // cnt.16b v0, v0 // v8s16, v4s32, v2s64
1894 // uaddlp.8h v0, v0 // v8s16, v4s32, v2s64
1895 // uaddlp.4s v0, v0 // v4s32, v2s64
1896 // uaddlp.2d v0, v0 // v2s64
1897 //
1898 // For 64 bit vector popcounts, we lower to the following sequence:
1899 // cnt.8b v0, v0 // v4s16, v2s32
1900 // uaddlp.4h v0, v0 // v4s16, v2s32
1901 // uaddlp.2s v0, v0 // v2s32
1902
1903 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1904 Register Dst = MI.getOperand(0).getReg();
1905 Register Val = MI.getOperand(1).getReg();
1906 LLT Ty = MRI.getType(Val);
1907 unsigned Size = Ty.getSizeInBits();
1908
1909 assert(Ty == MRI.getType(Dst) &&
1910 "Expected src and dst to have the same type!");
1911
 // CSSC path for s128: popcount each 64-bit half separately, add the two
 // counts, and zero-extend the s64 sum into the 128-bit destination.
1912 if (ST->hasCSSC() && Ty.isScalar() && Size == 128) {
1913 LLT s64 = LLT::scalar(64);
1914
1915 auto Split = MIRBuilder.buildUnmerge(s64, Val);
1916 auto CTPOP1 = MIRBuilder.buildCTPOP(s64, Split->getOperand(0));
1917 auto CTPOP2 = MIRBuilder.buildCTPOP(s64, Split->getOperand(1));
1918 auto Add = MIRBuilder.buildAdd(s64, CTPOP1, CTPOP2);
1919
1920 MIRBuilder.buildZExt(Dst, Add);
1921 MI.eraseFromParent();
1922 return true;
1923 }
1924
 // The vector-based sequence below cannot be used without NEON or when the
 // function carries the NoImplicitFloat attribute.
1925 if (!ST->hasNEON() ||
1926 MI.getMF()->getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) {
1927 // Use generic lowering when custom lowering is not possible.
1928 return Ty.isScalar() && (Size == 32 || Size == 64) &&
1929 Helper.lowerBitCount(MI) ==
1931 }
1932
1933 // Pre-conditioning: widen Val up to the nearest vector type.
1934 // s32,s64,v4s16,v2s32 -> v8i8
1935 // v8s16,v4s32,v2s64 -> v16i8
1936 LLT VTy = Size == 128 ? LLT::fixed_vector(16, 8) : LLT::fixed_vector(8, 8);
1937 if (Ty.isScalar()) {
1938 assert((Size == 32 || Size == 64 || Size == 128) && "Expected only 32, 64, or 128 bit scalars!");
 // A 32-bit scalar is zero-extended to 64 bits first so that it can be
 // bitcast to the 64-bit byte vector.
1939 if (Size == 32) {
1940 Val = MIRBuilder.buildZExt(LLT::scalar(64), Val).getReg(0);
1941 }
1942 }
1943 Val = MIRBuilder.buildBitcast(VTy, Val).getReg(0);
1944
1945 // Count bits in each byte-sized lane.
1946 auto CTPOP = MIRBuilder.buildCTPOP(VTy, Val);
1947
1948 // Sum across lanes.
1949
 // Dot-product path: G_UDOT is given a zero accumulator, an all-ones byte
 // vector and the per-byte counts. For v2s64 the accumulator type is v4s32
 // and one extra G_UADDLP produces the v2s64 result.
1950 if (ST->hasDotProd() && Ty.isVector() && Ty.getNumElements() >= 2 &&
1951 Ty.getScalarSizeInBits() != 16) {
1952 LLT Dt = Ty == LLT::fixed_vector(2, 64) ? LLT::fixed_vector(4, 32) : Ty;
1953 auto Zeros = MIRBuilder.buildConstant(Dt, 0);
1954 auto Ones = MIRBuilder.buildConstant(VTy, 1);
1956
1957 if (Ty == LLT::fixed_vector(2, 64)) {
1958 auto UDOT =
1959 MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
1960 Sum = MIRBuilder.buildInstr(AArch64::G_UADDLP, {Ty}, {UDOT});
1961 } else if (Ty == LLT::fixed_vector(4, 32)) {
1962 Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
1963 } else if (Ty == LLT::fixed_vector(2, 32)) {
1964 Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
1965 } else {
1966 llvm_unreachable("unexpected vector shape");
1967 }
1968
 // Retarget the final instruction's def to Dst instead of emitting a copy.
1969 Sum->getOperand(0).setReg(Dst);
1970 MI.eraseFromParent();
1971 return true;
1972 }
1973
 // Intrinsic path: choose the widening-add intrinsic and the list of result
 // types to step through; each step consumes the previous step's result.
1974 Register HSum = CTPOP.getReg(0);
1975 unsigned Opc;
1976 SmallVector<LLT> HAddTys;
1977 if (Ty.isScalar()) {
1978 Opc = Intrinsic::aarch64_neon_uaddlv;
1979 HAddTys.push_back(LLT::scalar(32));
1980 } else if (Ty == LLT::fixed_vector(8, 16)) {
1981 Opc = Intrinsic::aarch64_neon_uaddlp;
1982 HAddTys.push_back(LLT::fixed_vector(8, 16));
1983 } else if (Ty == LLT::fixed_vector(4, 32)) {
1984 Opc = Intrinsic::aarch64_neon_uaddlp;
1985 HAddTys.push_back(LLT::fixed_vector(8, 16));
1986 HAddTys.push_back(LLT::fixed_vector(4, 32));
1987 } else if (Ty == LLT::fixed_vector(2, 64)) {
1988 Opc = Intrinsic::aarch64_neon_uaddlp;
1989 HAddTys.push_back(LLT::fixed_vector(8, 16));
1990 HAddTys.push_back(LLT::fixed_vector(4, 32));
1991 HAddTys.push_back(LLT::fixed_vector(2, 64));
1992 } else if (Ty == LLT::fixed_vector(4, 16)) {
1993 Opc = Intrinsic::aarch64_neon_uaddlp;
1994 HAddTys.push_back(LLT::fixed_vector(4, 16));
1995 } else if (Ty == LLT::fixed_vector(2, 32)) {
1996 Opc = Intrinsic::aarch64_neon_uaddlp;
1997 HAddTys.push_back(LLT::fixed_vector(4, 16));
1998 HAddTys.push_back(LLT::fixed_vector(2, 32));
1999 } else
2000 llvm_unreachable("unexpected vector shape");
2002 for (LLT HTy : HAddTys) {
2003 UADD = MIRBuilder.buildIntrinsic(Opc, {HTy}).addUse(HSum);
2004 HSum = UADD.getReg(0);
2005 }
2006
2007 // Post-conditioning.
 // The scalar reduction yields s32, so 64/128-bit scalars need a zero-extend
 // into Dst; in every other case the last add's def is retargeted to Dst.
2008 if (Ty.isScalar() && (Size == 64 || Size == 128))
2009 MIRBuilder.buildZExt(Dst, UADD);
2010 else
2011 UADD->getOperand(0).setReg(Dst);
2012 MI.eraseFromParent();
2013 return true;
2014}
2015
// Custom legalization of a 128-bit atomic compare-and-swap.
//
// Operand 1 is the address; operands 2 and 3 are each split into two s64
// halves. With LSE the halves are repacked into s128 registers and a CASP*
// instruction is emitted; without LSE a CMP_SWAP_128* pseudo taking the four
// halves plus a scratch GPR64 is emitted. In both cases the opcode is chosen
// from the memory operand's merged atomic ordering, MI's memory references
// are cloned onto the new instruction, and the two s64 results are merged
// back into MI's operand 0 before MI is erased.
// NOTE(review): this listing omits the parameter-list line, the declaration
// of `CAS`, every `case` label of both ordering switches, and the start of
// the call whose trailing arguments appear after `cloneMemRefs` (presumably
// constrainSelectedInstRegOperands) -- confirm against the upstream file.
2016bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128(
2018 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2019 LLT s64 = LLT::scalar(64);
2020 auto Addr = MI.getOperand(1).getReg();
2021 auto DesiredI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(2));
2022 auto NewI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(3));
2023 auto DstLo = MRI.createGenericVirtualRegister(s64);
2024 auto DstHi = MRI.createGenericVirtualRegister(s64);
2025
2027 if (ST->hasLSE()) {
2028 // We have 128-bit CASP instructions taking XSeqPair registers, which are
2029 // s128. We need the merge/unmerge to bracket the expansion and pair up with
2030 // the rest of the MIR so we must reassemble the extracted registers into a
2031 // 128-bit known-regclass one with code like this:
2032 //
2033 // %in1 = REG_SEQUENCE Lo, Hi ; One for each input
2034 // %out = CASP %in1, ...
2035 // %OldLo = G_EXTRACT %out, 0
2036 // %OldHi = G_EXTRACT %out, 64
2037 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2038 unsigned Opcode;
2039 switch (Ordering) {
2041 Opcode = AArch64::CASPAX;
2042 break;
2044 Opcode = AArch64::CASPLX;
2045 break;
2048 Opcode = AArch64::CASPALX;
2049 break;
2050 default:
2051 Opcode = AArch64::CASPX;
2052 break;
2053 }
2054
2055 LLT s128 = LLT::scalar(128);
2056 auto CASDst = MRI.createGenericVirtualRegister(s128);
2057 auto CASDesired = MRI.createGenericVirtualRegister(s128);
2058 auto CASNew = MRI.createGenericVirtualRegister(s128);
 // Pack each (lo, hi) pair into one s128 register: half 0 goes to the
 // sube64 sub-register index and half 1 to subo64.
2059 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASDesired}, {})
2060 .addUse(DesiredI->getOperand(0).getReg())
2061 .addImm(AArch64::sube64)
2062 .addUse(DesiredI->getOperand(1).getReg())
2063 .addImm(AArch64::subo64);
2064 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASNew}, {})
2065 .addUse(NewI->getOperand(0).getReg())
2066 .addImm(AArch64::sube64)
2067 .addUse(NewI->getOperand(1).getReg())
2068 .addImm(AArch64::subo64);
2069
2070 CAS = MIRBuilder.buildInstr(Opcode, {CASDst}, {CASDesired, CASNew, Addr});
2071
 // Split the 128-bit result back into halves at bit offsets 0 and 64.
2072 MIRBuilder.buildExtract({DstLo}, {CASDst}, 0);
2073 MIRBuilder.buildExtract({DstHi}, {CASDst}, 64);
2074 } else {
2075 // The -O0 CMP_SWAP_128 is friendlier to generate code for because LDXP/STXP
2076 // can take arbitrary registers so it just has the normal GPR64 operands the
2077 // rest of AArch64 is expecting.
2078 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2079 unsigned Opcode;
2080 switch (Ordering) {
2082 Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
2083 break;
2085 Opcode = AArch64::CMP_SWAP_128_RELEASE;
2086 break;
2089 Opcode = AArch64::CMP_SWAP_128;
2090 break;
2091 default:
2092 Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
2093 break;
2094 }
2095
 // The pseudo defines a third register besides the two result halves; it is
 // created directly in the GPR64 register class rather than as a generic
 // virtual register.
2096 auto Scratch = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2097 CAS = MIRBuilder.buildInstr(Opcode, {DstLo, DstHi, Scratch},
2098 {Addr, DesiredI->getOperand(0),
2099 DesiredI->getOperand(1), NewI->getOperand(0),
2100 NewI->getOperand(1)});
2101 }
2102
 // Carry MI's memory operands over to the replacement instruction.
2103 CAS.cloneMemRefs(MI);
2105 *MRI.getTargetRegisterInfo(),
2106 *ST->getRegBankInfo());
2107
2108 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {DstLo, DstHi});
2109 MI.eraseFromParent();
2110 return true;
2111}
2112
2113bool AArch64LegalizerInfo::legalizeCTTZ(MachineInstr &MI,
2114 LegalizerHelper &Helper) const {
2115 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2116 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2117 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
2118 auto BitReverse = MIRBuilder.buildBitReverse(Ty, MI.getOperand(1));
2119 MIRBuilder.buildCTLZ(MI.getOperand(0).getReg(), BitReverse);
2120 MI.eraseFromParent();
2121 return true;
2122}
2123
2124bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
2125 LegalizerHelper &Helper) const {
2126 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2127
2128 // Tagged version MOPSMemorySetTagged is legalised in legalizeIntrinsic
2129 if (MI.getOpcode() == TargetOpcode::G_MEMSET) {
2130 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
2131 // the instruction).
2132 auto &Value = MI.getOperand(1);
2133 Register ExtValueReg =
2134 MIRBuilder.buildAnyExt(LLT::scalar(64), Value).getReg(0);
2135 Value.setReg(ExtValueReg);
2136 return true;
2137 }
2138
2139 return false;
2140}
2141
// Custom legalization of G_EXTRACT_VECTOR_ELT.
//
// When operand 2 resolves to an integer constant the instruction is accepted
// unchanged. Otherwise the generic Helper.lowerExtractInsertVectorElt lowering
// is applied and its result decides the return value.
// NOTE(review): this listing omits the parameter-list line and the right-hand
// side of the final `!=` comparison (presumably the UnableToLegalize result)
// -- confirm against the upstream file.
2142bool AArch64LegalizerInfo::legalizeExtractVectorElt(
2144 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
2145 auto VRegAndVal =
2146 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
 // Constant operand 2: nothing to do, keep MI as it is.
2147 if (VRegAndVal)
2148 return true;
2149 return Helper.lowerExtractInsertVectorElt(MI) !=
2151}
// Custom legalization of a dynamic stack allocation.
//
// Unless the function has a "probe-stack" attribute whose value is
// "inline-asm", the generic Helper.lowerDynStackAlloc lowering is used.
// Otherwise the target stack pointer value is computed with
// Helper.getDynStackAllocTargetPtr, a PROBED_STACKALLOC_DYN pseudo is emitted
// on it, and the computed pointer is copied into the destination (operand 0).
// MI is erased on the probing path. Always returns true.
// NOTE(review): the initializer of `SPReg` is absent from this listing --
// confirm against the upstream file.
2153bool AArch64LegalizerInfo::legalizeDynStackAlloc(
2154 MachineInstr &MI, LegalizerHelper &Helper) const {
2155 MachineFunction &MF = *MI.getParent()->getParent();
2156 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2157 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2158
2159 // If stack probing is not enabled for this function, use the default
2160 // lowering.
2161 if (!MF.getFunction().hasFnAttribute("probe-stack") ||
2162 MF.getFunction().getFnAttribute("probe-stack").getValueAsString() !=
2163 "inline-asm") {
2164 Helper.lowerDynStackAlloc(MI);
2165 return true;
2166 }
2167
2168 Register Dst = MI.getOperand(0).getReg();
2169 Register AllocSize = MI.getOperand(1).getReg();
 // assumeAligned treats an immediate of 0 as an alignment of 1.
2170 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
2171
2172 assert(MRI.getType(Dst) == LLT::pointer(0, 64) &&
2173 "Unexpected type for dynamic alloca");
2174 assert(MRI.getType(AllocSize) == LLT::scalar(64) &&
2175 "Unexpected type for dynamic alloca");
2176
2177 LLT PtrTy = MRI.getType(Dst);
2178 Register SPReg =
2180 Register SPTmp =
2181 Helper.getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
2182 auto NewMI =
2183 MIRBuilder.buildInstr(AArch64::PROBED_STACKALLOC_DYN, {}, {SPTmp});
 // Operand 0 of the pseudo is SPTmp; constrain it to the GPR64common class.
2184 MRI.setRegClass(NewMI.getReg(0), &AArch64::GPR64commonRegClass);
 // Move the insertion point so the copy to Dst lands before the pseudo.
2185 MIRBuilder.setInsertPt(*NewMI->getParent(), NewMI);
2186 MIRBuilder.buildCopy(Dst, SPTmp);
2187
2188 MI.eraseFromParent();
2189 return true;
2190}
2191
2192bool AArch64LegalizerInfo::legalizePrefetch(MachineInstr &MI,
2193 LegalizerHelper &Helper) const {
2194 MachineIRBuilder &MIB = Helper.MIRBuilder;
2195 auto &AddrVal = MI.getOperand(0);
2196
2197 int64_t IsWrite = MI.getOperand(1).getImm();
2198 int64_t Locality = MI.getOperand(2).getImm();
2199 int64_t IsData = MI.getOperand(3).getImm();
2200
2201 bool IsStream = Locality == 0;
2202 if (Locality != 0) {
2203 assert(Locality <= 3 && "Prefetch locality out-of-range");
2204 // The locality degree is the opposite of the cache speed.
2205 // Put the number the other way around.
2206 // The encoding starts at 0 for level 1
2207 Locality = 3 - Locality;
2208 }
2209
2210 unsigned PrfOp = (IsWrite << 4) | (!IsData << 3) | (Locality << 1) | IsStream;
2211
2212 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
2213 MI.eraseFromParent();
2214 return true;
2215}
unsigned const MachineRegisterInfo * MRI
static void matchLDPSTPAddrMode(Register Root, Register &Base, int &Offset, MachineRegisterInfo &MRI)
This file declares the targeting of the Machinelegalizer class for AArch64.
This file declares the targeting of the RegisterBankInfo class for AArch64.
static Error unsupported(const char *Str, const Triple &T)
Definition: MachO.cpp:71
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Addr
uint64_t Size
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
IRTranslator LLVM IR MI
Interface for Targets to specify which operations they can successfully select and how the others sho...
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineIRBuilder class.
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
ppc ctr loops verify
if(VerifyEach)
const char LLVMTargetMachineRef TM
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, LostDebugLocObserver &LocObserver) const override
Called for instructions with the Custom LegalizationAction.
bool legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const override
AArch64LegalizerInfo(const AArch64Subtarget &ST)
const AArch64InstrInfo * getInstrInfo() const override
const AArch64TargetLowering * getTargetLowering() const override
unsigned ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const
ClassifyGlobalReference - Find the target operand flags that describe how a global value should be re...
const RegisterBankInfo * getRegBankInfo() const override
Class for arbitrary precision integers.
Definition: APInt.h:78
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1636
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1522
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:392
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:757
@ ICMP_EQ
equal
Definition: InstrTypes.h:778
@ ICMP_NE
not equal
Definition: InstrTypes.h:779
This class represents an Operation in the Expression.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:769
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:743
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
constexpr unsigned getScalarSizeInBits() const
Definition: LowLevelType.h:267
constexpr bool isScalar() const
Definition: LowLevelType.h:146
static constexpr LLT scalable_vector(unsigned MinNumElements, unsigned ScalarSizeInBits)
Get a low-level scalable vector of some number of elements and element width.
Definition: LowLevelType.h:113
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
Definition: LowLevelType.h:64
constexpr bool isPointerVector() const
Definition: LowLevelType.h:152
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
Definition: LowLevelType.h:159
constexpr bool isVector() const
Definition: LowLevelType.h:148
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
Definition: LowLevelType.h:57
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
Definition: LowLevelType.h:193
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
Definition: LowLevelType.h:290
constexpr ElementCount getElementCount() const
Definition: LowLevelType.h:184
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
Definition: LowLevelType.h:221
constexpr unsigned getAddressSpace() const
Definition: LowLevelType.h:280
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
Definition: LowLevelType.h:100
constexpr LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
Definition: LowLevelType.h:230
constexpr LLT divide(int Factor) const
Return a type that is Factor times smaller.
Definition: LowLevelType.h:237
void computeTables()
Compute any ancillary tables needed to quickly decide how an operation should be handled.
LegalizeRuleSet & minScalar(unsigned TypeIdx, const LLT Ty)
Ensure the scalar is at least as wide as Ty.
LegalizeRuleSet & legalFor(std::initializer_list< LLT > Types)
The instruction is legal when type index 0 is any type in the given list.
LegalizeRuleSet & unsupported()
The instruction is unsupported.
LegalizeRuleSet & scalarSameSizeAs(unsigned TypeIdx, unsigned SameSizeIdx)
Change the type TypeIdx to have the same scalar size as type SameSizeIdx.
LegalizeRuleSet & libcallFor(std::initializer_list< LLT > Types)
LegalizeRuleSet & maxScalar(unsigned TypeIdx, const LLT Ty)
Ensure the scalar is at most as wide as Ty.
LegalizeRuleSet & minScalarOrElt(unsigned TypeIdx, const LLT Ty)
Ensure the scalar or element is at least as wide as Ty.
LegalizeRuleSet & clampMaxNumElements(unsigned TypeIdx, const LLT EltTy, unsigned MaxElements)
Limit the number of elements in EltTy vectors to at most MaxElements.
LegalizeRuleSet & clampMinNumElements(unsigned TypeIdx, const LLT EltTy, unsigned MinElements)
Limit the number of elements in EltTy vectors to at least MinElements.
LegalizeRuleSet & widenVectorEltsToVectorMinSize(unsigned TypeIdx, unsigned VectorSize)
Ensure the vector size is at least as wide as VectorSize by promoting the element.
LegalizeRuleSet & minScalarEltSameAsIf(LegalityPredicate Predicate, unsigned TypeIdx, unsigned LargeTypeIdx)
Conditionally widen the scalar or elt to match the size of another.
LegalizeRuleSet & customForCartesianProduct(std::initializer_list< LLT > Types)
LegalizeRuleSet & moreElementsToNextPow2(unsigned TypeIdx)
Add more elements to the vector to reach the next power of two.
LegalizeRuleSet & lower()
The instruction is lowered.
LegalizeRuleSet & moreElementsIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
Add more elements to reach the type selected by the mutation if the predicate is true.
LegalizeRuleSet & lowerIf(LegalityPredicate Predicate)
The instruction is lowered if predicate is true.
LegalizeRuleSet & clampScalar(unsigned TypeIdx, const LLT MinTy, const LLT MaxTy)
Limit the range of scalar sizes to MinTy and MaxTy.
LegalizeRuleSet & custom()
Unconditionally custom lower.
LegalizeRuleSet & minScalarSameAs(unsigned TypeIdx, unsigned LargeTypeIdx)
Widen the scalar to match the size of another.
LegalizeRuleSet & unsupportedIf(LegalityPredicate Predicate)
LegalizeRuleSet & minScalarOrEltIf(LegalityPredicate Predicate, unsigned TypeIdx, const LLT Ty)
Ensure the scalar or element is at least as wide as Ty.
LegalizeRuleSet & clampNumElements(unsigned TypeIdx, const LLT MinTy, const LLT MaxTy)
Limit the number of elements for the given vectors to at least MinTy's number of elements and at most...
LegalizeRuleSet & maxScalarIf(LegalityPredicate Predicate, unsigned TypeIdx, const LLT Ty)
Conditionally limit the maximum size of the scalar.
LegalizeRuleSet & customIf(LegalityPredicate Predicate)
LegalizeRuleSet & widenScalarToNextPow2(unsigned TypeIdx, unsigned MinSize=0)
Widen the scalar to the next power of two that is at least MinSize.
LegalizeRuleSet & scalarize(unsigned TypeIdx)
LegalizeRuleSet & legalForCartesianProduct(std::initializer_list< LLT > Types)
The instruction is legal when type indexes 0 and 1 are both in the given list.
LegalizeRuleSet & legalForTypesWithMemDesc(std::initializer_list< LegalityPredicates::TypePairAndMemDesc > TypesAndMemDesc)
The instruction is legal when type indexes 0 and 1 along with the memory size and minimum alignment i...
LegalizeRuleSet & libcallIf(LegalityPredicate Predicate)
Like legalIf, but for the Libcall action.
LegalizeRuleSet & legalIf(LegalityPredicate Predicate)
The instruction is legal if predicate is true.
LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LegalizeResult lowerBitCount(MachineInstr &MI)
LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
const TargetLowering & getTargetLowering() const
LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LegalizeRuleSet & getActionDefinitionsBuilder(unsigned Opcode)
Get the action definition builder for the given opcode.
const LegacyLegalizerInfo & getLegacyLegalizerInfo() const
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Helper class to build MachineInstr.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildNot(const DstOp &Dst, const SrcOp &Src0)
Build and insert a bitwise not, NegOne = G_CONSTANT -1 Res = G_XOR Op0, NegOne.
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildExtract(const DstOp &Res, const SrcOp &Src, uint64_t Index)
Build and insert Res0, ... = G_EXTRACT Src, Idx0.
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
MachineBasicBlock::iterator getInsertPt()
Current insertion point for new instructions.
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildIntrinsic(Intrinsic::ID ID, ArrayRef< Register > Res, bool HasSideEffects, bool isConvergent)
Build and insert a G_INTRINSIC instruction.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Op0, Src0.
MachineInstrBuilder buildSMax(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_SMAX Op0, Op1.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildBitReverse(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITREVERSE Src.
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildCTPOP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTPOP Op0, Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildSMin(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_SMIN Op0, Op1.
MachineInstrBuilder buildExtOrTrunc(unsigned ExtOpc, const DstOp &Res, const SrcOp &Op)
Build and insert Res = ExtOpc, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes of...
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op0.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
MachineInstrBuilder buildMaskLowPtrBits(const DstOp &Res, const SrcOp &Op0, uint32_t NumBits)
Build and insert Res = G_PTRMASK Op0, G_CONSTANT (1 << NumBits) - 1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
MachineInstrBuilder buildUMin(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_UMIN Op0, Op1.
MachineInstrBuilder buildUMax(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_UMAX Op0, Op1.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
Representation of each machine instruction.
Definition: MachineInstr.h:69
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:579
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
void setReg(Register Reg)
Change the register this operand corresponds to.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
void push_back(const T &Elt)
Definition: SmallVector.h:427
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1210
const TargetMachine & getTargetMachine() const
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
Target - Wrapper for Target specific information.
LLVM Value Representation.
Definition: Value.h:74
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:254
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_PREL
MO_PREL - Indicates that the bits of the symbol operand represented by MO_G0 etc are PC relative.
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_TAGGED
MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag in bits 56-63.
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
LegalityPredicate scalarOrEltWiderThan(unsigned TypeIdx, unsigned Size)
True iff the specified type index is a scalar or a vector with an element type that's wider than the ...
LegalityPredicate typeInSet(unsigned TypeIdx, std::initializer_list< LLT > TypesInit)
True iff the given type index is one of the specified types.
LegalityPredicate smallerThan(unsigned TypeIdx0, unsigned TypeIdx1)
True iff the first type index has a smaller total bit size than second type index.
LegalityPredicate atomicOrderingAtLeastOrStrongerThan(unsigned MMOIdx, AtomicOrdering Ordering)
True iff the specified MMO index has an atomic ordering of Ordering or stronger.
LegalityPredicate isVector(unsigned TypeIdx)
True iff the specified type index is a vector.
Predicate all(Predicate P0, Predicate P1)
True iff P0 and P1 are true.
LegalityPredicate typeIs(unsigned TypeIdx, LLT TypesInit)
True iff the given type index is the specified type.
Predicate predNot(Predicate P)
True iff P is false.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
Definition: LegalizerInfo.h:74
LegalizeMutation moreElementsToNextPow2(unsigned TypeIdx, unsigned Min=0)
Add more elements to the type for the given type index to the next power of.
LegalizeMutation scalarize(unsigned TypeIdx)
Break up the vector type for the given type index into the element type.
LegalizeMutation widenScalarOrEltToNextPow2(unsigned TypeIdx, unsigned Min=0)
Widen the scalar type or vector element type for the given type index to the next power of 2.
LegalizeMutation changeTo(unsigned TypeIdx, LLT Ty)
Select this specific type for the given type index.
LegalizeMutation changeElementSizeTo(unsigned TypeIdx, unsigned FromTypeIdx)
Change the scalar size or element size to have the same scalar size as type index FromIndex.
operand_type_match m_Reg()
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
bool constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...
Definition: Utils.cpp:155
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Add
Sum of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition: Utils.cpp:433
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
Definition: Alignment.h:111
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
std::function< bool(const LegalityQuery &)> LegalityPredicate
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
ArrayRef< MemDesc > MMODescrs
Operations which require memory can use this to place requirements on the memory type for each MMO.
ArrayRef< LLT > Types
This class contains a discriminated union of information about pointers in memory operands,...