Bug Summary

File: llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
Warning: line 602, column 40
Division by zero
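
Analyzer path, in brief: in the legalIf predicate for G_MERGE_VALUES/G_UNMERGE_VALUES (source lines 595-603), the analyzer assumes LitTy.getSizeInBits() can be zero once LitTy.isVector() has returned false, so BigTy.getSizeInBits() % LitTy.getSizeInBits() at line 602 divides by zero. The sketch below is a minimal, hypothetical guard written against that predicate; it only illustrates where a zero-size check could short-circuit the modulo and is not presented as the upstream fix.

    .legalIf([=](const LegalityQuery &Query) {
      const LLT &BigTy = Query.Types[BigTyIdx];
      const LLT &LitTy = Query.Types[LitTyIdx];
      if (BigTy.isVector() && BigTy.getSizeInBits() < 32)
        return false;
      if (LitTy.isVector() && LitTy.getSizeInBits() < 32)
        return false;
      // Hypothetical guard: reject a zero-sized little type before the modulo
      // that the analyzer flags at line 602.
      if (LitTy.getSizeInBits() == 0)
        return false;
      return BigTy.getSizeInBits() % LitTy.getSizeInBits() == 0;
    })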

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name AArch64LegalizerInfo.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/build-llvm/lib/Target/AArch64 -resource-dir /usr/lib/llvm-13/lib/clang/13.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/build-llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/build-llvm/include -I /build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/include -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-13/lib/clang/13.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/build-llvm/lib/Target/AArch64 -fdebug-prefix-map=/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-06-21-164211-33944-1 -x c++ /build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

1//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the MachineLegalizer class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
14#include "AArch64LegalizerInfo.h"
15#include "AArch64RegisterBankInfo.h"
16#include "AArch64Subtarget.h"
17#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
18#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
19#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
20#include "llvm/CodeGen/GlobalISel/Utils.h"
21#include "llvm/CodeGen/MachineInstr.h"
22#include "llvm/CodeGen/MachineRegisterInfo.h"
23#include "llvm/CodeGen/TargetOpcodes.h"
24#include "llvm/CodeGen/ValueTypes.h"
25#include "llvm/IR/DerivedTypes.h"
26#include "llvm/IR/IntrinsicsAArch64.h"
27#include "llvm/IR/Type.h"
28#include "llvm/Support/MathExtras.h"
29#include <initializer_list>
30
31#define DEBUG_TYPE "aarch64-legalinfo"
32
33using namespace llvm;
34using namespace LegalizeActions;
35using namespace LegalizeMutations;
36using namespace LegalityPredicates;
37
38AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
39 : ST(&ST) {
40 using namespace TargetOpcode;
41 const LLT p0 = LLT::pointer(0, 64);
42 const LLT s1 = LLT::scalar(1);
43 const LLT s8 = LLT::scalar(8);
44 const LLT s16 = LLT::scalar(16);
45 const LLT s32 = LLT::scalar(32);
46 const LLT s64 = LLT::scalar(64);
47 const LLT s128 = LLT::scalar(128);
48 const LLT s256 = LLT::scalar(256);
49 const LLT s512 = LLT::scalar(512);
50 const LLT v16s8 = LLT::vector(16, 8);
51 const LLT v8s8 = LLT::vector(8, 8);
52 const LLT v4s8 = LLT::vector(4, 8);
53 const LLT v8s16 = LLT::vector(8, 16);
54 const LLT v4s16 = LLT::vector(4, 16);
55 const LLT v2s16 = LLT::vector(2, 16);
56 const LLT v2s32 = LLT::vector(2, 32);
57 const LLT v4s32 = LLT::vector(4, 32);
58 const LLT v2s64 = LLT::vector(2, 64);
59 const LLT v2p0 = LLT::vector(2, p0);
60
61 std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
62 v16s8, v8s16, v4s32,
63 v2s64, v2p0,
64 /* End 128bit types */
65 /* Begin 64bit types */
66 v8s8, v4s16, v2s32};
67
68 const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();
69
70 // FIXME: support subtargets which have neon/fp-armv8 disabled.
71 if (!ST.hasNEON() || !ST.hasFPARMv8()) {
72 getLegacyLegalizerInfo().computeTables();
73 return;
74 }
75
76 // Some instructions only support s16 if the subtarget has full 16-bit FP
77 // support.
78 const bool HasFP16 = ST.hasFullFP16();
79 const LLT &MinFPScalar = HasFP16 ? s16 : s32;
80
81 getActionDefinitionsBuilder({G_IMPLICIT_DEF, G_FREEZE})
82 .legalFor({p0, s1, s8, s16, s32, s64})
83 .legalFor(PackedVectorAllTypeList)
84 .clampScalar(0, s1, s64)
85 .widenScalarToNextPow2(0, 8)
86 .fewerElementsIf(
87 [=](const LegalityQuery &Query) {
88 return Query.Types[0].isVector() &&
89 (Query.Types[0].getElementType() != s64 ||
90 Query.Types[0].getNumElements() != 2);
91 },
92 [=](const LegalityQuery &Query) {
93 LLT EltTy = Query.Types[0].getElementType();
94 if (EltTy == s64)
95 return std::make_pair(0, LLT::vector(2, 64));
96 return std::make_pair(0, EltTy);
97 });
98
99 getActionDefinitionsBuilder(G_PHI).legalFor({p0, s16, s32, s64})
100 .legalFor(PackedVectorAllTypeList)
101 .clampScalar(0, s16, s64)
102 .widenScalarToNextPow2(0);
103
104 getActionDefinitionsBuilder(G_BSWAP)
105 .legalFor({s32, s64, v4s32, v2s32, v2s64})
106 .clampScalar(0, s32, s64)
107 .widenScalarToNextPow2(0);
108
109 getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
110 .legalFor({s32, s64, v2s32, v4s32, v4s16, v8s16, v16s8, v8s8})
111 .scalarizeIf(
112 [=](const LegalityQuery &Query) {
113 return Query.Opcode == G_MUL && Query.Types[0] == v2s64;
114 },
115 0)
116 .legalFor({v2s64})
117 .clampScalar(0, s32, s64)
118 .widenScalarToNextPow2(0)
119 .clampNumElements(0, v2s32, v4s32)
120 .clampNumElements(0, v2s64, v2s64)
121 .moreElementsToNextPow2(0);
122
123 getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
124 .customIf([=](const LegalityQuery &Query) {
125 const auto &SrcTy = Query.Types[0];
126 const auto &AmtTy = Query.Types[1];
127 return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
128 AmtTy.getSizeInBits() == 32;
129 })
130 .legalFor({
131 {s32, s32},
132 {s32, s64},
133 {s64, s64},
134 {v8s8, v8s8},
135 {v16s8, v16s8},
136 {v4s16, v4s16},
137 {v8s16, v8s16},
138 {v2s32, v2s32},
139 {v4s32, v4s32},
140 {v2s64, v2s64},
141 })
142 .clampScalar(1, s32, s64)
143 .clampScalar(0, s32, s64)
144 .widenScalarToNextPow2(0)
145 .clampNumElements(0, v2s32, v4s32)
146 .clampNumElements(0, v2s64, v2s64)
147 .moreElementsToNextPow2(0)
148 .minScalarSameAs(1, 0);
149
150 getActionDefinitionsBuilder(G_PTR_ADD)
151 .legalFor({{p0, s64}, {v2p0, v2s64}})
152 .clampScalar(1, s64, s64);
153
154 getActionDefinitionsBuilder(G_PTRMASK).legalFor({{p0, s64}});
155
156 getActionDefinitionsBuilder({G_SDIV, G_UDIV})
157 .legalFor({s32, s64})
158 .libcallFor({s128})
159 .clampScalar(0, s32, s64)
160 .widenScalarToNextPow2(0)
161 .scalarize(0);
162
163 getActionDefinitionsBuilder({G_SREM, G_UREM, G_SDIVREM, G_UDIVREM})
164 .lowerFor({s1, s8, s16, s32, s64});
165
166 getActionDefinitionsBuilder({G_SMULO, G_UMULO}).lowerFor({{s64, s1}});
167
168 getActionDefinitionsBuilder({G_SMULH, G_UMULH}).legalFor({s32, s64});
169
170 getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
171 .lowerIf([=](const LegalityQuery &Q) { return Q.Types[0].isScalar(); });
172
173 getActionDefinitionsBuilder(
174 {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
175 .legalFor({{s32, s1}, {s64, s1}})
176 .clampScalar(0, s32, s64)
177 .widenScalarToNextPow2(0);
178
179 getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FNEG})
180 .legalFor({s32, s64, v2s64, v4s32, v2s32})
181 .clampNumElements(0, v2s32, v4s32)
182 .clampNumElements(0, v2s64, v2s64);
183
184 getActionDefinitionsBuilder(G_FREM).libcallFor({s32, s64});
185
186 getActionDefinitionsBuilder({G_FCEIL, G_FABS, G_FSQRT, G_FFLOOR, G_FRINT,
187 G_FMA, G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND,
188 G_FNEARBYINT, G_INTRINSIC_LRINT})
189 // If we don't have full FP16 support, then scalarize the elements of
190 // vectors containing fp16 types.
191 .fewerElementsIf(
192 [=, &ST](const LegalityQuery &Query) {
193 const auto &Ty = Query.Types[0];
194 return Ty.isVector() && Ty.getElementType() == s16 &&
195 !ST.hasFullFP16();
196 },
197 [=](const LegalityQuery &Query) { return std::make_pair(0, s16); })
198 // If we don't have full FP16 support, then widen s16 to s32 if we
199 // encounter it.
200 .widenScalarIf(
201 [=, &ST](const LegalityQuery &Query) {
202 return Query.Types[0] == s16 && !ST.hasFullFP16();
203 },
204 [=](const LegalityQuery &Query) { return std::make_pair(0, s32); })
205 .legalFor({s16, s32, s64, v2s32, v4s32, v2s64, v2s16, v4s16, v8s16});
206
207 getActionDefinitionsBuilder(
208 {G_FCOS, G_FSIN, G_FLOG10, G_FLOG, G_FLOG2, G_FEXP, G_FEXP2, G_FPOW})
209 // We need a call for these, so we always need to scalarize.
210 .scalarize(0)
211 // Regardless of FP16 support, widen 16-bit elements to 32-bits.
212 .minScalar(0, s32)
213 .libcallFor({s32, s64, v2s32, v4s32, v2s64});
214
215 getActionDefinitionsBuilder(G_INSERT)
216 .unsupportedIf([=](const LegalityQuery &Query) {
217 return Query.Types[0].getSizeInBits() <= Query.Types[1].getSizeInBits();
218 })
219 .legalIf([=](const LegalityQuery &Query) {
220 const LLT &Ty0 = Query.Types[0];
221 const LLT &Ty1 = Query.Types[1];
222 if (Ty0 != s32 && Ty0 != s64 && Ty0 != p0)
223 return false;
224 return isPowerOf2_32(Ty1.getSizeInBits()) &&
225 (Ty1.getSizeInBits() == 1 || Ty1.getSizeInBits() >= 8);
226 })
227 .clampScalar(0, s32, s64)
228 .widenScalarToNextPow2(0)
229 .maxScalarIf(typeInSet(0, {s32}), 1, s16)
230 .maxScalarIf(typeInSet(0, {s64}), 1, s32)
231 .widenScalarToNextPow2(1);
232
233 getActionDefinitionsBuilder(G_EXTRACT)
234 .unsupportedIf([=](const LegalityQuery &Query) {
235 return Query.Types[0].getSizeInBits() >= Query.Types[1].getSizeInBits();
236 })
237 .legalIf([=](const LegalityQuery &Query) {
238 const LLT &Ty0 = Query.Types[0];
239 const LLT &Ty1 = Query.Types[1];
240 if (Ty1 != s32 && Ty1 != s64 && Ty1 != s128)
241 return false;
242 if (Ty1 == p0)
243 return true;
244 return isPowerOf2_32(Ty0.getSizeInBits()) &&
245 (Ty0.getSizeInBits() == 1 || Ty0.getSizeInBits() >= 8);
246 })
247 .clampScalar(1, s32, s128)
248 .widenScalarToNextPow2(1)
249 .maxScalarIf(typeInSet(1, {s32}), 0, s16)
250 .maxScalarIf(typeInSet(1, {s64}), 0, s32)
251 .widenScalarToNextPow2(0);
252
253 getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
254 .lowerIf(atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered))
255 .legalForTypesWithMemDesc({{s32, p0, 8, 8},
256 {s32, p0, 16, 8},
257 {s32, p0, 32, 8},
258 {s64, p0, 8, 2},
259 {s64, p0, 16, 2},
260 {s64, p0, 32, 4},
261 {s64, p0, 64, 8},
262 {p0, p0, 64, 8},
263 {v2s32, p0, 64, 8}})
264 .clampScalar(0, s32, s64)
265 .widenScalarToNextPow2(0)
266 // TODO: We could support sum-of-pow2's but the lowering code doesn't know
267 // how to do that yet.
268 .unsupportedIfMemSizeNotPow2()
269 // Lower anything left over into G_*EXT and G_LOAD
270 .lower();
271
272 auto IsPtrVecPred = [=](const LegalityQuery &Query) {
273 const LLT &ValTy = Query.Types[0];
274 if (!ValTy.isVector())
275 return false;
276 const LLT EltTy = ValTy.getElementType();
277 return EltTy.isPointer() && EltTy.getAddressSpace() == 0;
278 };
279
280 getActionDefinitionsBuilder(G_LOAD)
281 .legalForTypesWithMemDesc({{s8, p0, 8, 8},
282 {s16, p0, 16, 8},
283 {s32, p0, 32, 8},
284 {s64, p0, 64, 8},
285 {p0, p0, 64, 8},
286 {s128, p0, 128, 8},
287 {v8s8, p0, 64, 8},
288 {v16s8, p0, 128, 8},
289 {v4s16, p0, 64, 8},
290 {v8s16, p0, 128, 8},
291 {v2s32, p0, 64, 8},
292 {v4s32, p0, 128, 8},
293 {v2s64, p0, 128, 8}})
294 // These extends are also legal
295 .legalForTypesWithMemDesc({{s32, p0, 8, 8}, {s32, p0, 16, 8}})
296 .clampScalar(0, s8, s64)
297 .lowerIfMemSizeNotPow2()
298 .widenScalarToNextPow2(0)
299 .narrowScalarIf([=](const LegalityQuery &Query) {
300 // Clamp extending load results to 32-bits.
301 return Query.Types[0].isScalar() &&
302 Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits &&
303 Query.Types[0].getSizeInBits() > 32;
304 },
305 changeTo(0, s32))
306 // Lower any any-extending loads left into G_ANYEXT and G_LOAD
307 .lowerIf([=](const LegalityQuery &Query) {
308 return Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
309 })
310 .clampMaxNumElements(0, s8, 16)
311 .clampMaxNumElements(0, s16, 8)
312 .clampMaxNumElements(0, s32, 4)
313 .clampMaxNumElements(0, s64, 2)
314 .customIf(IsPtrVecPred);
315
316 getActionDefinitionsBuilder(G_STORE)
317 .legalForTypesWithMemDesc({{s8, p0, 8, 8},
318 {s16, p0, 8, 8}, // truncstorei8 from s16
319 {s32, p0, 8, 8}, // truncstorei8 from s32
320 {s64, p0, 8, 8}, // truncstorei8 from s64
321 {s16, p0, 16, 8},
322 {s32, p0, 16, 8}, // truncstorei16 from s32
323 {s64, p0, 16, 8}, // truncstorei16 from s64
324 {s32, p0, 8, 8},
325 {s32, p0, 16, 8},
326 {s32, p0, 32, 8},
327 {s64, p0, 64, 8},
328 {s64, p0, 32, 8}, // truncstorei32 from s64
329 {p0, p0, 64, 8},
330 {s128, p0, 128, 8},
331 {v16s8, p0, 128, 8},
332 {v8s8, p0, 64, 8},
333 {v4s16, p0, 64, 8},
334 {v8s16, p0, 128, 8},
335 {v2s32, p0, 64, 8},
336 {v4s32, p0, 128, 8},
337 {v2s64, p0, 128, 8}})
338 .clampScalar(0, s8, s64)
339 .lowerIf([=](const LegalityQuery &Query) {
340 return Query.Types[0].isScalar() &&
341 Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
342 })
343 // Maximum: sN * k = 128
344 .clampMaxNumElements(0, s8, 16)
345 .clampMaxNumElements(0, s16, 8)
346 .clampMaxNumElements(0, s32, 4)
347 .clampMaxNumElements(0, s64, 2)
348 .lowerIfMemSizeNotPow2()
349 .customIf(IsPtrVecPred);
350
351 // Constants
352 getActionDefinitionsBuilder(G_CONSTANT)
353 .legalFor({p0, s8, s16, s32, s64})
354 .clampScalar(0, s8, s64)
355 .widenScalarToNextPow2(0);
356 getActionDefinitionsBuilder(G_FCONSTANT)
357 .legalIf([=](const LegalityQuery &Query) {
358 const auto &Ty = Query.Types[0];
359 if (HasFP16 && Ty == s16)
360 return true;
361 return Ty == s32 || Ty == s64 || Ty == s128;
362 })
363 .clampScalar(0, MinFPScalar, s128);
364
365 getActionDefinitionsBuilder({G_ICMP, G_FCMP})
366 .legalFor({{s32, s32},
367 {s32, s64},
368 {s32, p0},
369 {v4s32, v4s32},
370 {v2s32, v2s32},
371 {v2s64, v2s64},
372 {v2s64, v2p0},
373 {v4s16, v4s16},
374 {v8s16, v8s16},
375 {v8s8, v8s8},
376 {v16s8, v16s8}})
377 .clampScalar(1, s32, s64)
378 .clampScalar(0, s32, s32)
379 .minScalarEltSameAsIf(
380 [=](const LegalityQuery &Query) {
381 const LLT &Ty = Query.Types[0];
382 const LLT &SrcTy = Query.Types[1];
383 return Ty.isVector() && !SrcTy.getElementType().isPointer() &&
384 Ty.getElementType() != SrcTy.getElementType();
385 },
386 0, 1)
387 .minScalarOrEltIf(
388 [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
389 1, s32)
390 .minScalarOrEltIf(
391 [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0,
392 s64)
393 .widenScalarOrEltToNextPow2(1)
394 .clampNumElements(0, v2s32, v4s32);
395
396 // Extensions
397 auto ExtLegalFunc = [=](const LegalityQuery &Query) {
398 unsigned DstSize = Query.Types[0].getSizeInBits();
399
400 if (DstSize == 128 && !Query.Types[0].isVector())
401 return false; // Extending to a scalar s128 needs narrowing.
402
403 // Make sure that we have something that will fit in a register, and
404 // make sure it's a power of 2.
405 if (DstSize < 8 || DstSize > 128 || !isPowerOf2_32(DstSize))
406 return false;
407
408 const LLT &SrcTy = Query.Types[1];
409
410 // Special case for s1.
411 if (SrcTy == s1)
412 return true;
413
414 // Make sure we fit in a register otherwise. Don't bother checking that
415 // the source type is below 128 bits. We shouldn't be allowing anything
416 // through which is wider than the destination in the first place.
417 unsigned SrcSize = SrcTy.getSizeInBits();
418 if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
419 return false;
420
421 return true;
422 };
423 getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
424 .legalIf(ExtLegalFunc)
425 .clampScalar(0, s64, s64); // Just for s128, others are handled above.
426
427 getActionDefinitionsBuilder(G_TRUNC)
428 .minScalarOrEltIf(
429 [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
430 0, s8)
431 .customIf([=](const LegalityQuery &Query) {
432 LLT DstTy = Query.Types[0];
433 LLT SrcTy = Query.Types[1];
434 return DstTy == v8s8 && SrcTy.getSizeInBits() > 128;
435 })
436 .alwaysLegal();
437
438 getActionDefinitionsBuilder(G_SEXT_INREG).legalFor({s32, s64}).lower();
439
440 // FP conversions
441 getActionDefinitionsBuilder(G_FPTRUNC)
442 .legalFor(
443 {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
444 .clampMaxNumElements(0, s32, 2);
445 getActionDefinitionsBuilder(G_FPEXT)
446 .legalFor(
447 {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
448 .clampMaxNumElements(0, s64, 2);
449
450 // Conversions
451 getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
452 .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
453 .clampScalar(0, s32, s64)
454 .widenScalarToNextPow2(0)
455 .clampScalar(1, s32, s64)
456 .widenScalarToNextPow2(1);
457
458 getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
459 .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
460 .clampScalar(1, s32, s64)
461 .minScalarSameAs(1, 0)
462 .clampScalar(0, s32, s64)
463 .widenScalarToNextPow2(0);
464
465 // Control-flow
466 getActionDefinitionsBuilder(G_BRCOND).legalFor({s1, s8, s16, s32});
467 getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
468
469 getActionDefinitionsBuilder(G_SELECT)
470 .legalFor({{s32, s1}, {s64, s1}, {p0, s1}})
471 .clampScalar(0, s32, s64)
472 .widenScalarToNextPow2(0)
473 .minScalarEltSameAsIf(all(isVector(0), isVector(1)), 1, 0)
474 .lowerIf(isVector(0));
475
476 // Pointer-handling
477 getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
478
479 if (TM.getCodeModel() == CodeModel::Small)
480 getActionDefinitionsBuilder(G_GLOBAL_VALUE).custom();
481 else
482 getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
483
484 getActionDefinitionsBuilder(G_PTRTOINT)
485 .legalForCartesianProduct({s1, s8, s16, s32, s64}, {p0})
486 .maxScalar(0, s64)
487 .widenScalarToNextPow2(0, /*Min*/ 8);
488
489 getActionDefinitionsBuilder(G_INTTOPTR)
490 .unsupportedIf([&](const LegalityQuery &Query) {
491 return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
492 })
493 .legalFor({{p0, s64}});
494
495 // Casts for 32 and 64-bit width type are just copies.
496 // Same for 128-bit width type, except they are on the FPR bank.
497 getActionDefinitionsBuilder(G_BITCAST)
498 // FIXME: This is wrong since G_BITCAST is not allowed to change the
499 // number of bits but it's what the previous code described and fixing
500 // it breaks tests.
501 .legalForCartesianProduct({s1, s8, s16, s32, s64, s128, v16s8, v8s8, v4s8,
502 v8s16, v4s16, v2s16, v4s32, v2s32, v2s64,
503 v2p0});
504
505 getActionDefinitionsBuilder(G_VASTART).legalFor({p0});
506
507 // va_list must be a pointer, but most sized types are pretty easy to handle
508 // as the destination.
509 getActionDefinitionsBuilder(G_VAARG)
510 .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
511 .clampScalar(0, s8, s64)
512 .widenScalarToNextPow2(0, /*Min*/ 8);
513
514 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
515 .lowerIf(
516 all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(1, s1), typeIs(2, p0)));
517
518 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
519 .legalIf(all(typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0)))
520 .customIf([](const LegalityQuery &Query) {
521 return Query.Types[0].getSizeInBits() == 128;
522 });
523
524 getActionDefinitionsBuilder(
525 {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND,
526 G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MIN, G_ATOMICRMW_MAX,
527 G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
528 .legalIf(all(typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0)));
529
530 getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});
531
532 // Merge/Unmerge
533 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
534 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
535 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
536
537 auto notValidElt = [](const LegalityQuery &Query, unsigned TypeIdx) {
538 const LLT &Ty = Query.Types[TypeIdx];
539 if (Ty.isVector()) {
540 const LLT &EltTy = Ty.getElementType();
541 if (EltTy.getSizeInBits() < 8 || EltTy.getSizeInBits() > 64)
542 return true;
543 if (!isPowerOf2_32(EltTy.getSizeInBits()))
544 return true;
545 }
546 return false;
547 };
548
549 // FIXME: This rule is horrible, but specifies the same as what we had
550 // before with the particularly strange definitions removed (e.g.
551 // s8 = G_MERGE_VALUES s32, s32).
552 // Part of the complexity comes from these ops being extremely flexible. For
553 // example, you can build/decompose vectors with it, concatenate vectors,
554 // etc. and in addition to this you can also bitcast with it at the same
555 // time. We've been considering breaking it up into multiple ops to make it
556 // more manageable throughout the backend.
557 getActionDefinitionsBuilder(Op)
558 // Break up vectors with weird elements into scalars
559 .fewerElementsIf(
560 [=](const LegalityQuery &Query) { return notValidElt(Query, 0); },
561 scalarize(0))
562 .fewerElementsIf(
563 [=](const LegalityQuery &Query) { return notValidElt(Query, 1); },
564 scalarize(1))
565 // Clamp the big scalar to s8-s512 and make it either a power of 2, 192,
566 // or 384.
567 .clampScalar(BigTyIdx, s8, s512)
568 .widenScalarIf(
569 [=](const LegalityQuery &Query) {
570 const LLT &Ty = Query.Types[BigTyIdx];
571 return !isPowerOf2_32(Ty.getSizeInBits()) &&
572 Ty.getSizeInBits() % 64 != 0;
573 },
574 [=](const LegalityQuery &Query) {
575 // Pick the next power of 2, or a multiple of 64 over 128.
576 // Whichever is smaller.
577 const LLT &Ty = Query.Types[BigTyIdx];
578 unsigned NewSizeInBits = 1
579 << Log2_32_Ceil(Ty.getSizeInBits() + 1);
580 if (NewSizeInBits >= 256) {
581 unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1);
582 if (RoundedTo < NewSizeInBits)
583 NewSizeInBits = RoundedTo;
584 }
585 return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits));
586 })
587 // Clamp the little scalar to s8-s256 and make it a power of 2. It's not
588 // worth considering the multiples of 64 since 2*192 and 2*384 are not
589 // valid.
590 .clampScalar(LitTyIdx, s8, s256)
591 .widenScalarToNextPow2(LitTyIdx, /*Min*/ 8)
592 // So at this point, we have s8, s16, s32, s64, s128, s192, s256, s384,
593 // s512, <X x s8>, <X x s16>, <X x s32>, or <X x s64>.
594 // At this point it's simple enough to accept the legal types.
595 .legalIf([=](const LegalityQuery &Query) {
596 const LLT &BigTy = Query.Types[BigTyIdx];
597 const LLT &LitTy = Query.Types[LitTyIdx];
598 if (BigTy.isVector() && BigTy.getSizeInBits() < 32)
1. Calling 'LLT::isVector'
3. Returning from 'LLT::isVector'
4. Assuming the condition is false
599 return false;
600 if (LitTy.isVector() && LitTy.getSizeInBits() < 32)
5. Calling 'LLT::isVector'
7. Returning from 'LLT::isVector'
601 return false;
602 return BigTy.getSizeInBits() % LitTy.getSizeInBits() == 0;
8. Calling 'LLT::getSizeInBits'
11. Returning from 'LLT::getSizeInBits'
12. Division by zero
603 })
604 // Any vectors left are the wrong size. Scalarize them.
605 .scalarize(0)
606 .scalarize(1);
607 }
608
609 getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
610 .unsupportedIf([=](const LegalityQuery &Query) {
611 const LLT &EltTy = Query.Types[1].getElementType();
612 return Query.Types[0] != EltTy;
613 })
614 .minScalar(2, s64)
615 .legalIf([=](const LegalityQuery &Query) {
616 const LLT &VecTy = Query.Types[1];
617 return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
618 VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32 ||
619 VecTy == v16s8 || VecTy == v2s32 || VecTy == v2p0;
620 })
621 .minScalarOrEltIf(
622 [=](const LegalityQuery &Query) {
623 // We want to promote to <M x s1> to <M x s64> if that wouldn't
624 // cause the total vec size to be > 128b.
625 return Query.Types[1].getNumElements() <= 2;
626 },
627 0, s64)
628 .minScalarOrEltIf(
629 [=](const LegalityQuery &Query) {
630 return Query.Types[1].getNumElements() <= 4;
631 },
632 0, s32)
633 .minScalarOrEltIf(
634 [=](const LegalityQuery &Query) {
635 return Query.Types[1].getNumElements() <= 8;
636 },
637 0, s16)
638 .minScalarOrEltIf(
639 [=](const LegalityQuery &Query) {
640 return Query.Types[1].getNumElements() <= 16;
641 },
642 0, s8)
643 .minScalarOrElt(0, s8) // Worst case, we need at least s8.
644 .clampMaxNumElements(1, s64, 2)
645 .clampMaxNumElements(1, s32, 4)
646 .clampMaxNumElements(1, s16, 8);
647
648 getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
649 .legalIf(typeInSet(0, {v8s16, v2s32, v4s32, v2s64}));
650
651 getActionDefinitionsBuilder(G_BUILD_VECTOR)
652 .legalFor({{v8s8, s8},
653 {v16s8, s8},
654 {v4s16, s16},
655 {v8s16, s16},
656 {v2s32, s32},
657 {v4s32, s32},
658 {v2p0, p0},
659 {v2s64, s64}})
660 .clampNumElements(0, v4s32, v4s32)
661 .clampNumElements(0, v2s64, v2s64)
662 .minScalarSameAs(1, 0);
663
664 getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).lower();
665
666 getActionDefinitionsBuilder(G_CTLZ)
667 .legalForCartesianProduct(
668 {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
669 .scalarize(1);
670 getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).lower();
671
672 // TODO: Custom lowering for v2s32, v4s32, v2s64.
673 getActionDefinitionsBuilder(G_BITREVERSE).legalFor({s32, s64, v8s8, v16s8});
674
675 getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF).lower();
676
677 // TODO: Handle vector types.
678 getActionDefinitionsBuilder(G_CTTZ)
679 .clampScalar(0, s32, s64)
680 .scalarSameSizeAs(1, 0)
681 .customFor({s32, s64});
682
683 getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
684 .legalIf([=](const LegalityQuery &Query) {
685 const LLT &DstTy = Query.Types[0];
686 const LLT &SrcTy = Query.Types[1];
687 // For now just support the TBL2 variant which needs the source vectors
688 // to be the same size as the dest.
689 if (DstTy != SrcTy)
690 return false;
691 for (auto &Ty : {v2s32, v4s32, v2s64, v2p0, v16s8, v8s16}) {
692 if (DstTy == Ty)
693 return true;
694 }
695 return false;
696 })
697 // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), we
698 // just want those lowered into G_BUILD_VECTOR
699 .lowerIf([=](const LegalityQuery &Query) {
700 return !Query.Types[1].isVector();
701 })
702 .clampNumElements(0, v4s32, v4s32)
703 .clampNumElements(0, v2s64, v2s64);
704
705 getActionDefinitionsBuilder(G_CONCAT_VECTORS)
706 .legalFor({{v4s32, v2s32}, {v8s16, v4s16}});
707
708 getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({{p0}, {s64}});
709
710 getActionDefinitionsBuilder(G_BRJT).legalIf([=](const LegalityQuery &Query) {
711 return Query.Types[0] == p0 && Query.Types[1] == s64;
712 });
713
714 getActionDefinitionsBuilder(G_DYN_STACKALLOC).lower();
715
716 getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
717 .libcall();
718
719 // FIXME: Legal types are only legal with NEON.
720 getActionDefinitionsBuilder(G_ABS)
721 .lowerIf(isScalar(0))
722 .legalFor(PackedVectorAllTypeList);
723
724 getActionDefinitionsBuilder(G_VECREDUCE_FADD)
725 // We only have FADDP to do reduction-like operations. Lower the rest.
726 .legalFor({{s32, v2s32}, {s64, v2s64}})
727 .clampMaxNumElements(1, s64, 2)
728 .clampMaxNumElements(1, s32, 2)
729 .lower();
730
731 getActionDefinitionsBuilder(G_VECREDUCE_ADD)
732 .legalFor(
733 {{s8, v16s8}, {s16, v8s16}, {s32, v4s32}, {s32, v2s32}, {s64, v2s64}})
734 .clampMaxNumElements(1, s64, 2)
735 .clampMaxNumElements(1, s32, 4)
736 .lower();
737
738 getActionDefinitionsBuilder({G_UADDSAT, G_USUBSAT})
739 .lowerIf([=](const LegalityQuery &Q) { return Q.Types[0].isScalar(); });
740
741 getActionDefinitionsBuilder({G_FSHL, G_FSHR}).lower();
742
743 getActionDefinitionsBuilder(G_ROTR)
744 .legalFor({{s32, s64}, {s64, s64}})
745 .customIf([=](const LegalityQuery &Q) {
746 return Q.Types[0].isScalar() && Q.Types[1].getScalarSizeInBits() < 64;
747 })
748 .lower();
749 getActionDefinitionsBuilder(G_ROTL).lower();
750
751 getActionDefinitionsBuilder({G_SBFX, G_UBFX})
752 .customFor({{s32, s32}, {s64, s64}});
753
754 // TODO: Custom legalization for s128
755 // TODO: v2s64, v2s32, v4s32, v4s16, v8s16
756 // TODO: Use generic lowering when custom lowering is not possible.
757 auto always = [=](const LegalityQuery &Q) { return true; };
758 getActionDefinitionsBuilder(G_CTPOP)
759 .legalFor({{v8s8, v8s8}, {v16s8, v16s8}})
760 .clampScalar(0, s32, s128)
761 .widenScalarToNextPow2(0)
762 .minScalarEltSameAsIf(always, 1, 0)
763 .maxScalarEltSameAsIf(always, 1, 0)
764 .customFor({{s32, s32}, {s64, s64}});
765
766 getLegacyLegalizerInfo().computeTables();
767 verify(*ST.getInstrInfo());
768}
769
770bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
771 MachineInstr &MI) const {
772 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
773 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
774 GISelChangeObserver &Observer = Helper.Observer;
775 switch (MI.getOpcode()) {
776 default:
777 // No idea what to do.
778 return false;
779 case TargetOpcode::G_VAARG:
780 return legalizeVaArg(MI, MRI, MIRBuilder);
781 case TargetOpcode::G_LOAD:
782 case TargetOpcode::G_STORE:
783 return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
784 case TargetOpcode::G_SHL:
785 case TargetOpcode::G_ASHR:
786 case TargetOpcode::G_LSHR:
787 return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
788 case TargetOpcode::G_GLOBAL_VALUE:
789 return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
790 case TargetOpcode::G_TRUNC:
791 return legalizeVectorTrunc(MI, Helper);
792 case TargetOpcode::G_SBFX:
793 case TargetOpcode::G_UBFX:
794 return legalizeBitfieldExtract(MI, MRI, Helper);
795 case TargetOpcode::G_ROTR:
796 return legalizeRotate(MI, MRI, Helper);
797 case TargetOpcode::G_CTPOP:
798 return legalizeCTPOP(MI, MRI, Helper);
799 case TargetOpcode::G_ATOMIC_CMPXCHG:
800 return legalizeAtomicCmpxchg128(MI, MRI, Helper);
801 case TargetOpcode::G_CTTZ:
802 return legalizeCTTZ(MI, Helper);
803 }
804
805 llvm_unreachable("expected switch to return")::llvm::llvm_unreachable_internal("expected switch to return"
, "/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp"
, 805)
;
806}
807
808bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,
809 MachineRegisterInfo &MRI,
810 LegalizerHelper &Helper) const {
811 // To allow for imported patterns to match, we ensure that the rotate amount
812 // is 64b with an extension.
813 Register AmtReg = MI.getOperand(2).getReg();
814 LLT AmtTy = MRI.getType(AmtReg);
815 (void)AmtTy;
816 assert(AmtTy.isScalar() && "Expected a scalar rotate");
817 assert(AmtTy.getSizeInBits() < 64 && "Expected this rotate to be legal");
818 auto NewAmt = Helper.MIRBuilder.buildSExt(LLT::scalar(64), AmtReg);
819 Helper.Observer.changingInstr(MI);
820 MI.getOperand(2).setReg(NewAmt.getReg(0));
821 Helper.Observer.changedInstr(MI);
822 return true;
823}
824
825static void extractParts(Register Reg, MachineRegisterInfo &MRI,
826 MachineIRBuilder &MIRBuilder, LLT Ty, int NumParts,
827 SmallVectorImpl<Register> &VRegs) {
828 for (int I = 0; I < NumParts; ++I)
829 VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
830 MIRBuilder.buildUnmerge(VRegs, Reg);
831}
832
833bool AArch64LegalizerInfo::legalizeVectorTrunc(
834 MachineInstr &MI, LegalizerHelper &Helper) const {
835 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
836 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
837 // Similar to how operand splitting is done in SelectionDAG, we can handle
838 // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
839 // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
840 // %lo16(<4 x s16>) = G_TRUNC %inlo
841 // %hi16(<4 x s16>) = G_TRUNC %inhi
842 // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
843 // %res(<8 x s8>) = G_TRUNC %in16
844
845 Register DstReg = MI.getOperand(0).getReg();
846 Register SrcReg = MI.getOperand(1).getReg();
847 LLT DstTy = MRI.getType(DstReg);
848 LLT SrcTy = MRI.getType(SrcReg);
849 assert(isPowerOf2_32(DstTy.getSizeInBits()) &&
850 isPowerOf2_32(SrcTy.getSizeInBits()));
851
852 // Split input type.
853 LLT SplitSrcTy = SrcTy.changeNumElements(SrcTy.getNumElements() / 2);
854 // First, split the source into two smaller vectors.
855 SmallVector<Register, 2> SplitSrcs;
856 extractParts(SrcReg, MRI, MIRBuilder, SplitSrcTy, 2, SplitSrcs);
857
858 // Truncate the splits into intermediate narrower elements.
859 LLT InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
860 for (unsigned I = 0; I < SplitSrcs.size(); ++I)
861 SplitSrcs[I] = MIRBuilder.buildTrunc(InterTy, SplitSrcs[I]).getReg(0);
862
863 auto Concat = MIRBuilder.buildConcatVectors(
864 DstTy.changeElementSize(DstTy.getScalarSizeInBits() * 2), SplitSrcs);
865
866 Helper.Observer.changingInstr(MI);
867 MI.getOperand(1).setReg(Concat.getReg(0));
868 Helper.Observer.changedInstr(MI);
869 return true;
870}
871
872bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
873 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
874 GISelChangeObserver &Observer) const {
875 assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
876 // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP +
877 // G_ADD_LOW instructions.
878 // By splitting this here, we can optimize accesses in the small code model by
879 // folding in the G_ADD_LOW into the load/store offset.
880 auto &GlobalOp = MI.getOperand(1);
881 const auto* GV = GlobalOp.getGlobal();
882 if (GV->isThreadLocal())
883 return true; // Don't want to modify TLS vars.
884
885 auto &TM = ST->getTargetLowering()->getTargetMachine();
886 unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);
887
888 if (OpFlags & AArch64II::MO_GOT)
889 return true;
890
891 auto Offset = GlobalOp.getOffset();
892 Register DstReg = MI.getOperand(0).getReg();
893 auto ADRP = MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {})
894 .addGlobalAddress(GV, Offset, OpFlags | AArch64II::MO_PAGE);
895 // Set the regclass on the dest reg too.
896 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
897
898 // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so
899 // by creating a MOVK that sets bits 48-63 of the register to (global address
900 // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to
901 // prevent an incorrect tag being generated during relocation when the
902 // global appears before the code section. Without the offset, a global at
903 // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced
904 // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 =
905 // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe`
906 // instead of `0xf`.
907 // This assumes that we're in the small code model so we can assume a binary
908 // size of <= 4GB, which makes the untagged PC relative offset positive. The
909 // binary must also be loaded into address range [0, 2^48). Both of these
910 // properties need to be ensured at runtime when using tagged addresses.
911 if (OpFlags & AArch64II::MO_TAGGED) {
912 assert(!Offset &&
913 "Should not have folded in an offset for a tagged global!");
914 ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP})
915 .addGlobalAddress(GV, 0x100000000,
916 AArch64II::MO_PREL | AArch64II::MO_G3)
917 .addImm(48);
918 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
919 }
920
921 MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})
922 .addGlobalAddress(GV, Offset,
923 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
924 MI.eraseFromParent();
925 return true;
926}
927
928bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
929 MachineInstr &MI) const {
930 return true;
931}
932
933bool AArch64LegalizerInfo::legalizeShlAshrLshr(
934 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
935 GISelChangeObserver &Observer) const {
936 assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
937 MI.getOpcode() == TargetOpcode::G_LSHR ||
938 MI.getOpcode() == TargetOpcode::G_SHL);
939 // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
940 // imported patterns can select it later. Either way, it will be legal.
941 Register AmtReg = MI.getOperand(2).getReg();
942 auto VRegAndVal = getConstantVRegValWithLookThrough(AmtReg, MRI);
943 if (!VRegAndVal)
944 return true;
945 // Check the shift amount is in range for an immediate form.
946 int64_t Amount = VRegAndVal->Value.getSExtValue();
947 if (Amount > 31)
948 return true; // This will have to remain a register variant.
949 auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount);
950 Observer.changingInstr(MI);
951 MI.getOperand(2).setReg(ExtCst.getReg(0));
952 Observer.changedInstr(MI);
953 return true;
954}
955
956bool AArch64LegalizerInfo::legalizeLoadStore(
957 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
958 GISelChangeObserver &Observer) const {
959 assert(MI.getOpcode() == TargetOpcode::G_STORE ||
960 MI.getOpcode() == TargetOpcode::G_LOAD);
961 // Here we just try to handle vector loads/stores where our value type might
962 // have pointer elements, which the SelectionDAG importer can't handle. To
963 // allow the existing patterns for s64 to fire for p0, we just try to bitcast
964 // the value to use s64 types.
965
966 // Custom legalization requires that the instruction, if not deleted, be fully
967 // legalized. In order to allow further legalization of the inst, we create
968 // a new instruction and erase the existing one.
969
970 Register ValReg = MI.getOperand(0).getReg();
971 const LLT ValTy = MRI.getType(ValReg);
972
973 if (!ValTy.isVector() || !ValTy.getElementType().isPointer() ||
974 ValTy.getElementType().getAddressSpace() != 0) {
975 LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
976 return false;
977 }
978
979 unsigned PtrSize = ValTy.getElementType().getSizeInBits();
980 const LLT NewTy = LLT::vector(ValTy.getNumElements(), PtrSize);
981 auto &MMO = **MI.memoperands_begin();
982 if (MI.getOpcode() == TargetOpcode::G_STORE) {
983 auto Bitcast = MIRBuilder.buildBitcast(NewTy, ValReg);
984 MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1), MMO);
985 } else {
986 auto NewLoad = MIRBuilder.buildLoad(NewTy, MI.getOperand(1), MMO);
987 MIRBuilder.buildBitcast(ValReg, NewLoad);
988 }
989 MI.eraseFromParent();
990 return true;
991}
992
993bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
994 MachineRegisterInfo &MRI,
995 MachineIRBuilder &MIRBuilder) const {
996 MachineFunction &MF = MIRBuilder.getMF();
997 Align Alignment(MI.getOperand(2).getImm());
998 Register Dst = MI.getOperand(0).getReg();
999 Register ListPtr = MI.getOperand(1).getReg();
1000
1001 LLT PtrTy = MRI.getType(ListPtr);
1002 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
1003
1004 const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
1005 const Align PtrAlign = Align(PtrSize);
1006 auto List = MIRBuilder.buildLoad(
1007 PtrTy, ListPtr,
1008 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
1009 PtrSize, PtrAlign));
1010
1011 MachineInstrBuilder DstPtr;
1012 if (Alignment > PtrAlign) {
1013 // Realign the list to the actual required alignment.
1014 auto AlignMinus1 =
1015 MIRBuilder.buildConstant(IntPtrTy, Alignment.value() - 1);
1016 auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0));
1017 DstPtr = MIRBuilder.buildMaskLowPtrBits(PtrTy, ListTmp, Log2(Alignment));
1018 } else
1019 DstPtr = List;
1020
1021 uint64_t ValSize = MRI.getType(Dst).getSizeInBits() / 8;
1022 MIRBuilder.buildLoad(
1023 Dst, DstPtr,
1024 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
1025 ValSize, std::max(Alignment, PtrAlign)));
1026
1027 auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrAlign));
1028
1029 auto NewList = MIRBuilder.buildPtrAdd(PtrTy, DstPtr, Size.getReg(0));
1030
1031 MIRBuilder.buildStore(NewList, ListPtr,
1032 *MF.getMachineMemOperand(MachinePointerInfo(),
1033 MachineMemOperand::MOStore,
1034 PtrSize, PtrAlign));
1035
1036 MI.eraseFromParent();
1037 return true;
1038}
1039
1040bool AArch64LegalizerInfo::legalizeBitfieldExtract(
1041 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
1042 // Only legal if we can select immediate forms.
1043 // TODO: Lower this otherwise.
1044 return getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI) &&
1045 getConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
1046}
1047
1048bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
1049 MachineRegisterInfo &MRI,
1050 LegalizerHelper &Helper) const {
1051 // While there is no integer popcount instruction, it can
1052 // be more efficiently lowered to the following sequence that uses
1053 // AdvSIMD registers/instructions as long as the copies to/from
1054 // the AdvSIMD registers are cheap.
1055 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
1056 // CNT V0.8B, V0.8B // 8xbyte pop-counts
1057 // ADDV B0, V0.8B // sum 8xbyte pop-counts
1058 // UMOV X0, V0.B[0] // copy byte result back to integer reg
1059 if (!ST->hasNEON() ||
1060 MI.getMF()->getFunction().hasFnAttribute(Attribute::NoImplicitFloat))
1061 return false;
1062 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1063 Register Dst = MI.getOperand(0).getReg();
1064 Register Val = MI.getOperand(1).getReg();
1065 LLT Ty = MRI.getType(Val);
1066
1067 // TODO: Handle vector types.
1068 assert(!Ty.isVector() && "Vector types not handled yet!");
1069 assert(Ty == MRI.getType(Dst) &&
1070 "Expected src and dst to have the same type!");
1071 // TODO: Handle s128.
1072 unsigned Size = Ty.getSizeInBits();
1073 assert((Size == 32 || Size == 64) && "Expected only 32 or 64 bit scalars!");
1074 if (Size == 32)
1075 Val = MIRBuilder.buildZExt(LLT::scalar(64), Val).getReg(0);
1076 const LLT V8S8 = LLT::vector(8, LLT::scalar(8));
1077 Val = MIRBuilder.buildBitcast(V8S8, Val).getReg(0);
1078 auto CTPOP = MIRBuilder.buildCTPOP(V8S8, Val);
1079 auto UADDLV =
1080 MIRBuilder
1081 .buildIntrinsic(Intrinsic::aarch64_neon_uaddlv, {LLT::scalar(32)},
1082 /*HasSideEffects = */ false)
1083 .addUse(CTPOP.getReg(0));
1084 if (Size == 64)
1085 MIRBuilder.buildZExt(Dst, UADDLV);
1086 else
1087 UADDLV->getOperand(0).setReg(Dst);
1088 MI.eraseFromParent();
1089 return true;
1090}
1091
1092bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128(
1093 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
1094 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1095 LLT s64 = LLT::scalar(64);
1096 auto Addr = MI.getOperand(1).getReg();
1097 auto DesiredI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(2));
1098 auto NewI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(3));
1099 auto DstLo = MRI.createGenericVirtualRegister(s64);
1100 auto DstHi = MRI.createGenericVirtualRegister(s64);
1101
1102 MachineInstrBuilder CAS;
1103 if (ST->hasLSE()) {
1104 // We have 128-bit CASP instructions taking XSeqPair registers, which are
1105 // s128. We need the merge/unmerge to bracket the expansion and pair up with
1106 // the rest of the MIR so we must reassemble the extracted registers into a
1107 // 128-bit known-regclass one with code like this:
1108 //
1109 // %in1 = REG_SEQUENCE Lo, Hi ; One for each input
1110 // %out = CASP %in1, ...
1111 // %OldLo = G_EXTRACT %out, 0
1112 // %OldHi = G_EXTRACT %out, 64
1113 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
1114 unsigned Opcode;
1115 switch (Ordering) {
1116 case AtomicOrdering::Acquire:
1117 Opcode = AArch64::CASPAX;
1118 break;
1119 case AtomicOrdering::Release:
1120 Opcode = AArch64::CASPLX;
1121 break;
1122 case AtomicOrdering::AcquireRelease:
1123 case AtomicOrdering::SequentiallyConsistent:
1124 Opcode = AArch64::CASPALX;
1125 break;
1126 default:
1127 Opcode = AArch64::CASPX;
1128 break;
1129 }
1130
1131 LLT s128 = LLT::scalar(128);
1132 auto CASDst = MRI.createGenericVirtualRegister(s128);
1133 auto CASDesired = MRI.createGenericVirtualRegister(s128);
1134 auto CASNew = MRI.createGenericVirtualRegister(s128);
1135 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASDesired}, {})
1136 .addUse(DesiredI->getOperand(0).getReg())
1137 .addImm(AArch64::sube64)
1138 .addUse(DesiredI->getOperand(1).getReg())
1139 .addImm(AArch64::subo64);
1140 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASNew}, {})
1141 .addUse(NewI->getOperand(0).getReg())
1142 .addImm(AArch64::sube64)
1143 .addUse(NewI->getOperand(1).getReg())
1144 .addImm(AArch64::subo64);
1145
1146 CAS = MIRBuilder.buildInstr(Opcode, {CASDst}, {CASDesired, CASNew, Addr});
1147
1148 MIRBuilder.buildExtract({DstLo}, {CASDst}, 0);
1149 MIRBuilder.buildExtract({DstHi}, {CASDst}, 64);
1150 } else {
1151 // The -O0 CMP_SWAP_128 is friendlier to generate code for because LDXP/STXP
1152 // can take arbitrary registers so it just has the normal GPR64 operands the
1153 // rest of AArch64 is expecting.
1154 auto Scratch = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1155 CAS = MIRBuilder.buildInstr(AArch64::CMP_SWAP_128, {DstLo, DstHi, Scratch},
1156 {Addr, DesiredI->getOperand(0),
1157 DesiredI->getOperand(1), NewI->getOperand(0),
1158 NewI->getOperand(1)});
1159 }
1160
1161 CAS.cloneMemRefs(MI);
1162 constrainSelectedInstRegOperands(*CAS, *ST->getInstrInfo(),
1163 *MRI.getTargetRegisterInfo(),
1164 *ST->getRegBankInfo());
1165
1166 MIRBuilder.buildMerge(MI.getOperand(0), {DstLo, DstHi});
1167 MI.eraseFromParent();
1168 return true;
1169}
1170
1171bool AArch64LegalizerInfo::legalizeCTTZ(MachineInstr &MI,
1172 LegalizerHelper &Helper) const {
1173 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1174 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1175 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
1176 auto BitReverse = MIRBuilder.buildBitReverse(Ty, MI.getOperand(1));
1177 MIRBuilder.buildCTLZ(MI.getOperand(0).getReg(), BitReverse);
1178 MI.eraseFromParent();
1179 return true;
1180}

/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/include/llvm/Support/LowLevelTypeImpl.h

1//== llvm/Support/LowLevelTypeImpl.h --------------------------- -*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// Implement a low-level type suitable for MachineInstr level instruction
10/// selection.
11///
12/// For a type attached to a MachineInstr, we only care about 2 details: total
13/// size and the number of vector lanes (if any). Accordingly, there are 4
14/// possible valid type-kinds:
15///
16/// * `sN` for scalars and aggregates
17/// * `<N x sM>` for vectors, which must have at least 2 elements.
18/// * `pN` for pointers
19///
20/// Other information required for correct selection is expected to be carried
21/// by the opcode, or non-type flags. For example the distinction between G_ADD
22/// and G_FADD for int/float or fast-math flags.
23///
24//===----------------------------------------------------------------------===//
25
26#ifndef LLVM_SUPPORT_LOWLEVELTYPEIMPL_H
27#define LLVM_SUPPORT_LOWLEVELTYPEIMPL_H
28
29#include "llvm/ADT/DenseMapInfo.h"
30#include "llvm/Support/Debug.h"
31#include "llvm/Support/MachineValueType.h"
32#include <cassert>
33
34namespace llvm {
35
36class DataLayout;
37class Type;
38class raw_ostream;
39
40class LLT {
41public:
42 /// Get a low-level scalar or aggregate "bag of bits".
43 static LLT scalar(unsigned SizeInBits) {
44 assert(SizeInBits > 0 && "invalid scalar size");
45 return LLT{/*isPointer=*/false, /*isVector=*/false, /*NumElements=*/0,
46 SizeInBits, /*AddressSpace=*/0};
47 }
48
49 /// Get a low-level pointer in the given address space.
50 static LLT pointer(unsigned AddressSpace, unsigned SizeInBits) {
51 assert(SizeInBits > 0 && "invalid pointer size");
52 return LLT{/*isPointer=*/true, /*isVector=*/false, /*NumElements=*/0,
53 SizeInBits, AddressSpace};
54 }
55
56 /// Get a low-level vector of some number of elements and element width.
57 /// \p NumElements must be at least 2.
58 static LLT vector(uint16_t NumElements, unsigned ScalarSizeInBits) {
59 assert(NumElements > 1 && "invalid number of vector elements");
60 assert(ScalarSizeInBits > 0 && "invalid vector element size");
61 return LLT{/*isPointer=*/false, /*isVector=*/true, NumElements,
62 ScalarSizeInBits, /*AddressSpace=*/0};
63 }
64
65 /// Get a low-level vector of some number of elements and element type.
66 static LLT vector(uint16_t NumElements, LLT ScalarTy) {
67 assert(NumElements > 1 && "invalid number of vector elements");
68 assert(!ScalarTy.isVector() && "invalid vector element type");
69 return LLT{ScalarTy.isPointer(), /*isVector=*/true, NumElements,
70 ScalarTy.getSizeInBits(),
71 ScalarTy.isPointer() ? ScalarTy.getAddressSpace() : 0};
72 }
73
74 static LLT scalarOrVector(uint16_t NumElements, LLT ScalarTy) {
75 return NumElements == 1 ? ScalarTy : LLT::vector(NumElements, ScalarTy);
76 }
77
78 static LLT scalarOrVector(uint16_t NumElements, unsigned ScalarSize) {
79 return scalarOrVector(NumElements, LLT::scalar(ScalarSize));
80 }
81
82 explicit LLT(bool isPointer, bool isVector, uint16_t NumElements,
83 unsigned SizeInBits, unsigned AddressSpace) {
84 init(isPointer, isVector, NumElements, SizeInBits, AddressSpace);
85 }
86 explicit LLT() : IsPointer(false), IsVector(false), RawData(0) {}
87
88 explicit LLT(MVT VT);
89
90 bool isValid() const { return RawData != 0; }
91
92 bool isScalar() const { return isValid() && !IsPointer && !IsVector; }
93
94 bool isPointer() const { return isValid() && IsPointer && !IsVector; }
95
96 bool isVector() const { return isValid() && IsVector; }
[2] Returning value, which participates in a condition later
[6] Returning zero, which participates in a condition later
97
98 /// Returns the number of elements in a vector LLT. Must only be called on
99 /// vector types.
100 uint16_t getNumElements() const {
101 assert(IsVector && "cannot get number of elements on scalar/aggregate");
102 if (!IsPointer)
103 return getFieldValue(VectorElementsFieldInfo);
104 else
105 return getFieldValue(PointerVectorElementsFieldInfo);
106 }
107
108 /// Returns the total size of the type. Must only be called on sized types.
109 unsigned getSizeInBits() const {
110 if (isPointer() || isScalar())
[9] Taking false branch
111 return getScalarSizeInBits();
112 return getScalarSizeInBits() * getNumElements();
[10] Returning zero
113 }
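This is the function the analyzer identifies as the source of the zero value (event [10] above): for a vector type it returns getScalarSizeInBits() * getNumElements(), and on the assumed path that product is 0, which later feeds the division in AArch64LegalizerInfo.cpp that this report flags. As a hedged sketch only, a caller that divides by such a size could guard it like the hypothetical helper below (not the actual call site):

// Hypothetical caller-side guard; `Ty` is any LLT whose size feeds a division.
static unsigned chunksFor(LLT Ty, unsigned TotalSizeInBits) {
  unsigned EltSizeInBits = Ty.getSizeInBits();
  assert(EltSizeInBits != 0 && "dividing by a zero-sized type");
  return TotalSizeInBits / EltSizeInBits;
}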
114
115 /// Returns the total size of the type in bytes, i.e. number of whole bytes
116 /// needed to represent the size in bits. Must only be called on sized types.
117 unsigned getSizeInBytes() const {
118 return (getSizeInBits() + 7) / 8;
119 }
120
121 LLT getScalarType() const {
122 return isVector() ? getElementType() : *this;
123 }
124
125 /// If this type is a vector, return a vector with the same number of elements
126 /// but the new element type. Otherwise, return the new element type.
127 LLT changeElementType(LLT NewEltTy) const {
128 return isVector() ? LLT::vector(getNumElements(), NewEltTy) : NewEltTy;
129 }
130
131 /// If this type is a vector, return a vector with the same number of elements
132 /// but the new element size. Otherwise, return the new element type. Invalid
133 /// for pointer types. For pointer types, use changeElementType.
134 LLT changeElementSize(unsigned NewEltSize) const {
135 assert(!getScalarType().isPointer() &&
136        "invalid to directly change element size for pointers");
137 return isVector() ? LLT::vector(getNumElements(), NewEltSize)
138 : LLT::scalar(NewEltSize);
139 }
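A quick illustration of the two helpers above (example values only):

LLT V4S32 = LLT::vector(4, 32);
LLT V4S16 = V4S32.changeElementSize(16);                   // <4 x s16>: same lane count, narrower lanes
LLT V4P0  = V4S32.changeElementType(LLT::pointer(0, 64));  // <4 x p0>: same lane count, pointer lanes
LLT S8    = LLT::scalar(64).changeElementSize(8);          // a plain scalar stays scalar: s8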
140
141 /// Return a vector or scalar with the same element type and the new number of
142 /// elements.
143 LLT changeNumElements(unsigned NewNumElts) const {
144 return LLT::scalarOrVector(NewNumElts, getScalarType());
145 }
146
147 /// Return a type that is \p Factor times smaller. Reduces the number of
148 /// elements if this is a vector, or the bitwidth for scalar/pointers. Does
149 /// not attempt to handle cases that aren't evenly divisible.
150 LLT divide(int Factor) const {
151 assert(Factor != 1);
152 if (isVector()) {
153 assert(getNumElements() % Factor == 0);
154 return scalarOrVector(getNumElements() / Factor, getElementType());
155 }
156
157 assert(getSizeInBits() % Factor == 0);
158 return scalar(getSizeInBits() / Factor);
159 }
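divide() as documented above shrinks the lane count for vectors and the bit width for scalars, asserting even divisibility; for example:

LLT V4S32 = LLT::vector(4, 32);
LLT V2S32 = V4S32.divide(2);           // <2 x s32>: lane count halved, element type kept
LLT S32   = LLT::scalar(64).divide(2); // s32: bit width halved
// LLT::scalar(64).divide(3) would trip the divisibility assertion above.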
160
161 bool isByteSized() const { return (getSizeInBits() & 7) == 0; }
162
163 unsigned getScalarSizeInBits() const {
164 assert(RawData != 0 && "Invalid Type");
165 if (!IsVector) {
166 if (!IsPointer)
167 return getFieldValue(ScalarSizeFieldInfo);
168 else
169 return getFieldValue(PointerSizeFieldInfo);
170 } else {
171 if (!IsPointer)
172 return getFieldValue(VectorSizeFieldInfo);
173 else
174 return getFieldValue(PointerVectorSizeFieldInfo);
175 }
176 }
177
178 unsigned getAddressSpace() const {
179 assert(RawData != 0 && "Invalid Type");
180 assert(IsPointer && "cannot get address space of non-pointer type");
181 if (!IsVector)
182 return getFieldValue(PointerAddressSpaceFieldInfo);
183 else
184 return getFieldValue(PointerVectorAddressSpaceFieldInfo);
185 }
186
187 /// Returns the vector's element type. Only valid for vector types.
188 LLT getElementType() const {
189 assert(isVector() && "cannot get element type of scalar/aggregate");
190 if (IsPointer)
191 return pointer(getAddressSpace(), getScalarSizeInBits());
192 else
193 return scalar(getScalarSizeInBits());
194 }
195
196 void print(raw_ostream &OS) const;
197
198#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
199 LLVM_DUMP_METHOD void dump() const {
200 print(dbgs());
201 dbgs() << '\n';
202 }
203#endif
204
205 bool operator==(const LLT &RHS) const {
206 return IsPointer == RHS.IsPointer && IsVector == RHS.IsVector &&
207 RHS.RawData == RawData;
208 }
209
210 bool operator!=(const LLT &RHS) const { return !(*this == RHS); }
211
212 friend struct DenseMapInfo<LLT>;
213 friend class GISelInstProfileBuilder;
214
215private:
216 /// LLT is packed into 64 bits as follows:
217 /// isPointer : 1
218 /// isVector : 1
219 /// with 62 bits remaining for Kind-specific data, packed in bitfields
220 /// as described below. As there isn't a simple portable way to pack bits
221 /// into bitfields, here the different fields in the packed structure are
222 /// described in static const *Field variables. Each of these variables
223 /// is a 2-element array, with the first element describing the bitfield size
224 /// and the second element describing the bitfield offset.
225 typedef int BitFieldInfo[2];
226 ///
227 /// This is how the bitfields are packed per Kind:
228 /// * Invalid:
229 /// gets encoded as RawData == 0, as that is an invalid encoding, since for
230 /// valid encodings, SizeInBits/SizeOfElement must be larger than 0.
231 /// * Non-pointer scalar (isPointer == 0 && isVector == 0):
232 /// SizeInBits: 32;
233 static const constexpr BitFieldInfo ScalarSizeFieldInfo{32, 0};
234 /// * Pointer (isPointer == 1 && isVector == 0):
235 /// SizeInBits: 16;
236 /// AddressSpace: 24;
237 static const constexpr BitFieldInfo PointerSizeFieldInfo{16, 0};
238 static const constexpr BitFieldInfo PointerAddressSpaceFieldInfo{
239 24, PointerSizeFieldInfo[0] + PointerSizeFieldInfo[1]};
240 /// * Vector-of-non-pointer (isPointer == 0 && isVector == 1):
241 /// NumElements: 16;
242 /// SizeOfElement: 32;
243 static const constexpr BitFieldInfo VectorElementsFieldInfo{16, 0};
244 static const constexpr BitFieldInfo VectorSizeFieldInfo{
245 32, VectorElementsFieldInfo[0] + VectorElementsFieldInfo[1]};
246 /// * Vector-of-pointer (isPointer == 1 && isVector == 1):
247 /// NumElements: 16;
248 /// SizeOfElement: 16;
249 /// AddressSpace: 24;
250 static const constexpr BitFieldInfo PointerVectorElementsFieldInfo{16, 0};
251 static const constexpr BitFieldInfo PointerVectorSizeFieldInfo{
252 16,
253 PointerVectorElementsFieldInfo[1] + PointerVectorElementsFieldInfo[0]};
254 static const constexpr BitFieldInfo PointerVectorAddressSpaceFieldInfo{
255 24, PointerVectorSizeFieldInfo[1] + PointerVectorSizeFieldInfo[0]};
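The descriptors above are just {bit width, bit offset} pairs; packing and unpacking is plain mask-and-shift, as the private helpers further down implement. A standalone, simplified sketch of the same scheme (field layout invented for the example):

#include <cassert>
#include <cstdint>

// {bit width, bit offset}, the same shape as LLT's BitFieldInfo.
static const int ElementsField[2] = {16, 0};  // e.g. NumElements: 16 bits at offset 0
static const int SizeField[2]     = {32, 16}; // e.g. SizeOfElement: 32 bits at offset 16

static uint64_t pack(uint64_t Val, const int Info[2]) {
  uint64_t Mask = (uint64_t(1) << Info[0]) - 1;
  assert(Val <= Mask && "value does not fit in the field");
  return (Val & Mask) << Info[1];
}

static uint64_t unpack(uint64_t Raw, const int Info[2]) {
  uint64_t Mask = (uint64_t(1) << Info[0]) - 1;
  return Mask & (Raw >> Info[1]);
}

int main() {
  uint64_t Raw = pack(4, ElementsField) | pack(32, SizeField); // roughly a <4 x s32>
  assert(unpack(Raw, ElementsField) == 4 && unpack(Raw, SizeField) == 32);
  return 0;
}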
256
257 uint64_t IsPointer : 1;
258 uint64_t IsVector : 1;
259 uint64_t RawData : 62;
260
261 static uint64_t getMask(const BitFieldInfo FieldInfo) {
262 const int FieldSizeInBits = FieldInfo[0];
263 return (((uint64_t)1) << FieldSizeInBits) - 1;
264 }
265 static uint64_t maskAndShift(uint64_t Val, uint64_t Mask, uint8_t Shift) {
266 assert(Val <= Mask && "Value too large for field");
267 return (Val & Mask) << Shift;
268 }
269 static uint64_t maskAndShift(uint64_t Val, const BitFieldInfo FieldInfo) {
270 return maskAndShift(Val, getMask(FieldInfo), FieldInfo[1]);
271 }
272 uint64_t getFieldValue(const BitFieldInfo FieldInfo) const {
273 return getMask(FieldInfo) & (RawData >> FieldInfo[1]);
274 }
275
276 void init(bool IsPointer, bool IsVector, uint16_t NumElements,
277 unsigned SizeInBits, unsigned AddressSpace) {
278 this->IsPointer = IsPointer;
279 this->IsVector = IsVector;
280 if (!IsVector) {
281 if (!IsPointer)
282 RawData = maskAndShift(SizeInBits, ScalarSizeFieldInfo);
283 else
284 RawData = maskAndShift(SizeInBits, PointerSizeFieldInfo) |
285 maskAndShift(AddressSpace, PointerAddressSpaceFieldInfo);
286 } else {
287 assert(NumElements > 1 && "invalid number of vector elements");
288 if (!IsPointer)
289 RawData = maskAndShift(NumElements, VectorElementsFieldInfo) |
290 maskAndShift(SizeInBits, VectorSizeFieldInfo);
291 else
292 RawData =
293 maskAndShift(NumElements, PointerVectorElementsFieldInfo) |
294 maskAndShift(SizeInBits, PointerVectorSizeFieldInfo) |
295 maskAndShift(AddressSpace, PointerVectorAddressSpaceFieldInfo);
296 }
297 }
298
299 uint64_t getUniqueRAWLLTData() const {
300 return ((uint64_t)RawData) << 2 | ((uint64_t)IsPointer) << 1 |
301 ((uint64_t)IsVector);
302 }
303};
304
305inline raw_ostream& operator<<(raw_ostream &OS, const LLT &Ty) {
306 Ty.print(OS);
307 return OS;
308}
309
310template<> struct DenseMapInfo<LLT> {
311 static inline LLT getEmptyKey() {
312 LLT Invalid;
313 Invalid.IsPointer = true;
314 return Invalid;
315 }
316 static inline LLT getTombstoneKey() {
317 LLT Invalid;
318 Invalid.IsVector = true;
319 return Invalid;
320 }
321 static inline unsigned getHashValue(const LLT &Ty) {
322 uint64_t Val = Ty.getUniqueRAWLLTData();
323 return DenseMapInfo<uint64_t>::getHashValue(Val);
324 }
325 static bool isEqual(const LLT &LHS, const LLT &RHS) {
326 return LHS == RHS;
327 }
328};
329
330}
331
332#endif // LLVM_SUPPORT_LOWLEVELTYPEIMPL_H