File: | llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp |
Warning: | line 602, column 40 Division by zero |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | /// \file | |||
9 | /// This file implements the targeting of the Machinelegalizer class for | |||
10 | /// AArch64. | |||
11 | /// \todo This should be generated by TableGen. | |||
12 | //===----------------------------------------------------------------------===// | |||
13 | ||||
14 | #include "AArch64LegalizerInfo.h" | |||
15 | #include "AArch64RegisterBankInfo.h" | |||
16 | #include "AArch64Subtarget.h" | |||
17 | #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" | |||
18 | #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" | |||
19 | #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" | |||
20 | #include "llvm/CodeGen/GlobalISel/Utils.h" | |||
21 | #include "llvm/CodeGen/MachineInstr.h" | |||
22 | #include "llvm/CodeGen/MachineRegisterInfo.h" | |||
23 | #include "llvm/CodeGen/TargetOpcodes.h" | |||
24 | #include "llvm/CodeGen/ValueTypes.h" | |||
25 | #include "llvm/IR/DerivedTypes.h" | |||
26 | #include "llvm/IR/IntrinsicsAArch64.h" | |||
27 | #include "llvm/IR/Type.h" | |||
28 | #include "llvm/Support/MathExtras.h" | |||
29 | #include <initializer_list> | |||
30 | ||||
31 | #define DEBUG_TYPE "aarch64-legalinfo"
32 | ||||
33 | using namespace llvm; | |||
34 | using namespace LegalizeActions; | |||
35 | using namespace LegalizeMutations; | |||
36 | using namespace LegalityPredicates; | |||
37 | ||||
38 | AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) | |||
39 | : ST(&ST) { | |||
40 | using namespace TargetOpcode; | |||
41 | const LLT p0 = LLT::pointer(0, 64); | |||
42 | const LLT s1 = LLT::scalar(1); | |||
43 | const LLT s8 = LLT::scalar(8); | |||
44 | const LLT s16 = LLT::scalar(16); | |||
45 | const LLT s32 = LLT::scalar(32); | |||
46 | const LLT s64 = LLT::scalar(64); | |||
47 | const LLT s128 = LLT::scalar(128); | |||
48 | const LLT s256 = LLT::scalar(256); | |||
49 | const LLT s512 = LLT::scalar(512); | |||
50 | const LLT v16s8 = LLT::vector(16, 8); | |||
51 | const LLT v8s8 = LLT::vector(8, 8); | |||
52 | const LLT v4s8 = LLT::vector(4, 8); | |||
53 | const LLT v8s16 = LLT::vector(8, 16); | |||
54 | const LLT v4s16 = LLT::vector(4, 16); | |||
55 | const LLT v2s16 = LLT::vector(2, 16); | |||
56 | const LLT v2s32 = LLT::vector(2, 32); | |||
57 | const LLT v4s32 = LLT::vector(4, 32); | |||
58 | const LLT v2s64 = LLT::vector(2, 64); | |||
59 | const LLT v2p0 = LLT::vector(2, p0); | |||
60 | ||||
61 | std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */ | |||
62 | v16s8, v8s16, v4s32, | |||
63 | v2s64, v2p0, | |||
64 | /* End 128bit types */ | |||
65 | /* Begin 64bit types */ | |||
66 | v8s8, v4s16, v2s32}; | |||
67 | ||||
68 | const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine(); | |||
69 | ||||
70 | // FIXME: support subtargets which have neon/fp-armv8 disabled. | |||
71 | if (!ST.hasNEON() || !ST.hasFPARMv8()) { | |||
72 | getLegacyLegalizerInfo().computeTables(); | |||
73 | return; | |||
74 | } | |||
75 | ||||
76 | // Some instructions only support s16 if the subtarget has full 16-bit FP | |||
77 | // support. | |||
78 | const bool HasFP16 = ST.hasFullFP16(); | |||
79 | const LLT &MinFPScalar = HasFP16 ? s16 : s32; | |||
80 | ||||
81 | getActionDefinitionsBuilder({G_IMPLICIT_DEF, G_FREEZE}) | |||
82 | .legalFor({p0, s1, s8, s16, s32, s64}) | |||
83 | .legalFor(PackedVectorAllTypeList) | |||
84 | .clampScalar(0, s1, s64) | |||
85 | .widenScalarToNextPow2(0, 8) | |||
86 | .fewerElementsIf( | |||
87 | [=](const LegalityQuery &Query) { | |||
88 | return Query.Types[0].isVector() && | |||
89 | (Query.Types[0].getElementType() != s64 || | |||
90 | Query.Types[0].getNumElements() != 2); | |||
91 | }, | |||
92 | [=](const LegalityQuery &Query) { | |||
93 | LLT EltTy = Query.Types[0].getElementType(); | |||
94 | if (EltTy == s64) | |||
95 | return std::make_pair(0, LLT::vector(2, 64)); | |||
96 | return std::make_pair(0, EltTy); | |||
97 | }); | |||
98 | ||||
99 | getActionDefinitionsBuilder(G_PHI).legalFor({p0, s16, s32, s64}) | |||
100 | .legalFor(PackedVectorAllTypeList) | |||
101 | .clampScalar(0, s16, s64) | |||
102 | .widenScalarToNextPow2(0); | |||
103 | ||||
104 | getActionDefinitionsBuilder(G_BSWAP) | |||
105 | .legalFor({s32, s64, v4s32, v2s32, v2s64}) | |||
106 | .clampScalar(0, s32, s64) | |||
107 | .widenScalarToNextPow2(0); | |||
108 | ||||
109 | getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR}) | |||
110 | .legalFor({s32, s64, v2s32, v4s32, v4s16, v8s16, v16s8, v8s8}) | |||
111 | .scalarizeIf( | |||
112 | [=](const LegalityQuery &Query) { | |||
113 | return Query.Opcode == G_MUL && Query.Types[0] == v2s64; | |||
114 | }, | |||
115 | 0) | |||
116 | .legalFor({v2s64}) | |||
117 | .clampScalar(0, s32, s64) | |||
118 | .widenScalarToNextPow2(0) | |||
119 | .clampNumElements(0, v2s32, v4s32) | |||
120 | .clampNumElements(0, v2s64, v2s64) | |||
121 | .moreElementsToNextPow2(0); | |||
122 | ||||
123 | getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR}) | |||
124 | .customIf([=](const LegalityQuery &Query) { | |||
125 | const auto &SrcTy = Query.Types[0]; | |||
126 | const auto &AmtTy = Query.Types[1]; | |||
127 | return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 && | |||
128 | AmtTy.getSizeInBits() == 32; | |||
129 | }) | |||
130 | .legalFor({ | |||
131 | {s32, s32}, | |||
132 | {s32, s64}, | |||
133 | {s64, s64}, | |||
134 | {v8s8, v8s8}, | |||
135 | {v16s8, v16s8}, | |||
136 | {v4s16, v4s16}, | |||
137 | {v8s16, v8s16}, | |||
138 | {v2s32, v2s32}, | |||
139 | {v4s32, v4s32}, | |||
140 | {v2s64, v2s64}, | |||
141 | }) | |||
142 | .clampScalar(1, s32, s64) | |||
143 | .clampScalar(0, s32, s64) | |||
144 | .widenScalarToNextPow2(0) | |||
145 | .clampNumElements(0, v2s32, v4s32) | |||
146 | .clampNumElements(0, v2s64, v2s64) | |||
147 | .moreElementsToNextPow2(0) | |||
148 | .minScalarSameAs(1, 0); | |||
149 | ||||
150 | getActionDefinitionsBuilder(G_PTR_ADD) | |||
151 | .legalFor({{p0, s64}, {v2p0, v2s64}}) | |||
152 | .clampScalar(1, s64, s64); | |||
153 | ||||
154 | getActionDefinitionsBuilder(G_PTRMASK).legalFor({{p0, s64}}); | |||
155 | ||||
156 | getActionDefinitionsBuilder({G_SDIV, G_UDIV}) | |||
157 | .legalFor({s32, s64}) | |||
158 | .libcallFor({s128}) | |||
159 | .clampScalar(0, s32, s64) | |||
160 | .widenScalarToNextPow2(0) | |||
161 | .scalarize(0); | |||
162 | ||||
163 | getActionDefinitionsBuilder({G_SREM, G_UREM, G_SDIVREM, G_UDIVREM}) | |||
164 | .lowerFor({s1, s8, s16, s32, s64}); | |||
165 | ||||
166 | getActionDefinitionsBuilder({G_SMULO, G_UMULO}).lowerFor({{s64, s1}}); | |||
167 | ||||
168 | getActionDefinitionsBuilder({G_SMULH, G_UMULH}).legalFor({s32, s64}); | |||
169 | ||||
170 | getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX}) | |||
171 | .lowerIf([=](const LegalityQuery &Q) { return Q.Types[0].isScalar(); }); | |||
172 | ||||
173 | getActionDefinitionsBuilder( | |||
174 | {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO}) | |||
175 | .legalFor({{s32, s1}, {s64, s1}}) | |||
176 | .clampScalar(0, s32, s64) | |||
177 | .widenScalarToNextPow2(0); | |||
178 | ||||
179 | getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FNEG}) | |||
180 | .legalFor({s32, s64, v2s64, v4s32, v2s32}) | |||
181 | .clampNumElements(0, v2s32, v4s32) | |||
182 | .clampNumElements(0, v2s64, v2s64); | |||
183 | ||||
184 | getActionDefinitionsBuilder(G_FREM).libcallFor({s32, s64}); | |||
185 | ||||
186 | getActionDefinitionsBuilder({G_FCEIL, G_FABS, G_FSQRT, G_FFLOOR, G_FRINT, | |||
187 | G_FMA, G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND, | |||
188 | G_FNEARBYINT, G_INTRINSIC_LRINT}) | |||
189 | // If we don't have full FP16 support, then scalarize the elements of | |||
190 | // vectors containing fp16 types. | |||
191 | .fewerElementsIf( | |||
192 | [=, &ST](const LegalityQuery &Query) { | |||
193 | const auto &Ty = Query.Types[0]; | |||
194 | return Ty.isVector() && Ty.getElementType() == s16 && | |||
195 | !ST.hasFullFP16(); | |||
196 | }, | |||
197 | [=](const LegalityQuery &Query) { return std::make_pair(0, s16); }) | |||
198 | // If we don't have full FP16 support, then widen s16 to s32 if we | |||
199 | // encounter it. | |||
200 | .widenScalarIf( | |||
201 | [=, &ST](const LegalityQuery &Query) { | |||
202 | return Query.Types[0] == s16 && !ST.hasFullFP16(); | |||
203 | }, | |||
204 | [=](const LegalityQuery &Query) { return std::make_pair(0, s32); }) | |||
205 | .legalFor({s16, s32, s64, v2s32, v4s32, v2s64, v2s16, v4s16, v8s16}); | |||
206 | ||||
207 | getActionDefinitionsBuilder( | |||
208 | {G_FCOS, G_FSIN, G_FLOG10, G_FLOG, G_FLOG2, G_FEXP, G_FEXP2, G_FPOW}) | |||
209 | // We need a call for these, so we always need to scalarize. | |||
210 | .scalarize(0) | |||
211 | // Regardless of FP16 support, widen 16-bit elements to 32-bits. | |||
212 | .minScalar(0, s32) | |||
213 | .libcallFor({s32, s64, v2s32, v4s32, v2s64}); | |||
214 | ||||
215 | getActionDefinitionsBuilder(G_INSERT) | |||
216 | .unsupportedIf([=](const LegalityQuery &Query) { | |||
217 | return Query.Types[0].getSizeInBits() <= Query.Types[1].getSizeInBits(); | |||
218 | }) | |||
219 | .legalIf([=](const LegalityQuery &Query) { | |||
220 | const LLT &Ty0 = Query.Types[0]; | |||
221 | const LLT &Ty1 = Query.Types[1]; | |||
222 | if (Ty0 != s32 && Ty0 != s64 && Ty0 != p0) | |||
223 | return false; | |||
224 | return isPowerOf2_32(Ty1.getSizeInBits()) && | |||
225 | (Ty1.getSizeInBits() == 1 || Ty1.getSizeInBits() >= 8); | |||
226 | }) | |||
227 | .clampScalar(0, s32, s64) | |||
228 | .widenScalarToNextPow2(0) | |||
229 | .maxScalarIf(typeInSet(0, {s32}), 1, s16) | |||
230 | .maxScalarIf(typeInSet(0, {s64}), 1, s32) | |||
231 | .widenScalarToNextPow2(1); | |||
232 | ||||
233 | getActionDefinitionsBuilder(G_EXTRACT) | |||
234 | .unsupportedIf([=](const LegalityQuery &Query) { | |||
235 | return Query.Types[0].getSizeInBits() >= Query.Types[1].getSizeInBits(); | |||
236 | }) | |||
237 | .legalIf([=](const LegalityQuery &Query) { | |||
238 | const LLT &Ty0 = Query.Types[0]; | |||
239 | const LLT &Ty1 = Query.Types[1]; | |||
240 | if (Ty1 != s32 && Ty1 != s64 && Ty1 != s128) | |||
241 | return false; | |||
242 | if (Ty1 == p0) | |||
243 | return true; | |||
244 | return isPowerOf2_32(Ty0.getSizeInBits()) && | |||
245 | (Ty0.getSizeInBits() == 1 || Ty0.getSizeInBits() >= 8); | |||
246 | }) | |||
247 | .clampScalar(1, s32, s128) | |||
248 | .widenScalarToNextPow2(1) | |||
249 | .maxScalarIf(typeInSet(1, {s32}), 0, s16) | |||
250 | .maxScalarIf(typeInSet(1, {s64}), 0, s32) | |||
251 | .widenScalarToNextPow2(0); | |||
252 | ||||
253 | getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD}) | |||
254 | .lowerIf(atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered)) | |||
255 | .legalForTypesWithMemDesc({{s32, p0, 8, 8}, | |||
256 | {s32, p0, 16, 8}, | |||
257 | {s32, p0, 32, 8}, | |||
258 | {s64, p0, 8, 2}, | |||
259 | {s64, p0, 16, 2}, | |||
260 | {s64, p0, 32, 4}, | |||
261 | {s64, p0, 64, 8}, | |||
262 | {p0, p0, 64, 8}, | |||
263 | {v2s32, p0, 64, 8}}) | |||
264 | .clampScalar(0, s32, s64) | |||
265 | .widenScalarToNextPow2(0) | |||
266 | // TODO: We could support sum-of-pow2's but the lowering code doesn't know | |||
267 | // how to do that yet. | |||
268 | .unsupportedIfMemSizeNotPow2() | |||
269 | // Lower anything left over into G_*EXT and G_LOAD | |||
270 | .lower(); | |||
271 | ||||
272 | auto IsPtrVecPred = [=](const LegalityQuery &Query) { | |||
273 | const LLT &ValTy = Query.Types[0]; | |||
274 | if (!ValTy.isVector()) | |||
275 | return false; | |||
276 | const LLT EltTy = ValTy.getElementType(); | |||
277 | return EltTy.isPointer() && EltTy.getAddressSpace() == 0; | |||
278 | }; | |||
279 | ||||
280 | getActionDefinitionsBuilder(G_LOAD) | |||
281 | .legalForTypesWithMemDesc({{s8, p0, 8, 8}, | |||
282 | {s16, p0, 16, 8}, | |||
283 | {s32, p0, 32, 8}, | |||
284 | {s64, p0, 64, 8}, | |||
285 | {p0, p0, 64, 8}, | |||
286 | {s128, p0, 128, 8}, | |||
287 | {v8s8, p0, 64, 8}, | |||
288 | {v16s8, p0, 128, 8}, | |||
289 | {v4s16, p0, 64, 8}, | |||
290 | {v8s16, p0, 128, 8}, | |||
291 | {v2s32, p0, 64, 8}, | |||
292 | {v4s32, p0, 128, 8}, | |||
293 | {v2s64, p0, 128, 8}}) | |||
294 | // These extends are also legal | |||
295 | .legalForTypesWithMemDesc({{s32, p0, 8, 8}, {s32, p0, 16, 8}}) | |||
296 | .clampScalar(0, s8, s64) | |||
297 | .lowerIfMemSizeNotPow2() | |||
298 | .widenScalarToNextPow2(0) | |||
299 | .narrowScalarIf([=](const LegalityQuery &Query) { | |||
300 | // Clamp extending load results to 32-bits. | |||
301 | return Query.Types[0].isScalar() && | |||
302 | Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits && | |||
303 | Query.Types[0].getSizeInBits() > 32; | |||
304 | }, | |||
305 | changeTo(0, s32)) | |||
306 | // Lower any any-extending loads left into G_ANYEXT and G_LOAD | |||
307 | .lowerIf([=](const LegalityQuery &Query) { | |||
308 | return Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits; | |||
309 | }) | |||
310 | .clampMaxNumElements(0, s8, 16) | |||
311 | .clampMaxNumElements(0, s16, 8) | |||
312 | .clampMaxNumElements(0, s32, 4) | |||
313 | .clampMaxNumElements(0, s64, 2) | |||
314 | .customIf(IsPtrVecPred); | |||
315 | ||||
316 | getActionDefinitionsBuilder(G_STORE) | |||
317 | .legalForTypesWithMemDesc({{s8, p0, 8, 8}, | |||
318 | {s16, p0, 8, 8}, // truncstorei8 from s16 | |||
319 | {s32, p0, 8, 8}, // truncstorei8 from s32 | |||
320 | {s64, p0, 8, 8}, // truncstorei8 from s64 | |||
321 | {s16, p0, 16, 8}, | |||
322 | {s32, p0, 16, 8}, // truncstorei16 from s32 | |||
323 | {s64, p0, 16, 8}, // truncstorei16 from s64 | |||
324 | {s32, p0, 8, 8}, | |||
325 | {s32, p0, 16, 8}, | |||
326 | {s32, p0, 32, 8}, | |||
327 | {s64, p0, 64, 8}, | |||
328 | {s64, p0, 32, 8}, // truncstorei32 from s64 | |||
329 | {p0, p0, 64, 8}, | |||
330 | {s128, p0, 128, 8}, | |||
331 | {v16s8, p0, 128, 8}, | |||
332 | {v8s8, p0, 64, 8}, | |||
333 | {v4s16, p0, 64, 8}, | |||
334 | {v8s16, p0, 128, 8}, | |||
335 | {v2s32, p0, 64, 8}, | |||
336 | {v4s32, p0, 128, 8}, | |||
337 | {v2s64, p0, 128, 8}}) | |||
338 | .clampScalar(0, s8, s64) | |||
339 | .lowerIf([=](const LegalityQuery &Query) { | |||
340 | return Query.Types[0].isScalar() && | |||
341 | Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits; | |||
342 | }) | |||
343 | // Maximum: sN * k = 128 | |||
344 | .clampMaxNumElements(0, s8, 16) | |||
345 | .clampMaxNumElements(0, s16, 8) | |||
346 | .clampMaxNumElements(0, s32, 4) | |||
347 | .clampMaxNumElements(0, s64, 2) | |||
348 | .lowerIfMemSizeNotPow2() | |||
349 | .customIf(IsPtrVecPred); | |||
350 | ||||
351 | // Constants | |||
352 | getActionDefinitionsBuilder(G_CONSTANT) | |||
353 | .legalFor({p0, s8, s16, s32, s64}) | |||
354 | .clampScalar(0, s8, s64) | |||
355 | .widenScalarToNextPow2(0); | |||
356 | getActionDefinitionsBuilder(G_FCONSTANT) | |||
357 | .legalIf([=](const LegalityQuery &Query) { | |||
358 | const auto &Ty = Query.Types[0]; | |||
359 | if (HasFP16 && Ty == s16) | |||
360 | return true; | |||
361 | return Ty == s32 || Ty == s64 || Ty == s128; | |||
362 | }) | |||
363 | .clampScalar(0, MinFPScalar, s128); | |||
364 | ||||
365 | getActionDefinitionsBuilder({G_ICMP, G_FCMP}) | |||
366 | .legalFor({{s32, s32}, | |||
367 | {s32, s64}, | |||
368 | {s32, p0}, | |||
369 | {v4s32, v4s32}, | |||
370 | {v2s32, v2s32}, | |||
371 | {v2s64, v2s64}, | |||
372 | {v2s64, v2p0}, | |||
373 | {v4s16, v4s16}, | |||
374 | {v8s16, v8s16}, | |||
375 | {v8s8, v8s8}, | |||
376 | {v16s8, v16s8}}) | |||
377 | .clampScalar(1, s32, s64) | |||
378 | .clampScalar(0, s32, s32) | |||
379 | .minScalarEltSameAsIf( | |||
380 | [=](const LegalityQuery &Query) { | |||
381 | const LLT &Ty = Query.Types[0]; | |||
382 | const LLT &SrcTy = Query.Types[1]; | |||
383 | return Ty.isVector() && !SrcTy.getElementType().isPointer() && | |||
384 | Ty.getElementType() != SrcTy.getElementType(); | |||
385 | }, | |||
386 | 0, 1) | |||
387 | .minScalarOrEltIf( | |||
388 | [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; }, | |||
389 | 1, s32) | |||
390 | .minScalarOrEltIf( | |||
391 | [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0, | |||
392 | s64) | |||
393 | .widenScalarOrEltToNextPow2(1) | |||
394 | .clampNumElements(0, v2s32, v4s32); | |||
395 | ||||
396 | // Extensions | |||
397 | auto ExtLegalFunc = [=](const LegalityQuery &Query) { | |||
398 | unsigned DstSize = Query.Types[0].getSizeInBits(); | |||
399 | ||||
400 | if (DstSize == 128 && !Query.Types[0].isVector()) | |||
401 | return false; // Extending to a scalar s128 needs narrowing. | |||
402 | ||||
403 | // Make sure that we have something that will fit in a register, and | |||
404 | // make sure it's a power of 2. | |||
405 | if (DstSize < 8 || DstSize > 128 || !isPowerOf2_32(DstSize)) | |||
406 | return false; | |||
407 | ||||
408 | const LLT &SrcTy = Query.Types[1]; | |||
409 | ||||
410 | // Special case for s1. | |||
411 | if (SrcTy == s1) | |||
412 | return true; | |||
413 | ||||
414 | // Make sure we fit in a register otherwise. Don't bother checking that | |||
415 | // the source type is below 128 bits. We shouldn't be allowing anything | |||
416 | // through which is wider than the destination in the first place. | |||
417 | unsigned SrcSize = SrcTy.getSizeInBits(); | |||
418 | if (SrcSize < 8 || !isPowerOf2_32(SrcSize)) | |||
419 | return false; | |||
420 | ||||
421 | return true; | |||
422 | }; | |||
423 | getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT}) | |||
424 | .legalIf(ExtLegalFunc) | |||
425 | .clampScalar(0, s64, s64); // Just for s128, others are handled above. | |||
426 | ||||
427 | getActionDefinitionsBuilder(G_TRUNC) | |||
428 | .minScalarOrEltIf( | |||
429 | [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); }, | |||
430 | 0, s8) | |||
431 | .customIf([=](const LegalityQuery &Query) { | |||
432 | LLT DstTy = Query.Types[0]; | |||
433 | LLT SrcTy = Query.Types[1]; | |||
434 | return DstTy == v8s8 && SrcTy.getSizeInBits() > 128; | |||
435 | }) | |||
436 | .alwaysLegal(); | |||
437 | ||||
438 | getActionDefinitionsBuilder(G_SEXT_INREG).legalFor({s32, s64}).lower(); | |||
439 | ||||
440 | // FP conversions | |||
441 | getActionDefinitionsBuilder(G_FPTRUNC) | |||
442 | .legalFor( | |||
443 | {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}}) | |||
444 | .clampMaxNumElements(0, s32, 2); | |||
445 | getActionDefinitionsBuilder(G_FPEXT) | |||
446 | .legalFor( | |||
447 | {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}}) | |||
448 | .clampMaxNumElements(0, s64, 2); | |||
449 | ||||
450 | // Conversions | |||
451 | getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI}) | |||
452 | .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32}) | |||
453 | .clampScalar(0, s32, s64) | |||
454 | .widenScalarToNextPow2(0) | |||
455 | .clampScalar(1, s32, s64) | |||
456 | .widenScalarToNextPow2(1); | |||
457 | ||||
458 | getActionDefinitionsBuilder({G_SITOFP, G_UITOFP}) | |||
459 | .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32}) | |||
460 | .clampScalar(1, s32, s64) | |||
461 | .minScalarSameAs(1, 0) | |||
462 | .clampScalar(0, s32, s64) | |||
463 | .widenScalarToNextPow2(0); | |||
464 | ||||
465 | // Control-flow | |||
466 | getActionDefinitionsBuilder(G_BRCOND).legalFor({s1, s8, s16, s32}); | |||
467 | getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0}); | |||
468 | ||||
469 | getActionDefinitionsBuilder(G_SELECT) | |||
470 | .legalFor({{s32, s1}, {s64, s1}, {p0, s1}}) | |||
471 | .clampScalar(0, s32, s64) | |||
472 | .widenScalarToNextPow2(0) | |||
473 | .minScalarEltSameAsIf(all(isVector(0), isVector(1)), 1, 0) | |||
474 | .lowerIf(isVector(0)); | |||
475 | ||||
476 | // Pointer-handling | |||
477 | getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0}); | |||
478 | ||||
479 | if (TM.getCodeModel() == CodeModel::Small) | |||
480 | getActionDefinitionsBuilder(G_GLOBAL_VALUE).custom(); | |||
481 | else | |||
482 | getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0}); | |||
483 | ||||
484 | getActionDefinitionsBuilder(G_PTRTOINT) | |||
485 | .legalForCartesianProduct({s1, s8, s16, s32, s64}, {p0}) | |||
486 | .maxScalar(0, s64) | |||
487 | .widenScalarToNextPow2(0, /*Min*/ 8); | |||
488 | ||||
489 | getActionDefinitionsBuilder(G_INTTOPTR) | |||
490 | .unsupportedIf([&](const LegalityQuery &Query) { | |||
491 | return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits(); | |||
492 | }) | |||
493 | .legalFor({{p0, s64}}); | |||
494 | ||||
495 | // Casts for 32 and 64-bit width type are just copies. | |||
496 | // Same for 128-bit width type, except they are on the FPR bank. | |||
497 | getActionDefinitionsBuilder(G_BITCAST) | |||
498 | // FIXME: This is wrong since G_BITCAST is not allowed to change the | |||
499 | // number of bits but it's what the previous code described and fixing | |||
500 | // it breaks tests. | |||
501 | .legalForCartesianProduct({s1, s8, s16, s32, s64, s128, v16s8, v8s8, v4s8, | |||
502 | v8s16, v4s16, v2s16, v4s32, v2s32, v2s64, | |||
503 | v2p0}); | |||
504 | ||||
505 | getActionDefinitionsBuilder(G_VASTART).legalFor({p0}); | |||
506 | ||||
507 | // va_list must be a pointer, but most sized types are pretty easy to handle | |||
508 | // as the destination. | |||
509 | getActionDefinitionsBuilder(G_VAARG) | |||
510 | .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0}) | |||
511 | .clampScalar(0, s8, s64) | |||
512 | .widenScalarToNextPow2(0, /*Min*/ 8); | |||
513 | ||||
514 | getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS) | |||
515 | .lowerIf( | |||
516 | all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(1, s1), typeIs(2, p0))); | |||
517 | ||||
518 | getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG) | |||
519 | .legalIf(all(typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0))) | |||
520 | .customIf([](const LegalityQuery &Query) { | |||
521 | return Query.Types[0].getSizeInBits() == 128; | |||
522 | }); | |||
523 | ||||
524 | getActionDefinitionsBuilder( | |||
525 | {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND, | |||
526 | G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, | |||
527 | G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX}) | |||
528 | .legalIf(all(typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0))); | |||
529 | ||||
530 | getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0}); | |||
531 | ||||
532 | // Merge/Unmerge | |||
533 | for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) { | |||
534 | unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1; | |||
535 | unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0; | |||
536 | ||||
537 | auto notValidElt = [](const LegalityQuery &Query, unsigned TypeIdx) { | |||
538 | const LLT &Ty = Query.Types[TypeIdx]; | |||
539 | if (Ty.isVector()) { | |||
540 | const LLT &EltTy = Ty.getElementType(); | |||
541 | if (EltTy.getSizeInBits() < 8 || EltTy.getSizeInBits() > 64) | |||
542 | return true; | |||
543 | if (!isPowerOf2_32(EltTy.getSizeInBits())) | |||
544 | return true; | |||
545 | } | |||
546 | return false; | |||
547 | }; | |||
548 | ||||
549 | // FIXME: This rule is horrible, but specifies the same as what we had | |||
550 | // before with the particularly strange definitions removed (e.g. | |||
551 | // s8 = G_MERGE_VALUES s32, s32). | |||
552 | // Part of the complexity comes from these ops being extremely flexible. For | |||
553 | // example, you can build/decompose vectors with it, concatenate vectors, | |||
554 | // etc. and in addition to this you can also bitcast with it at the same | |||
555 | // time. We've been considering breaking it up into multiple ops to make it | |||
556 | // more manageable throughout the backend. | |||
557 | getActionDefinitionsBuilder(Op) | |||
558 | // Break up vectors with weird elements into scalars | |||
559 | .fewerElementsIf( | |||
560 | [=](const LegalityQuery &Query) { return notValidElt(Query, 0); }, | |||
561 | scalarize(0)) | |||
562 | .fewerElementsIf( | |||
563 | [=](const LegalityQuery &Query) { return notValidElt(Query, 1); }, | |||
564 | scalarize(1)) | |||
565 | // Clamp the big scalar to s8-s512 and make it either a power of 2, 192, | |||
566 | // or 384. | |||
567 | .clampScalar(BigTyIdx, s8, s512) | |||
568 | .widenScalarIf( | |||
569 | [=](const LegalityQuery &Query) { | |||
570 | const LLT &Ty = Query.Types[BigTyIdx]; | |||
571 | return !isPowerOf2_32(Ty.getSizeInBits()) && | |||
572 | Ty.getSizeInBits() % 64 != 0; | |||
573 | }, | |||
574 | [=](const LegalityQuery &Query) { | |||
575 | // Pick the next power of 2, or a multiple of 64 over 128. | |||
576 | // Whichever is smaller. | |||
577 | const LLT &Ty = Query.Types[BigTyIdx]; | |||
578 | unsigned NewSizeInBits = 1 | |||
579 | << Log2_32_Ceil(Ty.getSizeInBits() + 1); | |||
580 | if (NewSizeInBits >= 256) { | |||
581 | unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1); | |||
582 | if (RoundedTo < NewSizeInBits) | |||
583 | NewSizeInBits = RoundedTo; | |||
584 | } | |||
585 | return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits)); | |||
586 | }) | |||
587 | // Clamp the little scalar to s8-s256 and make it a power of 2. It's not | |||
588 | // worth considering the multiples of 64 since 2*192 and 2*384 are not | |||
589 | // valid. | |||
590 | .clampScalar(LitTyIdx, s8, s256) | |||
591 | .widenScalarToNextPow2(LitTyIdx, /*Min*/ 8) | |||
592 | // So at this point, we have s8, s16, s32, s64, s128, s192, s256, s384, | |||
593 | // s512, <X x s8>, <X x s16>, <X x s32>, or <X x s64>. | |||
594 | // At this point it's simple enough to accept the legal types. | |||
595 | .legalIf([=](const LegalityQuery &Query) { | |||
596 | const LLT &BigTy = Query.Types[BigTyIdx]; | |||
597 | const LLT &LitTy = Query.Types[LitTyIdx]; | |||
598 | if (BigTy.isVector() && BigTy.getSizeInBits() < 32) | |||
| ||||
599 | return false; | |||
600 | if (LitTy.isVector() && LitTy.getSizeInBits() < 32) | |||
601 | return false; | |||
602 | return BigTy.getSizeInBits() % LitTy.getSizeInBits() == 0; | |||
| ||||
603 | }) | |||
604 | // Any vectors left are the wrong size. Scalarize them. | |||
605 | .scalarize(0) | |||
606 | .scalarize(1); | |||
607 | } | |||
608 | ||||
609 | getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT) | |||
610 | .unsupportedIf([=](const LegalityQuery &Query) { | |||
611 | const LLT &EltTy = Query.Types[1].getElementType(); | |||
612 | return Query.Types[0] != EltTy; | |||
613 | }) | |||
614 | .minScalar(2, s64) | |||
615 | .legalIf([=](const LegalityQuery &Query) { | |||
616 | const LLT &VecTy = Query.Types[1]; | |||
617 | return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 || | |||
618 | VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32 || | |||
619 | VecTy == v16s8 || VecTy == v2s32 || VecTy == v2p0; | |||
620 | }) | |||
621 | .minScalarOrEltIf( | |||
622 | [=](const LegalityQuery &Query) { | |||
623 | // We want to promote to <M x s1> to <M x s64> if that wouldn't | |||
624 | // cause the total vec size to be > 128b. | |||
625 | return Query.Types[1].getNumElements() <= 2; | |||
626 | }, | |||
627 | 0, s64) | |||
628 | .minScalarOrEltIf( | |||
629 | [=](const LegalityQuery &Query) { | |||
630 | return Query.Types[1].getNumElements() <= 4; | |||
631 | }, | |||
632 | 0, s32) | |||
633 | .minScalarOrEltIf( | |||
634 | [=](const LegalityQuery &Query) { | |||
635 | return Query.Types[1].getNumElements() <= 8; | |||
636 | }, | |||
637 | 0, s16) | |||
638 | .minScalarOrEltIf( | |||
639 | [=](const LegalityQuery &Query) { | |||
640 | return Query.Types[1].getNumElements() <= 16; | |||
641 | }, | |||
642 | 0, s8) | |||
643 | .minScalarOrElt(0, s8) // Worst case, we need at least s8. | |||
644 | .clampMaxNumElements(1, s64, 2) | |||
645 | .clampMaxNumElements(1, s32, 4) | |||
646 | .clampMaxNumElements(1, s16, 8); | |||
647 | ||||
648 | getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT) | |||
649 | .legalIf(typeInSet(0, {v8s16, v2s32, v4s32, v2s64})); | |||
650 | ||||
651 | getActionDefinitionsBuilder(G_BUILD_VECTOR) | |||
652 | .legalFor({{v8s8, s8}, | |||
653 | {v16s8, s8}, | |||
654 | {v4s16, s16}, | |||
655 | {v8s16, s16}, | |||
656 | {v2s32, s32}, | |||
657 | {v4s32, s32}, | |||
658 | {v2p0, p0}, | |||
659 | {v2s64, s64}}) | |||
660 | .clampNumElements(0, v4s32, v4s32) | |||
661 | .clampNumElements(0, v2s64, v2s64) | |||
662 | .minScalarSameAs(1, 0); | |||
663 | ||||
664 | getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).lower(); | |||
665 | ||||
666 | getActionDefinitionsBuilder(G_CTLZ) | |||
667 | .legalForCartesianProduct( | |||
668 | {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32}) | |||
669 | .scalarize(1); | |||
670 | getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).lower(); | |||
671 | ||||
672 | // TODO: Custom lowering for v2s32, v4s32, v2s64. | |||
673 | getActionDefinitionsBuilder(G_BITREVERSE).legalFor({s32, s64, v8s8, v16s8}); | |||
674 | ||||
675 | getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF).lower(); | |||
676 | ||||
677 | // TODO: Handle vector types. | |||
678 | getActionDefinitionsBuilder(G_CTTZ) | |||
679 | .clampScalar(0, s32, s64) | |||
680 | .scalarSameSizeAs(1, 0) | |||
681 | .customFor({s32, s64}); | |||
682 | ||||
683 | getActionDefinitionsBuilder(G_SHUFFLE_VECTOR) | |||
684 | .legalIf([=](const LegalityQuery &Query) { | |||
685 | const LLT &DstTy = Query.Types[0]; | |||
686 | const LLT &SrcTy = Query.Types[1]; | |||
687 | // For now just support the TBL2 variant which needs the source vectors | |||
688 | // to be the same size as the dest. | |||
689 | if (DstTy != SrcTy) | |||
690 | return false; | |||
691 | for (auto &Ty : {v2s32, v4s32, v2s64, v2p0, v16s8, v8s16}) { | |||
692 | if (DstTy == Ty) | |||
693 | return true; | |||
694 | } | |||
695 | return false; | |||
696 | }) | |||
697 | // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), we | |||
698 | // just want those lowered into G_BUILD_VECTOR | |||
699 | .lowerIf([=](const LegalityQuery &Query) { | |||
700 | return !Query.Types[1].isVector(); | |||
701 | }) | |||
702 | .clampNumElements(0, v4s32, v4s32) | |||
703 | .clampNumElements(0, v2s64, v2s64); | |||
704 | ||||
705 | getActionDefinitionsBuilder(G_CONCAT_VECTORS) | |||
706 | .legalFor({{v4s32, v2s32}, {v8s16, v4s16}}); | |||
707 | ||||
708 | getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({{p0}, {s64}}); | |||
709 | ||||
710 | getActionDefinitionsBuilder(G_BRJT).legalIf([=](const LegalityQuery &Query) { | |||
711 | return Query.Types[0] == p0 && Query.Types[1] == s64; | |||
712 | }); | |||
713 | ||||
714 | getActionDefinitionsBuilder(G_DYN_STACKALLOC).lower(); | |||
715 | ||||
716 | getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET}) | |||
717 | .libcall(); | |||
718 | ||||
719 | // FIXME: Legal types are only legal with NEON. | |||
720 | getActionDefinitionsBuilder(G_ABS) | |||
721 | .lowerIf(isScalar(0)) | |||
722 | .legalFor(PackedVectorAllTypeList); | |||
723 | ||||
724 | getActionDefinitionsBuilder(G_VECREDUCE_FADD) | |||
725 | // We only have FADDP to do reduction-like operations. Lower the rest. | |||
726 | .legalFor({{s32, v2s32}, {s64, v2s64}}) | |||
727 | .clampMaxNumElements(1, s64, 2) | |||
728 | .clampMaxNumElements(1, s32, 2) | |||
729 | .lower(); | |||
730 | ||||
731 | getActionDefinitionsBuilder(G_VECREDUCE_ADD) | |||
732 | .legalFor( | |||
733 | {{s8, v16s8}, {s16, v8s16}, {s32, v4s32}, {s32, v2s32}, {s64, v2s64}}) | |||
734 | .clampMaxNumElements(1, s64, 2) | |||
735 | .clampMaxNumElements(1, s32, 4) | |||
736 | .lower(); | |||
737 | ||||
738 | getActionDefinitionsBuilder({G_UADDSAT, G_USUBSAT}) | |||
739 | .lowerIf([=](const LegalityQuery &Q) { return Q.Types[0].isScalar(); }); | |||
740 | ||||
741 | getActionDefinitionsBuilder({G_FSHL, G_FSHR}).lower(); | |||
742 | ||||
743 | getActionDefinitionsBuilder(G_ROTR) | |||
744 | .legalFor({{s32, s64}, {s64, s64}}) | |||
745 | .customIf([=](const LegalityQuery &Q) { | |||
746 | return Q.Types[0].isScalar() && Q.Types[1].getScalarSizeInBits() < 64; | |||
747 | }) | |||
748 | .lower(); | |||
749 | getActionDefinitionsBuilder(G_ROTL).lower(); | |||
750 | ||||
751 | getActionDefinitionsBuilder({G_SBFX, G_UBFX}) | |||
752 | .customFor({{s32, s32}, {s64, s64}}); | |||
753 | ||||
754 | // TODO: Custom legalization for s128 | |||
755 | // TODO: v2s64, v2s32, v4s32, v4s16, v8s16 | |||
756 | // TODO: Use generic lowering when custom lowering is not possible. | |||
757 | auto always = [=](const LegalityQuery &Q) { return true; }; | |||
758 | getActionDefinitionsBuilder(G_CTPOP) | |||
759 | .legalFor({{v8s8, v8s8}, {v16s8, v16s8}}) | |||
760 | .clampScalar(0, s32, s128) | |||
761 | .widenScalarToNextPow2(0) | |||
762 | .minScalarEltSameAsIf(always, 1, 0) | |||
763 | .maxScalarEltSameAsIf(always, 1, 0) | |||
764 | .customFor({{s32, s32}, {s64, s64}}); | |||
765 | ||||
766 | getLegacyLegalizerInfo().computeTables(); | |||
767 | verify(*ST.getInstrInfo()); | |||
768 | } | |||
769 | ||||
770 | bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, | |||
771 | MachineInstr &MI) const { | |||
772 | MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; | |||
773 | MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); | |||
774 | GISelChangeObserver &Observer = Helper.Observer; | |||
775 | switch (MI.getOpcode()) { | |||
776 | default: | |||
777 | // No idea what to do. | |||
778 | return false; | |||
779 | case TargetOpcode::G_VAARG: | |||
780 | return legalizeVaArg(MI, MRI, MIRBuilder); | |||
781 | case TargetOpcode::G_LOAD: | |||
782 | case TargetOpcode::G_STORE: | |||
783 | return legalizeLoadStore(MI, MRI, MIRBuilder, Observer); | |||
784 | case TargetOpcode::G_SHL: | |||
785 | case TargetOpcode::G_ASHR: | |||
786 | case TargetOpcode::G_LSHR: | |||
787 | return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer); | |||
788 | case TargetOpcode::G_GLOBAL_VALUE: | |||
789 | return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer); | |||
790 | case TargetOpcode::G_TRUNC: | |||
791 | return legalizeVectorTrunc(MI, Helper); | |||
792 | case TargetOpcode::G_SBFX: | |||
793 | case TargetOpcode::G_UBFX: | |||
794 | return legalizeBitfieldExtract(MI, MRI, Helper); | |||
795 | case TargetOpcode::G_ROTR: | |||
796 | return legalizeRotate(MI, MRI, Helper); | |||
797 | case TargetOpcode::G_CTPOP: | |||
798 | return legalizeCTPOP(MI, MRI, Helper); | |||
799 | case TargetOpcode::G_ATOMIC_CMPXCHG: | |||
800 | return legalizeAtomicCmpxchg128(MI, MRI, Helper); | |||
801 | case TargetOpcode::G_CTTZ: | |||
802 | return legalizeCTTZ(MI, Helper); | |||
803 | } | |||
804 | ||||
805 | llvm_unreachable("expected switch to return")::llvm::llvm_unreachable_internal("expected switch to return" , "/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp" , 805); | |||
806 | } | |||
807 | ||||
808 | bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI, | |||
809 | MachineRegisterInfo &MRI, | |||
810 | LegalizerHelper &Helper) const { | |||
811 | // To allow for imported patterns to match, we ensure that the rotate amount | |||
812 | // is 64b with an extension. | |||
813 | Register AmtReg = MI.getOperand(2).getReg(); | |||
814 | LLT AmtTy = MRI.getType(AmtReg); | |||
815 | (void)AmtTy; | |||
816 | assert(AmtTy.isScalar() && "Expected a scalar rotate")(static_cast <bool> (AmtTy.isScalar() && "Expected a scalar rotate" ) ? void (0) : __assert_fail ("AmtTy.isScalar() && \"Expected a scalar rotate\"" , "/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp" , 816, __extension__ __PRETTY_FUNCTION__)); | |||
817 | assert(AmtTy.getSizeInBits() < 64 && "Expected this rotate to be legal")(static_cast <bool> (AmtTy.getSizeInBits() < 64 && "Expected this rotate to be legal") ? void (0) : __assert_fail ("AmtTy.getSizeInBits() < 64 && \"Expected this rotate to be legal\"" , "/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp" , 817, __extension__ __PRETTY_FUNCTION__)); | |||
818 | auto NewAmt = Helper.MIRBuilder.buildSExt(LLT::scalar(64), AmtReg); | |||
819 | Helper.Observer.changingInstr(MI); | |||
820 | MI.getOperand(2).setReg(NewAmt.getReg(0)); | |||
821 | Helper.Observer.changedInstr(MI); | |||
822 | return true; | |||
823 | } | |||
824 | ||||
825 | static void extractParts(Register Reg, MachineRegisterInfo &MRI, | |||
826 | MachineIRBuilder &MIRBuilder, LLT Ty, int NumParts, | |||
827 | SmallVectorImpl<Register> &VRegs) { | |||
828 | for (int I = 0; I < NumParts; ++I) | |||
829 | VRegs.push_back(MRI.createGenericVirtualRegister(Ty)); | |||
830 | MIRBuilder.buildUnmerge(VRegs, Reg); | |||
831 | } | |||
832 | ||||
833 | bool AArch64LegalizerInfo::legalizeVectorTrunc( | |||
834 | MachineInstr &MI, LegalizerHelper &Helper) const { | |||
835 | MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; | |||
836 | MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); | |||
837 | // Similar to how operand splitting is done in SelectiondDAG, we can handle | |||
838 | // %res(v8s8) = G_TRUNC %in(v8s32) by generating: | |||
839 | // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>) | |||
840 | // %lo16(<4 x s16>) = G_TRUNC %inlo | |||
841 | // %hi16(<4 x s16>) = G_TRUNC %inhi | |||
842 | // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16 | |||
843 | // %res(<8 x s8>) = G_TRUNC %in16 | |||
844 | ||||
845 | Register DstReg = MI.getOperand(0).getReg(); | |||
846 | Register SrcReg = MI.getOperand(1).getReg(); | |||
847 | LLT DstTy = MRI.getType(DstReg); | |||
848 | LLT SrcTy = MRI.getType(SrcReg); | |||
849 | assert(isPowerOf2_32(DstTy.getSizeInBits()) &&(static_cast <bool> (isPowerOf2_32(DstTy.getSizeInBits( )) && isPowerOf2_32(SrcTy.getSizeInBits())) ? void (0 ) : __assert_fail ("isPowerOf2_32(DstTy.getSizeInBits()) && isPowerOf2_32(SrcTy.getSizeInBits())" , "/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp" , 850, __extension__ __PRETTY_FUNCTION__)) | |||
850 | isPowerOf2_32(SrcTy.getSizeInBits()))(static_cast <bool> (isPowerOf2_32(DstTy.getSizeInBits( )) && isPowerOf2_32(SrcTy.getSizeInBits())) ? void (0 ) : __assert_fail ("isPowerOf2_32(DstTy.getSizeInBits()) && isPowerOf2_32(SrcTy.getSizeInBits())" , "/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp" , 850, __extension__ __PRETTY_FUNCTION__)); | |||
851 | ||||
852 | // Split input type. | |||
853 | LLT SplitSrcTy = SrcTy.changeNumElements(SrcTy.getNumElements() / 2); | |||
854 | // First, split the source into two smaller vectors. | |||
855 | SmallVector<Register, 2> SplitSrcs; | |||
856 | extractParts(SrcReg, MRI, MIRBuilder, SplitSrcTy, 2, SplitSrcs); | |||
857 | ||||
858 | // Truncate the splits into intermediate narrower elements. | |||
859 | LLT InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2); | |||
860 | for (unsigned I = 0; I < SplitSrcs.size(); ++I) | |||
861 | SplitSrcs[I] = MIRBuilder.buildTrunc(InterTy, SplitSrcs[I]).getReg(0); | |||
862 | ||||
863 | auto Concat = MIRBuilder.buildConcatVectors( | |||
864 | DstTy.changeElementSize(DstTy.getScalarSizeInBits() * 2), SplitSrcs); | |||
865 | ||||
866 | Helper.Observer.changingInstr(MI); | |||
867 | MI.getOperand(1).setReg(Concat.getReg(0)); | |||
868 | Helper.Observer.changedInstr(MI); | |||
869 | return true; | |||
870 | } | |||
871 | ||||
872 | bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue( | |||
873 | MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder, | |||
874 | GISelChangeObserver &Observer) const { | |||
875 | assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE)(static_cast <bool> (MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE ) ? void (0) : __assert_fail ("MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE" , "/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp" , 875, __extension__ __PRETTY_FUNCTION__)); | |||
876 | // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP + | |||
877 | // G_ADD_LOW instructions. | |||
878 | // By splitting this here, we can optimize accesses in the small code model by | |||
879 | // folding in the G_ADD_LOW into the load/store offset. | |||
880 | auto &GlobalOp = MI.getOperand(1); | |||
881 | const auto* GV = GlobalOp.getGlobal(); | |||
882 | if (GV->isThreadLocal()) | |||
883 | return true; // Don't want to modify TLS vars. | |||
884 | ||||
885 | auto &TM = ST->getTargetLowering()->getTargetMachine(); | |||
886 | unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM); | |||
887 | ||||
888 | if (OpFlags & AArch64II::MO_GOT) | |||
889 | return true; | |||
890 | ||||
891 | auto Offset = GlobalOp.getOffset(); | |||
892 | Register DstReg = MI.getOperand(0).getReg(); | |||
893 | auto ADRP = MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {}) | |||
894 | .addGlobalAddress(GV, Offset, OpFlags | AArch64II::MO_PAGE); | |||
895 | // Set the regclass on the dest reg too. | |||
896 | MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass); | |||
897 | ||||
898 | // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so | |||
899 | // by creating a MOVK that sets bits 48-63 of the register to (global address | |||
900 | // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to | |||
901 | // prevent an incorrect tag being generated during relocation when the the | |||
902 | // global appears before the code section. Without the offset, a global at | |||
903 | // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced | |||
904 | // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 = | |||
905 | // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe` | |||
906 | // instead of `0xf`. | |||
907 | // This assumes that we're in the small code model so we can assume a binary | |||
908 | // size of <= 4GB, which makes the untagged PC relative offset positive. The | |||
909 | // binary must also be loaded into address range [0, 2^48). Both of these | |||
910 | // properties need to be ensured at runtime when using tagged addresses. | |||
911 | if (OpFlags & AArch64II::MO_TAGGED) { | |||
912 | assert(!Offset &&(static_cast <bool> (!Offset && "Should not have folded in an offset for a tagged global!" ) ? void (0) : __assert_fail ("!Offset && \"Should not have folded in an offset for a tagged global!\"" , "/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp" , 913, __extension__ __PRETTY_FUNCTION__)) | |||
913 | "Should not have folded in an offset for a tagged global!")(static_cast <bool> (!Offset && "Should not have folded in an offset for a tagged global!" ) ? void (0) : __assert_fail ("!Offset && \"Should not have folded in an offset for a tagged global!\"" , "/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp" , 913, __extension__ __PRETTY_FUNCTION__)); | |||
914 | ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP}) | |||
915 | .addGlobalAddress(GV, 0x100000000, | |||
916 | AArch64II::MO_PREL | AArch64II::MO_G3) | |||
917 | .addImm(48); | |||
918 | MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass); | |||
919 | } | |||
920 | ||||
921 | MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP}) | |||
922 | .addGlobalAddress(GV, Offset, | |||
923 | OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); | |||
924 | MI.eraseFromParent(); | |||
925 | return true; | |||
926 | } | |||
927 | ||||
928 | bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, | |||
929 | MachineInstr &MI) const { | |||
930 | return true; | |||
931 | } | |||
932 | ||||
933 | bool AArch64LegalizerInfo::legalizeShlAshrLshr( | |||
934 | MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder, | |||
935 | GISelChangeObserver &Observer) const { | |||
936 | assert(MI.getOpcode() == TargetOpcode::G_ASHR ||(static_cast <bool> (MI.getOpcode() == TargetOpcode::G_ASHR || MI.getOpcode() == TargetOpcode::G_LSHR || MI.getOpcode() == TargetOpcode::G_SHL) ? void (0) : __assert_fail ("MI.getOpcode() == TargetOpcode::G_ASHR || MI.getOpcode() == TargetOpcode::G_LSHR || MI.getOpcode() == TargetOpcode::G_SHL" , "/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp" , 938, __extension__ __PRETTY_FUNCTION__)) | |||
937 | MI.getOpcode() == TargetOpcode::G_LSHR ||(static_cast <bool> (MI.getOpcode() == TargetOpcode::G_ASHR || MI.getOpcode() == TargetOpcode::G_LSHR || MI.getOpcode() == TargetOpcode::G_SHL) ? void (0) : __assert_fail ("MI.getOpcode() == TargetOpcode::G_ASHR || MI.getOpcode() == TargetOpcode::G_LSHR || MI.getOpcode() == TargetOpcode::G_SHL" , "/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp" , 938, __extension__ __PRETTY_FUNCTION__)) | |||
938 | MI.getOpcode() == TargetOpcode::G_SHL)(static_cast <bool> (MI.getOpcode() == TargetOpcode::G_ASHR || MI.getOpcode() == TargetOpcode::G_LSHR || MI.getOpcode() == TargetOpcode::G_SHL) ? void (0) : __assert_fail ("MI.getOpcode() == TargetOpcode::G_ASHR || MI.getOpcode() == TargetOpcode::G_LSHR || MI.getOpcode() == TargetOpcode::G_SHL" , "/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp" , 938, __extension__ __PRETTY_FUNCTION__)); | |||
939 | // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the | |||
940 | // imported patterns can select it later. Either way, it will be legal. | |||
941 | Register AmtReg = MI.getOperand(2).getReg(); | |||
942 | auto VRegAndVal = getConstantVRegValWithLookThrough(AmtReg, MRI); | |||
943 | if (!VRegAndVal) | |||
944 | return true; | |||
945 | // Check the shift amount is in range for an immediate form. | |||
946 | int64_t Amount = VRegAndVal->Value.getSExtValue(); | |||
947 | if (Amount > 31) | |||
948 | return true; // This will have to remain a register variant. | |||
949 | auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount); | |||
950 | Observer.changingInstr(MI); | |||
951 | MI.getOperand(2).setReg(ExtCst.getReg(0)); | |||
952 | Observer.changedInstr(MI); | |||
953 | return true; | |||
954 | } | |||
955 | ||||
956 | bool AArch64LegalizerInfo::legalizeLoadStore( | |||
957 | MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder, | |||
958 | GISelChangeObserver &Observer) const { | |||
959 | assert(MI.getOpcode() == TargetOpcode::G_STORE ||(static_cast <bool> (MI.getOpcode() == TargetOpcode::G_STORE || MI.getOpcode() == TargetOpcode::G_LOAD) ? void (0) : __assert_fail ("MI.getOpcode() == TargetOpcode::G_STORE || MI.getOpcode() == TargetOpcode::G_LOAD" , "/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp" , 960, __extension__ __PRETTY_FUNCTION__)) | |||
960 | MI.getOpcode() == TargetOpcode::G_LOAD)(static_cast <bool> (MI.getOpcode() == TargetOpcode::G_STORE || MI.getOpcode() == TargetOpcode::G_LOAD) ? void (0) : __assert_fail ("MI.getOpcode() == TargetOpcode::G_STORE || MI.getOpcode() == TargetOpcode::G_LOAD" , "/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp" , 960, __extension__ __PRETTY_FUNCTION__)); | |||
961 | // Here we just try to handle vector loads/stores where our value type might | |||
962 | // have pointer elements, which the SelectionDAG importer can't handle. To | |||
963 | // allow the existing patterns for s64 to fire for p0, we just try to bitcast | |||
964 | // the value to use s64 types. | |||
965 | ||||
966 | // Custom legalization requires the instruction, if not deleted, must be fully | |||
967 | // legalized. In order to allow further legalization of the inst, we create | |||
968 | // a new instruction and erase the existing one. | |||
969 | ||||
970 | Register ValReg = MI.getOperand(0).getReg(); | |||
971 | const LLT ValTy = MRI.getType(ValReg); | |||
972 | ||||
973 | if (!ValTy.isVector() || !ValTy.getElementType().isPointer() || | |||
974 | ValTy.getElementType().getAddressSpace() != 0) { | |||
975 | LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-legalinfo")) { dbgs() << "Tried to do custom legalization on wrong load/store" ; } } while (false); | |||
976 | return false; | |||
977 | } | |||
978 | ||||
979 | unsigned PtrSize = ValTy.getElementType().getSizeInBits(); | |||
980 | const LLT NewTy = LLT::vector(ValTy.getNumElements(), PtrSize); | |||
981 | auto &MMO = **MI.memoperands_begin(); | |||
982 | if (MI.getOpcode() == TargetOpcode::G_STORE) { | |||
983 | auto Bitcast = MIRBuilder.buildBitcast(NewTy, ValReg); | |||
984 | MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1), MMO); | |||
985 | } else { | |||
986 | auto NewLoad = MIRBuilder.buildLoad(NewTy, MI.getOperand(1), MMO); | |||
987 | MIRBuilder.buildBitcast(ValReg, NewLoad); | |||
988 | } | |||
989 | MI.eraseFromParent(); | |||
990 | return true; | |||
991 | } | |||
992 | ||||
bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
                                         MachineRegisterInfo &MRI,
                                         MachineIRBuilder &MIRBuilder) const {
  // Expand G_VAARG, treating the va_list operand as a plain pointer to the
  // next-argument slot:
  //   1. Load the current argument pointer from the list.
  //   2. Realign it if the requested alignment exceeds pointer alignment.
  //   3. Load the value itself into the destination register.
  //   4. Store the advanced pointer back into the list.
  MachineFunction &MF = MIRBuilder.getMF();
  // Operand 2 is the required alignment of the vaarg slot, as an immediate.
  Align Alignment(MI.getOperand(2).getImm());
  Register Dst = MI.getOperand(0).getReg();
  Register ListPtr = MI.getOperand(1).getReg();

  LLT PtrTy = MRI.getType(ListPtr);
  // Integer type of pointer width, for the pointer arithmetic below.
  LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());

  const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
  const Align PtrAlign = Align(PtrSize);
  // Load the pointer to the current argument out of the va_list.
  auto List = MIRBuilder.buildLoad(
      PtrTy, ListPtr,
      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
                               PtrSize, PtrAlign));

  MachineInstrBuilder DstPtr;
  if (Alignment > PtrAlign) {
    // Realign the list to the actual required alignment:
    // (ptr + align - 1) with the low log2(align) bits masked off.
    auto AlignMinus1 =
        MIRBuilder.buildConstant(IntPtrTy, Alignment.value() - 1);
    auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0));
    DstPtr = MIRBuilder.buildMaskLowPtrBits(PtrTy, ListTmp, Log2(Alignment));
  } else
    DstPtr = List;

  // Load the argument value itself from the (possibly realigned) slot.
  uint64_t ValSize = MRI.getType(Dst).getSizeInBits() / 8;
  MIRBuilder.buildLoad(
      Dst, DstPtr,
      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
                               ValSize, std::max(Alignment, PtrAlign)));

  // Advance past the argument, rounded up to keep pointer alignment.
  auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrAlign));

  auto NewList = MIRBuilder.buildPtrAdd(PtrTy, DstPtr, Size.getReg(0));

  // Store the advanced pointer back so the next G_VAARG reads the next slot.
  MIRBuilder.buildStore(NewList, ListPtr,
                        *MF.getMachineMemOperand(MachinePointerInfo(),
                                                 MachineMemOperand::MOStore,
                                                 PtrSize, PtrAlign));

  MI.eraseFromParent();
  return true;
}
1039 | ||||
1040 | bool AArch64LegalizerInfo::legalizeBitfieldExtract( | |||
1041 | MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const { | |||
1042 | // Only legal if we can select immediate forms. | |||
1043 | // TODO: Lower this otherwise. | |||
1044 | return getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI) && | |||
1045 | getConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI); | |||
1046 | } | |||
1047 | ||||
1048 | bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI, | |||
1049 | MachineRegisterInfo &MRI, | |||
1050 | LegalizerHelper &Helper) const { | |||
1051 | // While there is no integer popcount instruction, it can | |||
1052 | // be more efficiently lowered to the following sequence that uses | |||
1053 | // AdvSIMD registers/instructions as long as the copies to/from | |||
1054 | // the AdvSIMD registers are cheap. | |||
1055 | // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd | |||
1056 | // CNT V0.8B, V0.8B // 8xbyte pop-counts | |||
1057 | // ADDV B0, V0.8B // sum 8xbyte pop-counts | |||
1058 | // UMOV X0, V0.B[0] // copy byte result back to integer reg | |||
1059 | if (!ST->hasNEON() || | |||
1060 | MI.getMF()->getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) | |||
1061 | return false; | |||
1062 | MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; | |||
1063 | Register Dst = MI.getOperand(0).getReg(); | |||
1064 | Register Val = MI.getOperand(1).getReg(); | |||
1065 | LLT Ty = MRI.getType(Val); | |||
1066 | ||||
1067 | // TODO: Handle vector types. | |||
1068 | assert(!Ty.isVector() && "Vector types not handled yet!")(static_cast <bool> (!Ty.isVector() && "Vector types not handled yet!" ) ? void (0) : __assert_fail ("!Ty.isVector() && \"Vector types not handled yet!\"" , "/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp" , 1068, __extension__ __PRETTY_FUNCTION__)); | |||
1069 | assert(Ty == MRI.getType(Dst) &&(static_cast <bool> (Ty == MRI.getType(Dst) && "Expected src and dst to have the same type!" ) ? void (0) : __assert_fail ("Ty == MRI.getType(Dst) && \"Expected src and dst to have the same type!\"" , "/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp" , 1070, __extension__ __PRETTY_FUNCTION__)) | |||
1070 | "Expected src and dst to have the same type!")(static_cast <bool> (Ty == MRI.getType(Dst) && "Expected src and dst to have the same type!" ) ? void (0) : __assert_fail ("Ty == MRI.getType(Dst) && \"Expected src and dst to have the same type!\"" , "/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp" , 1070, __extension__ __PRETTY_FUNCTION__)); | |||
1071 | // TODO: Handle s128. | |||
1072 | unsigned Size = Ty.getSizeInBits(); | |||
1073 | assert((Size == 32 || Size == 64) && "Expected only 32 or 64 bit scalars!")(static_cast <bool> ((Size == 32 || Size == 64) && "Expected only 32 or 64 bit scalars!") ? void (0) : __assert_fail ("(Size == 32 || Size == 64) && \"Expected only 32 or 64 bit scalars!\"" , "/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp" , 1073, __extension__ __PRETTY_FUNCTION__)); | |||
1074 | if (Size == 32) | |||
1075 | Val = MIRBuilder.buildZExt(LLT::scalar(64), Val).getReg(0); | |||
1076 | const LLT V8S8 = LLT::vector(8, LLT::scalar(8)); | |||
1077 | Val = MIRBuilder.buildBitcast(V8S8, Val).getReg(0); | |||
1078 | auto CTPOP = MIRBuilder.buildCTPOP(V8S8, Val); | |||
1079 | auto UADDLV = | |||
1080 | MIRBuilder | |||
1081 | .buildIntrinsic(Intrinsic::aarch64_neon_uaddlv, {LLT::scalar(32)}, | |||
1082 | /*HasSideEffects = */ false) | |||
1083 | .addUse(CTPOP.getReg(0)); | |||
1084 | if (Size == 64) | |||
1085 | MIRBuilder.buildZExt(Dst, UADDLV); | |||
1086 | else | |||
1087 | UADDLV->getOperand(0).setReg(Dst); | |||
1088 | MI.eraseFromParent(); | |||
1089 | return true; | |||
1090 | } | |||
1091 | ||||
bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128(
    MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
  // Lower a 128-bit G_ATOMIC_CMPXCHG either to a CASP instruction (when LSE
  // is available) or to the CMP_SWAP_128 pseudo (an LDXP/STXP loop emitted
  // later). The s128 desired/new operands are split into s64 halves up
  // front, and the s128 result is re-assembled from halves at the end.
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  LLT s64 = LLT::scalar(64);
  auto Addr = MI.getOperand(1).getReg();
  auto DesiredI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(2));
  auto NewI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(3));
  auto DstLo = MRI.createGenericVirtualRegister(s64);
  auto DstHi = MRI.createGenericVirtualRegister(s64);

  MachineInstrBuilder CAS;
  if (ST->hasLSE()) {
    // We have 128-bit CASP instructions taking XSeqPair registers, which are
    // s128. We need the merge/unmerge to bracket the expansion and pair up with
    // the rest of the MIR so we must reassemble the extracted registers into a
    // 128-bit known-regclass one with code like this:
    //
    //     %in1 = REG_SEQUENCE Lo, Hi    ; One for each input
    //     %out = CASP %in1, ...
    //     %OldLo = G_EXTRACT %out, 0
    //     %OldHi = G_EXTRACT %out, 64
    //
    // Pick the CASP variant matching the operation's memory ordering.
    auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
    unsigned Opcode;
    switch (Ordering) {
    case AtomicOrdering::Acquire:
      Opcode = AArch64::CASPAX;
      break;
    case AtomicOrdering::Release:
      Opcode = AArch64::CASPLX;
      break;
    case AtomicOrdering::AcquireRelease:
    case AtomicOrdering::SequentiallyConsistent:
      Opcode = AArch64::CASPALX;
      break;
    default:
      Opcode = AArch64::CASPX;
      break;
    }

    // Pack each pair of s64 halves into an s128 register pair via
    // REG_SEQUENCE (sube64 = low half, subo64 = high half).
    LLT s128 = LLT::scalar(128);
    auto CASDst = MRI.createGenericVirtualRegister(s128);
    auto CASDesired = MRI.createGenericVirtualRegister(s128);
    auto CASNew = MRI.createGenericVirtualRegister(s128);
    MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASDesired}, {})
        .addUse(DesiredI->getOperand(0).getReg())
        .addImm(AArch64::sube64)
        .addUse(DesiredI->getOperand(1).getReg())
        .addImm(AArch64::subo64);
    MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASNew}, {})
        .addUse(NewI->getOperand(0).getReg())
        .addImm(AArch64::sube64)
        .addUse(NewI->getOperand(1).getReg())
        .addImm(AArch64::subo64);

    CAS = MIRBuilder.buildInstr(Opcode, {CASDst}, {CASDesired, CASNew, Addr});

    // Split the s128 result back into the two s64 destination halves.
    MIRBuilder.buildExtract({DstLo}, {CASDst}, 0);
    MIRBuilder.buildExtract({DstHi}, {CASDst}, 64);
  } else {
    // The -O0 CMP_SWAP_128 is friendlier to generate code for because LDXP/STXP
    // can take arbitrary registers so it just has the normal GPR64 operands the
    // rest of AArch64 is expecting.
    auto Scratch = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    CAS = MIRBuilder.buildInstr(AArch64::CMP_SWAP_128, {DstLo, DstHi, Scratch},
                                {Addr, DesiredI->getOperand(0),
                                 DesiredI->getOperand(1), NewI->getOperand(0),
                                 NewI->getOperand(1)});
  }

  // Propagate the original memory operands and constrain the new
  // target-specific instruction's operands to their register classes.
  CAS.cloneMemRefs(MI);
  constrainSelectedInstRegOperands(*CAS, *ST->getInstrInfo(),
                                   *MRI.getTargetRegisterInfo(),
                                   *ST->getRegBankInfo());

  // Re-assemble the s64 halves into the original s128 destination.
  MIRBuilder.buildMerge(MI.getOperand(0), {DstLo, DstHi});
  MI.eraseFromParent();
  return true;
}
1170 | ||||
1171 | bool AArch64LegalizerInfo::legalizeCTTZ(MachineInstr &MI, | |||
1172 | LegalizerHelper &Helper) const { | |||
1173 | MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; | |||
1174 | MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); | |||
1175 | LLT Ty = MRI.getType(MI.getOperand(1).getReg()); | |||
1176 | auto BitReverse = MIRBuilder.buildBitReverse(Ty, MI.getOperand(1)); | |||
1177 | MIRBuilder.buildCTLZ(MI.getOperand(0).getReg(), BitReverse); | |||
1178 | MI.eraseFromParent(); | |||
1179 | return true; | |||
1180 | } |
1 | //== llvm/Support/LowLevelTypeImpl.h --------------------------- -*- C++ -*-==// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// \file |
9 | /// Implement a low-level type suitable for MachineInstr level instruction |
10 | /// selection. |
11 | /// |
12 | /// For a type attached to a MachineInstr, we only care about 2 details: total |
13 | /// size and the number of vector lanes (if any). Accordingly, there are 4 |
14 | /// possible valid type-kinds: |
15 | /// |
/// * `sN` for scalars and aggregates
/// * `<N x sM>` for vectors, which must have at least 2 elements.
/// * `pN` for pointers
/// * `<N x pM>` for vectors of pointers, which must have at least 2 elements.
19 | /// |
20 | /// Other information required for correct selection is expected to be carried |
21 | /// by the opcode, or non-type flags. For example the distinction between G_ADD |
22 | /// and G_FADD for int/float or fast-math flags. |
23 | /// |
24 | //===----------------------------------------------------------------------===// |
25 | |
26 | #ifndef LLVM_SUPPORT_LOWLEVELTYPEIMPL_H |
27 | #define LLVM_SUPPORT_LOWLEVELTYPEIMPL_H |
28 | |
29 | #include "llvm/ADT/DenseMapInfo.h" |
30 | #include "llvm/Support/Debug.h" |
31 | #include "llvm/Support/MachineValueType.h" |
32 | #include <cassert> |
33 | |
34 | namespace llvm { |
35 | |
36 | class DataLayout; |
37 | class Type; |
38 | class raw_ostream; |
39 | |
40 | class LLT { |
41 | public: |
42 | /// Get a low-level scalar or aggregate "bag of bits". |
43 | static LLT scalar(unsigned SizeInBits) { |
44 | assert(SizeInBits > 0 && "invalid scalar size")(static_cast <bool> (SizeInBits > 0 && "invalid scalar size" ) ? void (0) : __assert_fail ("SizeInBits > 0 && \"invalid scalar size\"" , "/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/include/llvm/Support/LowLevelTypeImpl.h" , 44, __extension__ __PRETTY_FUNCTION__)); |
45 | return LLT{/*isPointer=*/false, /*isVector=*/false, /*NumElements=*/0, |
46 | SizeInBits, /*AddressSpace=*/0}; |
47 | } |
48 | |
49 | /// Get a low-level pointer in the given address space. |
50 | static LLT pointer(unsigned AddressSpace, unsigned SizeInBits) { |
51 | assert(SizeInBits > 0 && "invalid pointer size")(static_cast <bool> (SizeInBits > 0 && "invalid pointer size" ) ? void (0) : __assert_fail ("SizeInBits > 0 && \"invalid pointer size\"" , "/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/include/llvm/Support/LowLevelTypeImpl.h" , 51, __extension__ __PRETTY_FUNCTION__)); |
52 | return LLT{/*isPointer=*/true, /*isVector=*/false, /*NumElements=*/0, |
53 | SizeInBits, AddressSpace}; |
54 | } |
55 | |
56 | /// Get a low-level vector of some number of elements and element width. |
57 | /// \p NumElements must be at least 2. |
58 | static LLT vector(uint16_t NumElements, unsigned ScalarSizeInBits) { |
59 | assert(NumElements > 1 && "invalid number of vector elements")(static_cast <bool> (NumElements > 1 && "invalid number of vector elements" ) ? void (0) : __assert_fail ("NumElements > 1 && \"invalid number of vector elements\"" , "/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/include/llvm/Support/LowLevelTypeImpl.h" , 59, __extension__ __PRETTY_FUNCTION__)); |
60 | assert(ScalarSizeInBits > 0 && "invalid vector element size")(static_cast <bool> (ScalarSizeInBits > 0 && "invalid vector element size") ? void (0) : __assert_fail ("ScalarSizeInBits > 0 && \"invalid vector element size\"" , "/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/include/llvm/Support/LowLevelTypeImpl.h" , 60, __extension__ __PRETTY_FUNCTION__)); |
61 | return LLT{/*isPointer=*/false, /*isVector=*/true, NumElements, |
62 | ScalarSizeInBits, /*AddressSpace=*/0}; |
63 | } |
64 | |
65 | /// Get a low-level vector of some number of elements and element type. |
66 | static LLT vector(uint16_t NumElements, LLT ScalarTy) { |
67 | assert(NumElements > 1 && "invalid number of vector elements")(static_cast <bool> (NumElements > 1 && "invalid number of vector elements" ) ? void (0) : __assert_fail ("NumElements > 1 && \"invalid number of vector elements\"" , "/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/include/llvm/Support/LowLevelTypeImpl.h" , 67, __extension__ __PRETTY_FUNCTION__)); |
68 | assert(!ScalarTy.isVector() && "invalid vector element type")(static_cast <bool> (!ScalarTy.isVector() && "invalid vector element type" ) ? void (0) : __assert_fail ("!ScalarTy.isVector() && \"invalid vector element type\"" , "/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/include/llvm/Support/LowLevelTypeImpl.h" , 68, __extension__ __PRETTY_FUNCTION__)); |
69 | return LLT{ScalarTy.isPointer(), /*isVector=*/true, NumElements, |
70 | ScalarTy.getSizeInBits(), |
71 | ScalarTy.isPointer() ? ScalarTy.getAddressSpace() : 0}; |
72 | } |
73 | |
74 | static LLT scalarOrVector(uint16_t NumElements, LLT ScalarTy) { |
75 | return NumElements == 1 ? ScalarTy : LLT::vector(NumElements, ScalarTy); |
76 | } |
77 | |
78 | static LLT scalarOrVector(uint16_t NumElements, unsigned ScalarSize) { |
79 | return scalarOrVector(NumElements, LLT::scalar(ScalarSize)); |
80 | } |
81 | |
82 | explicit LLT(bool isPointer, bool isVector, uint16_t NumElements, |
83 | unsigned SizeInBits, unsigned AddressSpace) { |
84 | init(isPointer, isVector, NumElements, SizeInBits, AddressSpace); |
85 | } |
86 | explicit LLT() : IsPointer(false), IsVector(false), RawData(0) {} |
87 | |
88 | explicit LLT(MVT VT); |
89 | |
90 | bool isValid() const { return RawData != 0; } |
91 | |
92 | bool isScalar() const { return isValid() && !IsPointer && !IsVector; } |
93 | |
94 | bool isPointer() const { return isValid() && IsPointer && !IsVector; } |
95 | |
96 | bool isVector() const { return isValid() && IsVector; } |
97 | |
98 | /// Returns the number of elements in a vector LLT. Must only be called on |
99 | /// vector types. |
100 | uint16_t getNumElements() const { |
101 | assert(IsVector && "cannot get number of elements on scalar/aggregate")(static_cast <bool> (IsVector && "cannot get number of elements on scalar/aggregate" ) ? void (0) : __assert_fail ("IsVector && \"cannot get number of elements on scalar/aggregate\"" , "/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/include/llvm/Support/LowLevelTypeImpl.h" , 101, __extension__ __PRETTY_FUNCTION__)); |
102 | if (!IsPointer) |
103 | return getFieldValue(VectorElementsFieldInfo); |
104 | else |
105 | return getFieldValue(PointerVectorElementsFieldInfo); |
106 | } |
107 | |
108 | /// Returns the total size of the type. Must only be called on sized types. |
109 | unsigned getSizeInBits() const { |
110 | if (isPointer() || isScalar()) |
111 | return getScalarSizeInBits(); |
112 | return getScalarSizeInBits() * getNumElements(); |
113 | } |
114 | |
115 | /// Returns the total size of the type in bytes, i.e. number of whole bytes |
116 | /// needed to represent the size in bits. Must only be called on sized types. |
117 | unsigned getSizeInBytes() const { |
118 | return (getSizeInBits() + 7) / 8; |
119 | } |
120 | |
121 | LLT getScalarType() const { |
122 | return isVector() ? getElementType() : *this; |
123 | } |
124 | |
125 | /// If this type is a vector, return a vector with the same number of elements |
126 | /// but the new element type. Otherwise, return the new element type. |
127 | LLT changeElementType(LLT NewEltTy) const { |
128 | return isVector() ? LLT::vector(getNumElements(), NewEltTy) : NewEltTy; |
129 | } |
130 | |
131 | /// If this type is a vector, return a vector with the same number of elements |
132 | /// but the new element size. Otherwise, return the new element type. Invalid |
133 | /// for pointer types. For pointer types, use changeElementType. |
134 | LLT changeElementSize(unsigned NewEltSize) const { |
135 | assert(!getScalarType().isPointer() &&(static_cast <bool> (!getScalarType().isPointer() && "invalid to directly change element size for pointers") ? void (0) : __assert_fail ("!getScalarType().isPointer() && \"invalid to directly change element size for pointers\"" , "/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/include/llvm/Support/LowLevelTypeImpl.h" , 136, __extension__ __PRETTY_FUNCTION__)) |
136 | "invalid to directly change element size for pointers")(static_cast <bool> (!getScalarType().isPointer() && "invalid to directly change element size for pointers") ? void (0) : __assert_fail ("!getScalarType().isPointer() && \"invalid to directly change element size for pointers\"" , "/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/include/llvm/Support/LowLevelTypeImpl.h" , 136, __extension__ __PRETTY_FUNCTION__)); |
137 | return isVector() ? LLT::vector(getNumElements(), NewEltSize) |
138 | : LLT::scalar(NewEltSize); |
139 | } |
140 | |
141 | /// Return a vector or scalar with the same element type and the new number of |
142 | /// elements. |
143 | LLT changeNumElements(unsigned NewNumElts) const { |
144 | return LLT::scalarOrVector(NewNumElts, getScalarType()); |
145 | } |
146 | |
147 | /// Return a type that is \p Factor times smaller. Reduces the number of |
148 | /// elements if this is a vector, or the bitwidth for scalar/pointers. Does |
149 | /// not attempt to handle cases that aren't evenly divisible. |
150 | LLT divide(int Factor) const { |
151 | assert(Factor != 1)(static_cast <bool> (Factor != 1) ? void (0) : __assert_fail ("Factor != 1", "/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/include/llvm/Support/LowLevelTypeImpl.h" , 151, __extension__ __PRETTY_FUNCTION__)); |
152 | if (isVector()) { |
153 | assert(getNumElements() % Factor == 0)(static_cast <bool> (getNumElements() % Factor == 0) ? void (0) : __assert_fail ("getNumElements() % Factor == 0", "/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/include/llvm/Support/LowLevelTypeImpl.h" , 153, __extension__ __PRETTY_FUNCTION__)); |
154 | return scalarOrVector(getNumElements() / Factor, getElementType()); |
155 | } |
156 | |
157 | assert(getSizeInBits() % Factor == 0)(static_cast <bool> (getSizeInBits() % Factor == 0) ? void (0) : __assert_fail ("getSizeInBits() % Factor == 0", "/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/include/llvm/Support/LowLevelTypeImpl.h" , 157, __extension__ __PRETTY_FUNCTION__)); |
158 | return scalar(getSizeInBits() / Factor); |
159 | } |
160 | |
161 | bool isByteSized() const { return (getSizeInBits() & 7) == 0; } |
162 | |
163 | unsigned getScalarSizeInBits() const { |
164 | assert(RawData != 0 && "Invalid Type")(static_cast <bool> (RawData != 0 && "Invalid Type" ) ? void (0) : __assert_fail ("RawData != 0 && \"Invalid Type\"" , "/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/include/llvm/Support/LowLevelTypeImpl.h" , 164, __extension__ __PRETTY_FUNCTION__)); |
165 | if (!IsVector) { |
166 | if (!IsPointer) |
167 | return getFieldValue(ScalarSizeFieldInfo); |
168 | else |
169 | return getFieldValue(PointerSizeFieldInfo); |
170 | } else { |
171 | if (!IsPointer) |
172 | return getFieldValue(VectorSizeFieldInfo); |
173 | else |
174 | return getFieldValue(PointerVectorSizeFieldInfo); |
175 | } |
176 | } |
177 | |
178 | unsigned getAddressSpace() const { |
179 | assert(RawData != 0 && "Invalid Type")(static_cast <bool> (RawData != 0 && "Invalid Type" ) ? void (0) : __assert_fail ("RawData != 0 && \"Invalid Type\"" , "/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/include/llvm/Support/LowLevelTypeImpl.h" , 179, __extension__ __PRETTY_FUNCTION__)); |
180 | assert(IsPointer && "cannot get address space of non-pointer type")(static_cast <bool> (IsPointer && "cannot get address space of non-pointer type" ) ? void (0) : __assert_fail ("IsPointer && \"cannot get address space of non-pointer type\"" , "/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/include/llvm/Support/LowLevelTypeImpl.h" , 180, __extension__ __PRETTY_FUNCTION__)); |
181 | if (!IsVector) |
182 | return getFieldValue(PointerAddressSpaceFieldInfo); |
183 | else |
184 | return getFieldValue(PointerVectorAddressSpaceFieldInfo); |
185 | } |
186 | |
187 | /// Returns the vector's element type. Only valid for vector types. |
188 | LLT getElementType() const { |
189 | assert(isVector() && "cannot get element type of scalar/aggregate")(static_cast <bool> (isVector() && "cannot get element type of scalar/aggregate" ) ? void (0) : __assert_fail ("isVector() && \"cannot get element type of scalar/aggregate\"" , "/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/include/llvm/Support/LowLevelTypeImpl.h" , 189, __extension__ __PRETTY_FUNCTION__)); |
190 | if (IsPointer) |
191 | return pointer(getAddressSpace(), getScalarSizeInBits()); |
192 | else |
193 | return scalar(getScalarSizeInBits()); |
194 | } |
195 | |
196 | void print(raw_ostream &OS) const; |
197 | |
198 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
199 | LLVM_DUMP_METHOD__attribute__((noinline)) __attribute__((__used__)) void dump() const { |
200 | print(dbgs()); |
201 | dbgs() << '\n'; |
202 | } |
203 | #endif |
204 | |
205 | bool operator==(const LLT &RHS) const { |
206 | return IsPointer == RHS.IsPointer && IsVector == RHS.IsVector && |
207 | RHS.RawData == RawData; |
208 | } |
209 | |
210 | bool operator!=(const LLT &RHS) const { return !(*this == RHS); } |
211 | |
212 | friend struct DenseMapInfo<LLT>; |
213 | friend class GISelInstProfileBuilder; |
214 | |
215 | private: |
216 | /// LLT is packed into 64 bits as follows: |
217 | /// isPointer : 1 |
218 | /// isVector : 1 |
219 | /// with 62 bits remaining for Kind-specific data, packed in bitfields |
220 | /// as described below. As there isn't a simple portable way to pack bits |
221 | /// into bitfields, here the different fields in the packed structure is |
222 | /// described in static const *Field variables. Each of these variables |
223 | /// is a 2-element array, with the first element describing the bitfield size |
224 | /// and the second element describing the bitfield offset. |
225 | typedef int BitFieldInfo[2]; |
226 | /// |
227 | /// This is how the bitfields are packed per Kind: |
228 | /// * Invalid: |
229 | /// gets encoded as RawData == 0, as that is an invalid encoding, since for |
230 | /// valid encodings, SizeInBits/SizeOfElement must be larger than 0. |
231 | /// * Non-pointer scalar (isPointer == 0 && isVector == 0): |
232 | /// SizeInBits: 32; |
233 | static const constexpr BitFieldInfo ScalarSizeFieldInfo{32, 0}; |
234 | /// * Pointer (isPointer == 1 && isVector == 0): |
235 | /// SizeInBits: 16; |
236 | /// AddressSpace: 24; |
237 | static const constexpr BitFieldInfo PointerSizeFieldInfo{16, 0}; |
238 | static const constexpr BitFieldInfo PointerAddressSpaceFieldInfo{ |
239 | 24, PointerSizeFieldInfo[0] + PointerSizeFieldInfo[1]}; |
240 | /// * Vector-of-non-pointer (isPointer == 0 && isVector == 1): |
241 | /// NumElements: 16; |
242 | /// SizeOfElement: 32; |
243 | static const constexpr BitFieldInfo VectorElementsFieldInfo{16, 0}; |
244 | static const constexpr BitFieldInfo VectorSizeFieldInfo{ |
245 | 32, VectorElementsFieldInfo[0] + VectorElementsFieldInfo[1]}; |
246 | /// * Vector-of-pointer (isPointer == 1 && isVector == 1): |
247 | /// NumElements: 16; |
248 | /// SizeOfElement: 16; |
249 | /// AddressSpace: 24; |
250 | static const constexpr BitFieldInfo PointerVectorElementsFieldInfo{16, 0}; |
251 | static const constexpr BitFieldInfo PointerVectorSizeFieldInfo{ |
252 | 16, |
253 | PointerVectorElementsFieldInfo[1] + PointerVectorElementsFieldInfo[0]}; |
254 | static const constexpr BitFieldInfo PointerVectorAddressSpaceFieldInfo{ |
255 | 24, PointerVectorSizeFieldInfo[1] + PointerVectorSizeFieldInfo[0]}; |
256 | |
257 | uint64_t IsPointer : 1; |
258 | uint64_t IsVector : 1; |
259 | uint64_t RawData : 62; |
260 | |
261 | static uint64_t getMask(const BitFieldInfo FieldInfo) { |
262 | const int FieldSizeInBits = FieldInfo[0]; |
263 | return (((uint64_t)1) << FieldSizeInBits) - 1; |
264 | } |
265 | static uint64_t maskAndShift(uint64_t Val, uint64_t Mask, uint8_t Shift) { |
266 | assert(Val <= Mask && "Value too large for field")(static_cast <bool> (Val <= Mask && "Value too large for field" ) ? void (0) : __assert_fail ("Val <= Mask && \"Value too large for field\"" , "/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/include/llvm/Support/LowLevelTypeImpl.h" , 266, __extension__ __PRETTY_FUNCTION__)); |
267 | return (Val & Mask) << Shift; |
268 | } |
269 | static uint64_t maskAndShift(uint64_t Val, const BitFieldInfo FieldInfo) { |
270 | return maskAndShift(Val, getMask(FieldInfo), FieldInfo[1]); |
271 | } |
272 | uint64_t getFieldValue(const BitFieldInfo FieldInfo) const { |
273 | return getMask(FieldInfo) & (RawData >> FieldInfo[1]); |
274 | } |
275 | |
276 | void init(bool IsPointer, bool IsVector, uint16_t NumElements, |
277 | unsigned SizeInBits, unsigned AddressSpace) { |
278 | this->IsPointer = IsPointer; |
279 | this->IsVector = IsVector; |
280 | if (!IsVector) { |
281 | if (!IsPointer) |
282 | RawData = maskAndShift(SizeInBits, ScalarSizeFieldInfo); |
283 | else |
284 | RawData = maskAndShift(SizeInBits, PointerSizeFieldInfo) | |
285 | maskAndShift(AddressSpace, PointerAddressSpaceFieldInfo); |
286 | } else { |
287 | assert(NumElements > 1 && "invalid number of vector elements")(static_cast <bool> (NumElements > 1 && "invalid number of vector elements" ) ? void (0) : __assert_fail ("NumElements > 1 && \"invalid number of vector elements\"" , "/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/include/llvm/Support/LowLevelTypeImpl.h" , 287, __extension__ __PRETTY_FUNCTION__)); |
288 | if (!IsPointer) |
289 | RawData = maskAndShift(NumElements, VectorElementsFieldInfo) | |
290 | maskAndShift(SizeInBits, VectorSizeFieldInfo); |
291 | else |
292 | RawData = |
293 | maskAndShift(NumElements, PointerVectorElementsFieldInfo) | |
294 | maskAndShift(SizeInBits, PointerVectorSizeFieldInfo) | |
295 | maskAndShift(AddressSpace, PointerVectorAddressSpaceFieldInfo); |
296 | } |
297 | } |
298 | |
299 | uint64_t getUniqueRAWLLTData() const { |
300 | return ((uint64_t)RawData) << 2 | ((uint64_t)IsPointer) << 1 | |
301 | ((uint64_t)IsVector); |
302 | } |
303 | }; |
304 | |
305 | inline raw_ostream& operator<<(raw_ostream &OS, const LLT &Ty) { |
306 | Ty.print(OS); |
307 | return OS; |
308 | } |
309 | |
310 | template<> struct DenseMapInfo<LLT> { |
311 | static inline LLT getEmptyKey() { |
312 | LLT Invalid; |
313 | Invalid.IsPointer = true; |
314 | return Invalid; |
315 | } |
316 | static inline LLT getTombstoneKey() { |
317 | LLT Invalid; |
318 | Invalid.IsVector = true; |
319 | return Invalid; |
320 | } |
321 | static inline unsigned getHashValue(const LLT &Ty) { |
322 | uint64_t Val = Ty.getUniqueRAWLLTData(); |
323 | return DenseMapInfo<uint64_t>::getHashValue(Val); |
324 | } |
325 | static bool isEqual(const LLT &LHS, const LLT &RHS) { |
326 | return LHS == RHS; |
327 | } |
328 | }; |
329 | |
330 | } |
331 | |
332 | #endif // LLVM_SUPPORT_LOWLEVELTYPEIMPL_H |