//===-- X86TargetTransformInfo.cpp - X86 specific TTI pass ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements a TargetTransformInfo analysis pass specific to the
/// X86 target machine. It uses the target's detailed information to provide
/// more precise answers to certain TTI queries, while letting the target
/// independent and default TTI implementations handle the rest.
///
//===----------------------------------------------------------------------===//
/// A note about the cost model numbers used below: the numbers correspond to
/// some "generic" X86 CPU rather than to a concrete CPU model. Usually the
/// numbers come from the CPU on which the feature first appeared. For
/// example, if we do Subtarget.hasSSE42() in the lookups below, the cost is
/// based on Nehalem, as that was the first CPU to support that feature level
/// and thus most likely has the worst-case cost.
/// Some examples of other technologies/CPUs:
///   SSE 3   - Pentium4 / Athlon64
///   SSE 4.1 - Penryn
///   SSE 4.2 - Nehalem
///   AVX     - Sandy Bridge
///   AVX2    - Haswell
///   AVX-512 - Xeon Phi / Skylake
/// And some examples of instruction target dependent costs (latency)
///                    divss    sqrtss    rsqrtss
///   AMD K7           11-16    19        3
///   Piledriver       9-24     13-15     5
///   Jaguar           14       16        2
///   Pentium II,III   18       30        2
///   Nehalem          7-14     7-18      3
///   Haswell          10-13    11        5
/// TODO: Develop and implement the target dependent cost model and
/// specialize cost numbers for different Cost Model Targets such as throughput,
/// code size, latency and uop count.
//===----------------------------------------------------------------------===//

#include "X86TargetTransformInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"

using namespace llvm;

#define DEBUG_TYPE "x86tti"

//===----------------------------------------------------------------------===//
//
// X86 cost model.
//
//===----------------------------------------------------------------------===//

TargetTransformInfo::PopcntSupportKind
X86TTIImpl::getPopcntSupport(unsigned TyWidth) {
  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
  // TODO: Currently the __builtin_popcount() implementation using SSE3
  //   instructions is inefficient. Once the problem is fixed, we should
  //   call ST->hasSSE3() instead of ST->hasPOPCNT().
  return ST->hasPOPCNT() ? TTI::PSK_FastHardware : TTI::PSK_Software;
}

llvm::Optional<unsigned> X86TTIImpl::getCacheSize(
    TargetTransformInfo::CacheLevel Level) const {
  switch (Level) {
  case TargetTransformInfo::CacheLevel::L1D:
    //   - Penryn
    //   - Nehalem
    //   - Westmere
    //   - Sandy Bridge
    //   - Ivy Bridge
    //   - Haswell
    //   - Broadwell
    //   - Skylake
    //   - Kabylake
    return 32 * 1024; // 32 KByte
  case TargetTransformInfo::CacheLevel::L2D:
    //   - Penryn
    //   - Nehalem
    //   - Westmere
    //   - Sandy Bridge
    //   - Ivy Bridge
    //   - Haswell
    //   - Broadwell
    //   - Skylake
    //   - Kabylake
    return 256 * 1024; // 256 KByte
  }

  llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
}

llvm::Optional<unsigned> X86TTIImpl::getCacheAssociativity(
    TargetTransformInfo::CacheLevel Level) const {
  //   - Penryn
  //   - Nehalem
  //   - Westmere
  //   - Sandy Bridge
  //   - Ivy Bridge
  //   - Haswell
  //   - Broadwell
  //   - Skylake
  //   - Kabylake
  switch (Level) {
  case TargetTransformInfo::CacheLevel::L1D:
    LLVM_FALLTHROUGH;
  case TargetTransformInfo::CacheLevel::L2D:
    return 8;
  }

  llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
}

unsigned X86TTIImpl::getNumberOfRegisters(unsigned ClassID) const {
  bool Vector = (ClassID == 1);
  if (Vector && !ST->hasSSE1())
    return 0;

  if (ST->is64Bit()) {
    if (Vector && ST->hasAVX512())
      return 32;
    return 16;
  }
  return 8;
}

unsigned X86TTIImpl::getRegisterBitWidth(bool Vector) const {
  unsigned PreferVectorWidth = ST->getPreferVectorWidth();
  if (Vector) {
    if (ST->hasAVX512() && PreferVectorWidth >= 512)
      return 512;
    if (ST->hasAVX() && PreferVectorWidth >= 256)
      return 256;
    if (ST->hasSSE1() && PreferVectorWidth >= 128)
      return 128;
    return 0;
  }

  if (ST->is64Bit())
    return 64;

  return 32;
}

unsigned X86TTIImpl::getLoadStoreVecRegBitWidth(unsigned) const {
  return getRegisterBitWidth(true);
}

unsigned X86TTIImpl::getMaxInterleaveFactor(unsigned VF) {
  // If the loop will not be vectorized, don't interleave the loop.
  // Let the regular unroller handle it instead, which saves the overflow
  // check and memory check cost.
  if (VF == 1)
    return 1;

  if (ST->isAtom())
    return 1;

  // Sandybridge and Haswell have multiple execution ports and pipelined
  // vector units.
  if (ST->hasAVX())
    return 4;

  return 2;
}

int X86TTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
                                       TTI::OperandValueKind Op1Info,
                                       TTI::OperandValueKind Op2Info,
                                       TTI::OperandValueProperties Opd1PropInfo,
                                       TTI::OperandValueProperties Opd2PropInfo,
                                       ArrayRef<const Value *> Args,
                                       const Instruction *CxtI) {
  // Legalize the type.
  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);

  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");

  static const CostTblEntry GLMCostTable[] = {
    { ISD::FDIV,  MVT::f32,   18 }, // divss
    { ISD::FDIV,  MVT::v4f32, 35 }, // divps
    { ISD::FDIV,  MVT::f64,   33 }, // divsd
    { ISD::FDIV,  MVT::v2f64, 65 }, // divpd
  };

  if (ST->useGLMDivSqrtCosts())
    if (const auto *Entry = CostTableLookup(GLMCostTable, ISD,
                                            LT.second))
      return LT.first * Entry->Cost;

  static const CostTblEntry SLMCostTable[] = {
    { ISD::MUL,   MVT::v4i32, 11 }, // pmulld
    { ISD::MUL,   MVT::v8i16, 2  }, // pmullw
    { ISD::MUL,   MVT::v16i8, 14 }, // extend/pmullw/trunc sequence.
    { ISD::FMUL,  MVT::f64,   2  }, // mulsd
    { ISD::FMUL,  MVT::v2f64, 4  }, // mulpd
    { ISD::FMUL,  MVT::v4f32, 2  }, // mulps
    { ISD::FDIV,  MVT::f32,   17 }, // divss
    { ISD::FDIV,  MVT::v4f32, 39 }, // divps
    { ISD::FDIV,  MVT::f64,   32 }, // divsd
    { ISD::FDIV,  MVT::v2f64, 69 }, // divpd
    { ISD::FADD,  MVT::v2f64, 2  }, // addpd
    { ISD::FSUB,  MVT::v2f64, 2  }, // subpd
    // v2i64/v4i64 mul is custom lowered as a series of long:
    // multiplies(3), shifts(3) and adds(2)
    // slm muldq version throughput is 2 and addq throughput 4
    // thus: 3X2 (muldq throughput) + 3X1 (shift throughput) +
    // 2X4 (addq throughput) = 17
    { ISD::MUL,   MVT::v2i64, 17 },
    // slm addq\subq throughput is 4
    { ISD::ADD,   MVT::v2i64, 4  },
    { ISD::SUB,   MVT::v2i64, 4  },
  };

  if (ST->isSLM()) {
    if (Args.size() == 2 && ISD == ISD::MUL && LT.second == MVT::v4i32) {
      // Check if the operands can be shrunk into a smaller datatype.
      bool Op1Signed = false;
      unsigned Op1MinSize = BaseT::minRequiredElementSize(Args[0], Op1Signed);
      bool Op2Signed = false;
      unsigned Op2MinSize = BaseT::minRequiredElementSize(Args[1], Op2Signed);

      bool signedMode = Op1Signed | Op2Signed;
      unsigned OpMinSize = std::max(Op1MinSize, Op2MinSize);

      if (OpMinSize <= 7)
        return LT.first * 3; // pmullw/sext
      if (!signedMode && OpMinSize <= 8)
        return LT.first * 3; // pmullw/zext
      if (OpMinSize <= 15)
        return LT.first * 5; // pmullw/pmulhw/pshuf
      if (!signedMode && OpMinSize <= 16)
        return LT.first * 5; // pmullw/pmulhw/pshuf
    }

    if (const auto *Entry = CostTableLookup(SLMCostTable, ISD,
                                            LT.second)) {
      return LT.first * Entry->Cost;
    }
  }

  if ((ISD == ISD::SDIV || ISD == ISD::SREM || ISD == ISD::UDIV ||
       ISD == ISD::UREM) &&
      (Op2Info == TargetTransformInfo::OK_UniformConstantValue ||
       Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) &&
      Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
    if (ISD == ISD::SDIV || ISD == ISD::SREM) {
      // On X86, vector signed division by a power-of-two constant is
      // normally expanded to the sequence SRA + SRL + ADD + SRA.
      // The OperandValue properties may not be the same as that of the
      // previous operation; conservatively assume OP_None.
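      // As an illustration (a sketch, not the exact DAG emitted), a 32-bit
      // lane X s/ 8 expands to:
      //   %sgn  = ashr i32 %x, 31    ; SRA: broadcast the sign bit
      //   %bias = lshr i32 %sgn, 29  ; SRL: 7 if %x is negative, else 0
      //   %tmp  = add i32 %x, %bias  ; ADD: bias to round towards zero
      //   %res  = ashr i32 %tmp, 3   ; SRA: the actual division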
      int Cost =
          2 * getArithmeticInstrCost(Instruction::AShr, Ty, Op1Info, Op2Info,
                                     TargetTransformInfo::OP_None,
                                     TargetTransformInfo::OP_None);
      Cost += getArithmeticInstrCost(Instruction::LShr, Ty, Op1Info, Op2Info,
                                     TargetTransformInfo::OP_None,
                                     TargetTransformInfo::OP_None);
      Cost += getArithmeticInstrCost(Instruction::Add, Ty, Op1Info, Op2Info,
                                     TargetTransformInfo::OP_None,
                                     TargetTransformInfo::OP_None);

      if (ISD == ISD::SREM) {
        // For SREM: (X % C) is the equivalent of (X - (X/C)*C)
        Cost += getArithmeticInstrCost(Instruction::Mul, Ty, Op1Info, Op2Info);
        Cost += getArithmeticInstrCost(Instruction::Sub, Ty, Op1Info, Op2Info);
      }

      return Cost;
    }

    // Vector unsigned division/remainder will be simplified to shifts/masks.
    if (ISD == ISD::UDIV)
      return getArithmeticInstrCost(Instruction::LShr, Ty, Op1Info, Op2Info,
                                    TargetTransformInfo::OP_None,
                                    TargetTransformInfo::OP_None);

    else // UREM
      return getArithmeticInstrCost(Instruction::And, Ty, Op1Info, Op2Info,
                                    TargetTransformInfo::OP_None,
                                    TargetTransformInfo::OP_None);
  }

  static const CostTblEntry AVX512BWUniformConstCostTable[] = {
    { ISD::SHL,  MVT::v64i8,   2 }, // psllw + pand.
    { ISD::SRL,  MVT::v64i8,   2 }, // psrlw + pand.
    { ISD::SRA,  MVT::v64i8,   4 }, // psrlw, pand, pxor, psubb.
  };

  if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
      ST->hasBWI()) {
    if (const auto *Entry = CostTableLookup(AVX512BWUniformConstCostTable, ISD,
                                            LT.second))
      return LT.first * Entry->Cost;
  }

  static const CostTblEntry AVX512UniformConstCostTable[] = {
    { ISD::SRA,  MVT::v2i64,   1 },
    { ISD::SRA,  MVT::v4i64,   1 },
    { ISD::SRA,  MVT::v8i64,   1 },
  };

  if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
      ST->hasAVX512()) {
    if (const auto *Entry = CostTableLookup(AVX512UniformConstCostTable, ISD,
                                            LT.second))
      return LT.first * Entry->Cost;
  }

  static const CostTblEntry AVX2UniformConstCostTable[] = {
    { ISD::SHL,  MVT::v32i8,   2 }, // psllw + pand.
    { ISD::SRL,  MVT::v32i8,   2 }, // psrlw + pand.
    { ISD::SRA,  MVT::v32i8,   4 }, // psrlw, pand, pxor, psubb.

    { ISD::SRA,  MVT::v4i64,   4 }, // 2 x psrad + shuffle.
  };

  if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
      ST->hasAVX2()) {
    if (const auto *Entry = CostTableLookup(AVX2UniformConstCostTable, ISD,
                                            LT.second))
      return LT.first * Entry->Cost;
  }

  static const CostTblEntry SSE2UniformConstCostTable[] = {
    { ISD::SHL,  MVT::v16i8,     2 }, // psllw + pand.
    { ISD::SRL,  MVT::v16i8,     2 }, // psrlw + pand.
    { ISD::SRA,  MVT::v16i8,     4 }, // psrlw, pand, pxor, psubb.

    { ISD::SHL,  MVT::v32i8,   4+2 }, // 2*(psllw + pand) + split.
    { ISD::SRL,  MVT::v32i8,   4+2 }, // 2*(psrlw + pand) + split.
    { ISD::SRA,  MVT::v32i8,   8+2 }, // 2*(psrlw, pand, pxor, psubb) + split.
  };

  // XOP has faster vXi8 shifts.
  if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
      ST->hasSSE2() && !ST->hasXOP()) {
    if (const auto *Entry =
            CostTableLookup(SSE2UniformConstCostTable, ISD, LT.second))
      return LT.first * Entry->Cost;
  }

  static const CostTblEntry AVX512BWConstCostTable[] = {
    { ISD::SDIV, MVT::v64i8,  14 }, // 2*ext+2*pmulhw sequence
    { ISD::SREM, MVT::v64i8,  16 }, // 2*ext+2*pmulhw+mul+sub sequence
    { ISD::UDIV, MVT::v64i8,  14 }, // 2*ext+2*pmulhw sequence
    { ISD::UREM, MVT::v64i8,  16 }, // 2*ext+2*pmulhw+mul+sub sequence
    { ISD::SDIV, MVT::v32i16,  6 }, // vpmulhw sequence
    { ISD::SREM, MVT::v32i16,  8 }, // vpmulhw+mul+sub sequence
    { ISD::UDIV, MVT::v32i16,  6 }, // vpmulhuw sequence
    { ISD::UREM, MVT::v32i16,  8 }, // vpmulhuw+mul+sub sequence
  };

  if ((Op2Info == TargetTransformInfo::OK_UniformConstantValue ||
       Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) &&
      ST->hasBWI()) {
    if (const auto *Entry =
            CostTableLookup(AVX512BWConstCostTable, ISD, LT.second))
      return LT.first * Entry->Cost;
  }

  static const CostTblEntry AVX512ConstCostTable[] = {
    { ISD::SDIV, MVT::v16i32, 15 }, // vpmuldq sequence
    { ISD::SREM, MVT::v16i32, 17 }, // vpmuldq+mul+sub sequence
    { ISD::UDIV, MVT::v16i32, 15 }, // vpmuludq sequence
    { ISD::UREM, MVT::v16i32, 17 }, // vpmuludq+mul+sub sequence
  };

  if ((Op2Info == TargetTransformInfo::OK_UniformConstantValue ||
       Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) &&
      ST->hasAVX512()) {
    if (const auto *Entry =
            CostTableLookup(AVX512ConstCostTable, ISD, LT.second))
      return LT.first * Entry->Cost;
  }

  static const CostTblEntry AVX2ConstCostTable[] = {
    { ISD::SDIV, MVT::v32i8,  14 }, // 2*ext+2*pmulhw sequence
    { ISD::SREM, MVT::v32i8,  16 }, // 2*ext+2*pmulhw+mul+sub sequence
    { ISD::UDIV, MVT::v32i8,  14 }, // 2*ext+2*pmulhw sequence
    { ISD::UREM, MVT::v32i8,  16 }, // 2*ext+2*pmulhw+mul+sub sequence
    { ISD::SDIV, MVT::v16i16,  6 }, // vpmulhw sequence
    { ISD::SREM, MVT::v16i16,  8 }, // vpmulhw+mul+sub sequence
    { ISD::UDIV, MVT::v16i16,  6 }, // vpmulhuw sequence
    { ISD::UREM, MVT::v16i16,  8 }, // vpmulhuw+mul+sub sequence
    { ISD::SDIV, MVT::v8i32,  15 }, // vpmuldq sequence
    { ISD::SREM, MVT::v8i32,  19 }, // vpmuldq+mul+sub sequence
    { ISD::UDIV, MVT::v8i32,  15 }, // vpmuludq sequence
    { ISD::UREM, MVT::v8i32,  19 }, // vpmuludq+mul+sub sequence
  };

  if ((Op2Info == TargetTransformInfo::OK_UniformConstantValue ||
       Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) &&
      ST->hasAVX2()) {
    if (const auto *Entry = CostTableLookup(AVX2ConstCostTable, ISD, LT.second))
      return LT.first * Entry->Cost;
  }

  static const CostTblEntry SSE2ConstCostTable[] = {
    { ISD::SDIV, MVT::v32i8,  28+2 }, // 4*ext+4*pmulhw sequence + split.
    { ISD::SREM, MVT::v32i8,  32+2 }, // 4*ext+4*pmulhw+mul+sub sequence + split.
    { ISD::SDIV, MVT::v16i8,    14 }, // 2*ext+2*pmulhw sequence
    { ISD::SREM, MVT::v16i8,    16 }, // 2*ext+2*pmulhw+mul+sub sequence
    { ISD::UDIV, MVT::v32i8,  28+2 }, // 4*ext+4*pmulhw sequence + split.
    { ISD::UREM, MVT::v32i8,  32+2 }, // 4*ext+4*pmulhw+mul+sub sequence + split.
    { ISD::UDIV, MVT::v16i8,    14 }, // 2*ext+2*pmulhw sequence
    { ISD::UREM, MVT::v16i8,    16 }, // 2*ext+2*pmulhw+mul+sub sequence
    { ISD::SDIV, MVT::v16i16, 12+2 }, // 2*pmulhw sequence + split.
    { ISD::SREM, MVT::v16i16, 16+2 }, // 2*pmulhw+mul+sub sequence + split.
    { ISD::SDIV, MVT::v8i16,     6 }, // pmulhw sequence
    { ISD::SREM, MVT::v8i16,     8 }, // pmulhw+mul+sub sequence
    { ISD::UDIV, MVT::v16i16, 12+2 }, // 2*pmulhuw sequence + split.
    { ISD::UREM, MVT::v16i16, 16+2 }, // 2*pmulhuw+mul+sub sequence + split.
    { ISD::UDIV, MVT::v8i16,     6 }, // pmulhuw sequence
    { ISD::UREM, MVT::v8i16,     8 }, // pmulhuw+mul+sub sequence
    { ISD::SDIV, MVT::v8i32,  38+2 }, // 2*pmuludq sequence + split.
    { ISD::SREM, MVT::v8i32,  48+2 }, // 2*pmuludq+mul+sub sequence + split.
    { ISD::SDIV, MVT::v4i32,    19 }, // pmuludq sequence
    { ISD::SREM, MVT::v4i32,    24 }, // pmuludq+mul+sub sequence
    { ISD::UDIV, MVT::v8i32,  30+2 }, // 2*pmuludq sequence + split.
    { ISD::UREM, MVT::v8i32,  40+2 }, // 2*pmuludq+mul+sub sequence + split.
    { ISD::UDIV, MVT::v4i32,    15 }, // pmuludq sequence
    { ISD::UREM, MVT::v4i32,    20 }, // pmuludq+mul+sub sequence
  };

  if ((Op2Info == TargetTransformInfo::OK_UniformConstantValue ||
       Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) &&
      ST->hasSSE2()) {
    // pmuldq sequence.
    if (ISD == ISD::SDIV && LT.second == MVT::v8i32 && ST->hasAVX())
      return LT.first * 32;
    if (ISD == ISD::SREM && LT.second == MVT::v8i32 && ST->hasAVX())
      return LT.first * 38;
    if (ISD == ISD::SDIV && LT.second == MVT::v4i32 && ST->hasSSE41())
      return LT.first * 15;
    if (ISD == ISD::SREM && LT.second == MVT::v4i32 && ST->hasSSE41())
      return LT.first * 20;

    if (const auto *Entry = CostTableLookup(SSE2ConstCostTable, ISD, LT.second))
      return LT.first * Entry->Cost;
  }

  static const CostTblEntry AVX2UniformCostTable[] = {
    // Uniform splats are cheaper for the following instructions.
    { ISD::SHL,  MVT::v16i16, 1 }, // psllw.
    { ISD::SRL,  MVT::v16i16, 1 }, // psrlw.
    { ISD::SRA,  MVT::v16i16, 1 }, // psraw.
  };

  if (ST->hasAVX2() &&
      ((Op2Info == TargetTransformInfo::OK_UniformConstantValue) ||
       (Op2Info == TargetTransformInfo::OK_UniformValue))) {
    if (const auto *Entry =
            CostTableLookup(AVX2UniformCostTable, ISD, LT.second))
      return LT.first * Entry->Cost;
  }

  static const CostTblEntry SSE2UniformCostTable[] = {
    // Uniform splats are cheaper for the following instructions.
    { ISD::SHL,  MVT::v8i16,  1 }, // psllw.
    { ISD::SHL,  MVT::v4i32,  1 }, // pslld
    { ISD::SHL,  MVT::v2i64,  1 }, // psllq.

    { ISD::SRL,  MVT::v8i16,  1 }, // psrlw.
    { ISD::SRL,  MVT::v4i32,  1 }, // psrld.
    { ISD::SRL,  MVT::v2i64,  1 }, // psrlq.

    { ISD::SRA,  MVT::v8i16,  1 }, // psraw.
    { ISD::SRA,  MVT::v4i32,  1 }, // psrad.
  };

  if (ST->hasSSE2() &&
      ((Op2Info == TargetTransformInfo::OK_UniformConstantValue) ||
       (Op2Info == TargetTransformInfo::OK_UniformValue))) {
    if (const auto *Entry =
            CostTableLookup(SSE2UniformCostTable, ISD, LT.second))
      return LT.first * Entry->Cost;
  }

  static const CostTblEntry AVX512DQCostTable[] = {
    { ISD::MUL,  MVT::v2i64, 1 },
    { ISD::MUL,  MVT::v4i64, 1 },
    { ISD::MUL,  MVT::v8i64, 1 }
  };

  // Look for AVX512DQ lowering tricks for custom cases.
  if (ST->hasDQI())
    if (const auto *Entry = CostTableLookup(AVX512DQCostTable, ISD, LT.second))
      return LT.first * Entry->Cost;

  static const CostTblEntry AVX512BWCostTable[] = {
    { ISD::SHL,   MVT::v8i16,   1 }, // vpsllvw
    { ISD::SRL,   MVT::v8i16,   1 }, // vpsrlvw
    { ISD::SRA,   MVT::v8i16,   1 }, // vpsravw

    { ISD::SHL,   MVT::v16i16,  1 }, // vpsllvw
    { ISD::SRL,   MVT::v16i16,  1 }, // vpsrlvw
    { ISD::SRA,   MVT::v16i16,  1 }, // vpsravw

    { ISD::SHL,   MVT::v32i16,  1 }, // vpsllvw
    { ISD::SRL,   MVT::v32i16,  1 }, // vpsrlvw
    { ISD::SRA,   MVT::v32i16,  1 }, // vpsravw

    { ISD::SHL,   MVT::v64i8,  11 }, // vpblendvb sequence.
    { ISD::SRL,   MVT::v64i8,  11 }, // vpblendvb sequence.
    { ISD::SRA,   MVT::v64i8,  24 }, // vpblendvb sequence.

    { ISD::MUL,   MVT::v64i8,  11 }, // extend/pmullw/trunc sequence.
    { ISD::MUL,   MVT::v32i8,   4 }, // extend/pmullw/trunc sequence.
    { ISD::MUL,   MVT::v16i8,   4 }, // extend/pmullw/trunc sequence.
  };

  // Look for AVX512BW lowering tricks for custom cases.
  if (ST->hasBWI())
    if (const auto *Entry = CostTableLookup(AVX512BWCostTable, ISD, LT.second))
      return LT.first * Entry->Cost;

  static const CostTblEntry AVX512CostTable[] = {
    { ISD::SHL,     MVT::v16i32,  1 },
    { ISD::SRL,     MVT::v16i32,  1 },
    { ISD::SRA,     MVT::v16i32,  1 },

    { ISD::SHL,     MVT::v8i64,   1 },
    { ISD::SRL,     MVT::v8i64,   1 },

    { ISD::SRA,     MVT::v2i64,   1 },
    { ISD::SRA,     MVT::v4i64,   1 },
    { ISD::SRA,     MVT::v8i64,   1 },

    { ISD::MUL,     MVT::v32i8,  13 }, // extend/pmullw/trunc sequence.
    { ISD::MUL,     MVT::v16i8,   5 }, // extend/pmullw/trunc sequence.
    { ISD::MUL,     MVT::v16i32,  1 }, // pmulld (Skylake from agner.org)
    { ISD::MUL,     MVT::v8i32,   1 }, // pmulld (Skylake from agner.org)
    { ISD::MUL,     MVT::v4i32,   1 }, // pmulld (Skylake from agner.org)
    { ISD::MUL,     MVT::v8i64,   8 }, // 3*pmuludq/3*shift/2*add

    { ISD::FADD,    MVT::v8f64,   1 }, // Skylake from http://www.agner.org/
    { ISD::FSUB,    MVT::v8f64,   1 }, // Skylake from http://www.agner.org/
    { ISD::FMUL,    MVT::v8f64,   1 }, // Skylake from http://www.agner.org/

    { ISD::FADD,    MVT::v16f32,  1 }, // Skylake from http://www.agner.org/
    { ISD::FSUB,    MVT::v16f32,  1 }, // Skylake from http://www.agner.org/
    { ISD::FMUL,    MVT::v16f32,  1 }, // Skylake from http://www.agner.org/
  };

  if (ST->hasAVX512())
    if (const auto *Entry = CostTableLookup(AVX512CostTable, ISD, LT.second))
      return LT.first * Entry->Cost;

  static const CostTblEntry AVX2ShiftCostTable[] = {
    // Shifts on v4i64/v8i32 on AVX2 are legal even though we declare them
    // custom, in order to detect the cases where the shift amount is a scalar.
    { ISD::SHL,     MVT::v4i32,  1 },
    { ISD::SRL,     MVT::v4i32,  1 },
    { ISD::SRA,     MVT::v4i32,  1 },
    { ISD::SHL,     MVT::v8i32,  1 },
    { ISD::SRL,     MVT::v8i32,  1 },
    { ISD::SRA,     MVT::v8i32,  1 },
    { ISD::SHL,     MVT::v2i64,  1 },
    { ISD::SRL,     MVT::v2i64,  1 },
    { ISD::SHL,     MVT::v4i64,  1 },
    { ISD::SRL,     MVT::v4i64,  1 },
  };

  // Look for AVX2 lowering tricks.
  if (ST->hasAVX2()) {
    if (ISD == ISD::SHL && LT.second == MVT::v16i16 &&
        (Op2Info == TargetTransformInfo::OK_UniformConstantValue ||
         Op2Info == TargetTransformInfo::OK_NonUniformConstantValue))
      // On AVX2, a packed v16i16 shift left by a constant build_vector
      // is lowered into a vector multiply (vpmullw).
      return getArithmeticInstrCost(Instruction::Mul, Ty, Op1Info, Op2Info,
                                    TargetTransformInfo::OP_None,
                                    TargetTransformInfo::OP_None);

    if (const auto *Entry = CostTableLookup(AVX2ShiftCostTable, ISD, LT.second))
      return LT.first * Entry->Cost;
  }

  static const CostTblEntry XOPShiftCostTable[] = {
    // 128bit shifts take 1cy, but right shifts require negation beforehand.
    { ISD::SHL,     MVT::v16i8,    1 },
    { ISD::SRL,     MVT::v16i8,    2 },
    { ISD::SRA,     MVT::v16i8,    2 },
    { ISD::SHL,     MVT::v8i16,    1 },
    { ISD::SRL,     MVT::v8i16,    2 },
    { ISD::SRA,     MVT::v8i16,    2 },
    { ISD::SHL,     MVT::v4i32,    1 },
    { ISD::SRL,     MVT::v4i32,    2 },
    { ISD::SRA,     MVT::v4i32,    2 },
    { ISD::SHL,     MVT::v2i64,    1 },
    { ISD::SRL,     MVT::v2i64,    2 },
    { ISD::SRA,     MVT::v2i64,    2 },
    // 256bit shifts require splitting if AVX2 didn't catch them above.
    { ISD::SHL,     MVT::v32i8,  2+2 },
    { ISD::SRL,     MVT::v32i8,  4+2 },
    { ISD::SRA,     MVT::v32i8,  4+2 },
    { ISD::SHL,     MVT::v16i16, 2+2 },
    { ISD::SRL,     MVT::v16i16, 4+2 },
    { ISD::SRA,     MVT::v16i16, 4+2 },
    { ISD::SHL,     MVT::v8i32,  2+2 },
    { ISD::SRL,     MVT::v8i32,  4+2 },
    { ISD::SRA,     MVT::v8i32,  4+2 },
    { ISD::SHL,     MVT::v4i64,  2+2 },
    { ISD::SRL,     MVT::v4i64,  4+2 },
    { ISD::SRA,     MVT::v4i64,  4+2 },
  };

  // Look for XOP lowering tricks.
  if (ST->hasXOP()) {
    // If the right shift is constant then we'll fold the negation so
    // it's as cheap as a left shift.
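    // (Background note: XOP's variable shifts such as vpshlb/vpshab take a
    // signed per-element count and shift right for negative counts, which is
    // why a right shift needs its count negated first.)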
    int ShiftISD = ISD;
    if ((ShiftISD == ISD::SRL || ShiftISD == ISD::SRA) &&
        (Op2Info == TargetTransformInfo::OK_UniformConstantValue ||
         Op2Info == TargetTransformInfo::OK_NonUniformConstantValue))
      ShiftISD = ISD::SHL;
    if (const auto *Entry =
            CostTableLookup(XOPShiftCostTable, ShiftISD, LT.second))
      return LT.first * Entry->Cost;
  }

  static const CostTblEntry SSE2UniformShiftCostTable[] = {
    // Uniform splats are cheaper for the following instructions.
    { ISD::SHL,  MVT::v16i16, 2+2 }, // 2*psllw + split.
    { ISD::SHL,  MVT::v8i32,  2+2 }, // 2*pslld + split.
    { ISD::SHL,  MVT::v4i64,  2+2 }, // 2*psllq + split.

    { ISD::SRL,  MVT::v16i16, 2+2 }, // 2*psrlw + split.
    { ISD::SRL,  MVT::v8i32,  2+2 }, // 2*psrld + split.
    { ISD::SRL,  MVT::v4i64,  2+2 }, // 2*psrlq + split.

    { ISD::SRA,  MVT::v16i16, 2+2 }, // 2*psraw + split.
    { ISD::SRA,  MVT::v8i32,  2+2 }, // 2*psrad + split.
    { ISD::SRA,  MVT::v2i64,    4 }, // 2*psrad + shuffle.
    { ISD::SRA,  MVT::v4i64,  8+2 }, // 2*(2*psrad + shuffle) + split.
  };

  if (ST->hasSSE2() &&
      ((Op2Info == TargetTransformInfo::OK_UniformConstantValue) ||
       (Op2Info == TargetTransformInfo::OK_UniformValue))) {

    // Handle AVX2 uniform v4i64 ISD::SRA, it's not worth a table.
    if (ISD == ISD::SRA && LT.second == MVT::v4i64 && ST->hasAVX2())
      return LT.first * 4; // 2*psrad + shuffle.

    if (const auto *Entry =
            CostTableLookup(SSE2UniformShiftCostTable, ISD, LT.second))
      return LT.first * Entry->Cost;
  }

  if (ISD == ISD::SHL &&
      Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) {
    MVT VT = LT.second;
    // A vector shift left by a non-uniform constant can be lowered into a
    // vector multiply.
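    // (Each lane's shift simply becomes a multiply by the matching power of
    // two, e.g. shl <a,b,c,d>, <0,1,2,3> == mul <a,b,c,d>, <1,2,4,8>.)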
    if (((VT == MVT::v8i16 || VT == MVT::v4i32) && ST->hasSSE2()) ||
        ((VT == MVT::v16i16 || VT == MVT::v8i32) && ST->hasAVX()))
      ISD = ISD::MUL;
  }

  static const CostTblEntry AVX2CostTable[] = {
    { ISD::SHL,  MVT::v32i8,  11 }, // vpblendvb sequence.
    { ISD::SHL,  MVT::v16i16, 10 }, // extend/vpsrlvd/pack sequence.

    { ISD::SRL,  MVT::v32i8,  11 }, // vpblendvb sequence.
    { ISD::SRL,  MVT::v16i16, 10 }, // extend/vpsrlvd/pack sequence.

    { ISD::SRA,  MVT::v32i8,  24 }, // vpblendvb sequence.
    { ISD::SRA,  MVT::v16i16, 10 }, // extend/vpsravd/pack sequence.
    { ISD::SRA,  MVT::v2i64,   4 }, // srl/xor/sub sequence.
    { ISD::SRA,  MVT::v4i64,   4 }, // srl/xor/sub sequence.

    { ISD::SUB,  MVT::v32i8,   1 }, // psubb
    { ISD::ADD,  MVT::v32i8,   1 }, // paddb
    { ISD::SUB,  MVT::v16i16,  1 }, // psubw
    { ISD::ADD,  MVT::v16i16,  1 }, // paddw
    { ISD::SUB,  MVT::v8i32,   1 }, // psubd
    { ISD::ADD,  MVT::v8i32,   1 }, // paddd
    { ISD::SUB,  MVT::v4i64,   1 }, // psubq
    { ISD::ADD,  MVT::v4i64,   1 }, // paddq

    { ISD::MUL,  MVT::v32i8,  17 }, // extend/pmullw/trunc sequence.
    { ISD::MUL,  MVT::v16i8,   7 }, // extend/pmullw/trunc sequence.
    { ISD::MUL,  MVT::v16i16,  1 }, // pmullw
    { ISD::MUL,  MVT::v8i32,   2 }, // pmulld (Haswell from agner.org)
    { ISD::MUL,  MVT::v4i64,   8 }, // 3*pmuludq/3*shift/2*add

    { ISD::FADD, MVT::v4f64,   1 }, // Haswell from http://www.agner.org/
    { ISD::FADD, MVT::v8f32,   1 }, // Haswell from http://www.agner.org/
    { ISD::FSUB, MVT::v4f64,   1 }, // Haswell from http://www.agner.org/
    { ISD::FSUB, MVT::v8f32,   1 }, // Haswell from http://www.agner.org/
    { ISD::FMUL, MVT::v4f64,   1 }, // Haswell from http://www.agner.org/
    { ISD::FMUL, MVT::v8f32,   1 }, // Haswell from http://www.agner.org/

    { ISD::FDIV, MVT::f32,     7 }, // Haswell from http://www.agner.org/
    { ISD::FDIV, MVT::v4f32,   7 }, // Haswell from http://www.agner.org/
    { ISD::FDIV, MVT::v8f32,  14 }, // Haswell from http://www.agner.org/
    { ISD::FDIV, MVT::f64,    14 }, // Haswell from http://www.agner.org/
    { ISD::FDIV, MVT::v2f64,  14 }, // Haswell from http://www.agner.org/
    { ISD::FDIV, MVT::v4f64,  28 }, // Haswell from http://www.agner.org/
  };

  // Look for AVX2 lowering tricks for custom cases.
  if (ST->hasAVX2())
    if (const auto *Entry = CostTableLookup(AVX2CostTable, ISD, LT.second))
      return LT.first * Entry->Cost;

  static const CostTblEntry AVX1CostTable[] = {
    // We don't have to scalarize unsupported ops. We can issue two half-sized
    // operations and we only need to extract the upper YMM half.
    // Two ops + 1 extract + 1 insert = 4.
    { ISD::MUL,     MVT::v16i16,   4 },
    { ISD::MUL,     MVT::v8i32,    4 },
    { ISD::SUB,     MVT::v32i8,    4 },
    { ISD::ADD,     MVT::v32i8,    4 },
    { ISD::SUB,     MVT::v16i16,   4 },
    { ISD::ADD,     MVT::v16i16,   4 },
    { ISD::SUB,     MVT::v8i32,    4 },
    { ISD::ADD,     MVT::v8i32,    4 },
    { ISD::SUB,     MVT::v4i64,    4 },
    { ISD::ADD,     MVT::v4i64,    4 },

    // A v4i64 multiply is custom lowered as two split v2i64 vectors that then
    // are lowered as a series of long multiplies(3), shifts(3) and adds(2).
    // Because we believe v4i64 to be a legal type, we must also include the
    // extract+insert in the cost table. Therefore, the cost here is 18
    // instead of 8.
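    // (That is, two v2i64 multiplies at 8 each, plus the extract and the
    // insert: 2*8 + 1 + 1 = 18.)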
    { ISD::MUL,     MVT::v4i64,   18 },

    { ISD::MUL,     MVT::v32i8,   26 }, // extend/pmullw/trunc sequence.

    { ISD::FDIV,    MVT::f32,     14 }, // SNB from http://www.agner.org/
    { ISD::FDIV,    MVT::v4f32,   14 }, // SNB from http://www.agner.org/
    { ISD::FDIV,    MVT::v8f32,   28 }, // SNB from http://www.agner.org/
    { ISD::FDIV,    MVT::f64,     22 }, // SNB from http://www.agner.org/
    { ISD::FDIV,    MVT::v2f64,   22 }, // SNB from http://www.agner.org/
    { ISD::FDIV,    MVT::v4f64,   44 }, // SNB from http://www.agner.org/
  };

  if (ST->hasAVX())
    if (const auto *Entry = CostTableLookup(AVX1CostTable, ISD, LT.second))
      return LT.first * Entry->Cost;

  static const CostTblEntry SSE42CostTable[] = {
    { ISD::FADD, MVT::f64,     1 }, // Nehalem from http://www.agner.org/
    { ISD::FADD, MVT::f32,     1 }, // Nehalem from http://www.agner.org/
    { ISD::FADD, MVT::v2f64,   1 }, // Nehalem from http://www.agner.org/
    { ISD::FADD, MVT::v4f32,   1 }, // Nehalem from http://www.agner.org/

    { ISD::FSUB, MVT::f64,     1 }, // Nehalem from http://www.agner.org/
    { ISD::FSUB, MVT::f32,     1 }, // Nehalem from http://www.agner.org/
    { ISD::FSUB, MVT::v2f64,   1 }, // Nehalem from http://www.agner.org/
    { ISD::FSUB, MVT::v4f32,   1 }, // Nehalem from http://www.agner.org/

    { ISD::FMUL, MVT::f64,     1 }, // Nehalem from http://www.agner.org/
    { ISD::FMUL, MVT::f32,     1 }, // Nehalem from http://www.agner.org/
    { ISD::FMUL, MVT::v2f64,   1 }, // Nehalem from http://www.agner.org/
    { ISD::FMUL, MVT::v4f32,   1 }, // Nehalem from http://www.agner.org/

    { ISD::FDIV, MVT::f32,    14 }, // Nehalem from http://www.agner.org/
    { ISD::FDIV, MVT::v4f32,  14 }, // Nehalem from http://www.agner.org/
    { ISD::FDIV, MVT::f64,    22 }, // Nehalem from http://www.agner.org/
    { ISD::FDIV, MVT::v2f64,  22 }, // Nehalem from http://www.agner.org/
  };

  if (ST->hasSSE42())
    if (const auto *Entry = CostTableLookup(SSE42CostTable, ISD, LT.second))
      return LT.first * Entry->Cost;

  static const CostTblEntry SSE41CostTable[] = {
    { ISD::SHL,  MVT::v16i8,      11 }, // pblendvb sequence.
    { ISD::SHL,  MVT::v32i8,  2*11+2 }, // pblendvb sequence + split.
    { ISD::SHL,  MVT::v8i16,      14 }, // pblendvb sequence.
    { ISD::SHL,  MVT::v16i16, 2*14+2 }, // pblendvb sequence + split.
    { ISD::SHL,  MVT::v4i32,       4 }, // pslld/paddd/cvttps2dq/pmulld
    { ISD::SHL,  MVT::v8i32,   2*4+2 }, // pslld/paddd/cvttps2dq/pmulld + split

    { ISD::SRL,  MVT::v16i8,      12 }, // pblendvb sequence.
    { ISD::SRL,  MVT::v32i8,  2*12+2 }, // pblendvb sequence + split.
    { ISD::SRL,  MVT::v8i16,      14 }, // pblendvb sequence.
    { ISD::SRL,  MVT::v16i16, 2*14+2 }, // pblendvb sequence + split.
    { ISD::SRL,  MVT::v4i32,      11 }, // Shift each lane + blend.
    { ISD::SRL,  MVT::v8i32,  2*11+2 }, // Shift each lane + blend + split.

    { ISD::SRA,  MVT::v16i8,      24 }, // pblendvb sequence.
    { ISD::SRA,  MVT::v32i8,  2*24+2 }, // pblendvb sequence + split.
    { ISD::SRA,  MVT::v8i16,      14 }, // pblendvb sequence.
    { ISD::SRA,  MVT::v16i16, 2*14+2 }, // pblendvb sequence + split.
    { ISD::SRA,  MVT::v4i32,      12 }, // Shift each lane + blend.
    { ISD::SRA,  MVT::v8i32,  2*12+2 }, // Shift each lane + blend + split.

    { ISD::MUL,  MVT::v4i32,       2 }  // pmulld (Nehalem from agner.org)
  };

  if (ST->hasSSE41())
    if (const auto *Entry = CostTableLookup(SSE41CostTable, ISD, LT.second))
      return LT.first * Entry->Cost;

  static const CostTblEntry SSE2CostTable[] = {
    // We don't correctly identify costs of casts because they are marked as
    // custom.
    { ISD::SHL,  MVT::v16i8,      26 }, // cmpgtb sequence.
    { ISD::SHL,  MVT::v8i16,      32 }, // cmpgtb sequence.
    { ISD::SHL,  MVT::v4i32,     2*5 }, // We optimized this using mul.
    { ISD::SHL,  MVT::v2i64,       4 }, // splat+shuffle sequence.
    { ISD::SHL,  MVT::v4i64,   2*4+2 }, // splat+shuffle sequence + split.

    { ISD::SRL,  MVT::v16i8,      26 }, // cmpgtb sequence.
    { ISD::SRL,  MVT::v8i16,      32 }, // cmpgtb sequence.
    { ISD::SRL,  MVT::v4i32,      16 }, // Shift each lane + blend.
    { ISD::SRL,  MVT::v2i64,       4 }, // splat+shuffle sequence.
    { ISD::SRL,  MVT::v4i64,   2*4+2 }, // splat+shuffle sequence + split.

    { ISD::SRA,  MVT::v16i8,      54 }, // unpacked cmpgtb sequence.
    { ISD::SRA,  MVT::v8i16,      32 }, // cmpgtb sequence.
    { ISD::SRA,  MVT::v4i32,      16 }, // Shift each lane + blend.
    { ISD::SRA,  MVT::v2i64,      12 }, // srl/xor/sub sequence.
    { ISD::SRA,  MVT::v4i64,  2*12+2 }, // srl/xor/sub sequence+split.

    { ISD::MUL,  MVT::v16i8,      12 }, // extend/pmullw/trunc sequence.
    { ISD::MUL,  MVT::v8i16,       1 }, // pmullw
    { ISD::MUL,  MVT::v4i32,       6 }, // 3*pmuludq/4*shuffle
    { ISD::MUL,  MVT::v2i64,       8 }, // 3*pmuludq/3*shift/2*add

    { ISD::FDIV, MVT::f32,        23 }, // Pentium IV from http://www.agner.org/
    { ISD::FDIV, MVT::v4f32,      39 }, // Pentium IV from http://www.agner.org/
    { ISD::FDIV, MVT::f64,        38 }, // Pentium IV from http://www.agner.org/
    { ISD::FDIV, MVT::v2f64,      69 }, // Pentium IV from http://www.agner.org/

    { ISD::FADD, MVT::f32,         2 }, // Pentium IV from http://www.agner.org/
    { ISD::FADD, MVT::f64,         2 }, // Pentium IV from http://www.agner.org/

    { ISD::FSUB, MVT::f32,         2 }, // Pentium IV from http://www.agner.org/
    { ISD::FSUB, MVT::f64,         2 }, // Pentium IV from http://www.agner.org/
  };

  if (ST->hasSSE2())
    if (const auto *Entry = CostTableLookup(SSE2CostTable, ISD, LT.second))
      return LT.first * Entry->Cost;

  static const CostTblEntry SSE1CostTable[] = {
    { ISD::FDIV, MVT::f32,   17 }, // Pentium III from http://www.agner.org/
    { ISD::FDIV, MVT::v4f32, 34 }, // Pentium III from http://www.agner.org/

    { ISD::FADD, MVT::f32,    1 }, // Pentium III from http://www.agner.org/
    { ISD::FADD, MVT::v4f32,  2 }, // Pentium III from http://www.agner.org/

    { ISD::FSUB, MVT::f32,    1 }, // Pentium III from http://www.agner.org/
    { ISD::FSUB, MVT::v4f32,  2 }, // Pentium III from http://www.agner.org/

    { ISD::ADD, MVT::i8,      1 }, // Pentium III from http://www.agner.org/
    { ISD::ADD, MVT::i16,     1 }, // Pentium III from http://www.agner.org/
    { ISD::ADD, MVT::i32,     1 }, // Pentium III from http://www.agner.org/

    { ISD::SUB, MVT::i8,      1 }, // Pentium III from http://www.agner.org/
    { ISD::SUB, MVT::i16,     1 }, // Pentium III from http://www.agner.org/
    { ISD::SUB, MVT::i32,     1 }, // Pentium III from http://www.agner.org/
  };

  if (ST->hasSSE1())
    if (const auto *Entry = CostTableLookup(SSE1CostTable, ISD, LT.second))
      return LT.first * Entry->Cost;

  // It is not a good idea to vectorize division. We have to scalarize it and
  // in the process we will often end up having to spill regular registers.
  // The overhead of division is going to dominate most kernels anyway, so try
  // hard to prevent vectorization of division - it is generally a bad idea.
  // Assume somewhat arbitrarily that we have to be able to hide "20 cycles"
  // for each lane.
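  // E.g. for a v4i32 division (LT.first == 1, four lanes), the block below
  // returns 20 * 1 * 4 * ScalarCost = 80 * ScalarCost.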
  if (LT.second.isVector() && (ISD == ISD::SDIV || ISD == ISD::SREM ||
                               ISD == ISD::UDIV || ISD == ISD::UREM)) {
    int ScalarCost = getArithmeticInstrCost(
        Opcode, Ty->getScalarType(), Op1Info, Op2Info,
        TargetTransformInfo::OP_None, TargetTransformInfo::OP_None);
    return 20 * LT.first * LT.second.getVectorNumElements() * ScalarCost;
  }

  // Fallback to the default implementation.
  return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info);
}

int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                               Type *SubTp) {
  // 64-bit packed float vectors (v2f32) are widened to type v4f32.
  // 64-bit packed integer vectors (v2i32) are widened to type v4i32.
  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);

  // Treat Transpose as 2-op shuffles - there's no difference in lowering.
  if (Kind == TTI::SK_Transpose)
    Kind = TTI::SK_PermuteTwoSrc;

  // For Broadcasts we are splatting the first element from the first input
  // register, so we only need to reference that input; all the output
  // registers are the same.
  if (Kind == TTI::SK_Broadcast)
    LT.first = 1;

  // Subvector extractions are free if they start at the beginning of a
  // vector and cheap if the subvectors are aligned.
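  // (E.g. extracting the low v2i64 half of a v4i64 is free: it is just the
  // lower xmm portion of the ymm register.)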
  if (Kind == TTI::SK_ExtractSubvector && LT.second.isVector()) {
    int NumElts = LT.second.getVectorNumElements();
    if ((Index % NumElts) == 0)
      return 0;
    std::pair<int, MVT> SubLT = TLI->getTypeLegalizationCost(DL, SubTp);
    if (SubLT.second.isVector()) {
      int NumSubElts = SubLT.second.getVectorNumElements();
      if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
        return SubLT.first;
      // Handle some cases for widening legalization. For now we only handle
      // cases where the original subvector was naturally aligned and evenly
      // fit in its legalized subvector type.
      // FIXME: Remove some of the alignment restrictions.
      // FIXME: We can use permq for 64-bit or larger extracts from 256-bit
      // vectors.
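      // As an assumed illustration: extracting a v2i16 at index 2 from a
      // v8i16 widens the subvector to v8i16; the aligned extract at index 0
      // is free, and one pshufd moves the pair down, for a total cost of 1.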
      int OrigSubElts = SubTp->getVectorNumElements();
      if (NumSubElts > OrigSubElts &&
          (Index % OrigSubElts) == 0 && (NumSubElts % OrigSubElts) == 0 &&
          LT.second.getVectorElementType() ==
              SubLT.second.getVectorElementType() &&
          LT.second.getVectorElementType().getSizeInBits() ==
              Tp->getVectorElementType()->getPrimitiveSizeInBits()) {
        assert(NumElts >= NumSubElts && NumElts > OrigSubElts &&
               "Unexpected number of elements!");
        Type *VecTy = VectorType::get(Tp->getVectorElementType(),
                                      LT.second.getVectorNumElements());
        Type *SubTy = VectorType::get(Tp->getVectorElementType(),
                                      SubLT.second.getVectorNumElements());
        int ExtractIndex = alignDown((Index % NumElts), NumSubElts);
        int ExtractCost = getShuffleCost(TTI::SK_ExtractSubvector, VecTy,
                                         ExtractIndex, SubTy);

        // If the original size is 32-bits or more, we can use pshufd. Otherwise
        // if we have SSSE3 we can use pshufb.
        if (SubTp->getPrimitiveSizeInBits() >= 32 || ST->hasSSSE3())
          return ExtractCost + 1; // pshufd or pshufb

        assert(SubTp->getPrimitiveSizeInBits() == 16 &&
               "Unexpected vector size");

        return ExtractCost + 2; // worst case pshufhw + pshufd
      }
    }
  }

  // We are going to permute multiple sources and the result will be in
  // multiple destinations. We provide an accurate cost only for splits where
  // the element type remains the same.
  if (Kind == TTI::SK_PermuteSingleSrc && LT.first != 1) {
    MVT LegalVT = LT.second;
    if (LegalVT.isVector() &&
        LegalVT.getVectorElementType().getSizeInBits() ==
            Tp->getVectorElementType()->getPrimitiveSizeInBits() &&
        LegalVT.getVectorNumElements() < Tp->getVectorNumElements()) {

      unsigned VecTySize = DL.getTypeStoreSize(Tp);
      unsigned LegalVTSize = LegalVT.getStoreSize();
      // Number of source vectors after legalization:
      unsigned NumOfSrcs = (VecTySize + LegalVTSize - 1) / LegalVTSize;
      // Number of destination vectors after legalization:
      unsigned NumOfDests = LT.first;
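      // E.g. a v32i16 single-source permute on AVX2 legalizes to two v16i16
      // halves: NumOfSrcs == NumOfDests == 2, modeled below as
      // (2 - 1) * 2 == 2 two-input v16i16 shuffles.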

      Type *SingleOpTy = VectorType::get(Tp->getVectorElementType(),
                                         LegalVT.getVectorNumElements());

      unsigned NumOfShuffles = (NumOfSrcs - 1) * NumOfDests;
      return NumOfShuffles *
             getShuffleCost(TTI::SK_PermuteTwoSrc, SingleOpTy, 0, nullptr);
    }

    return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
  }

  // For 2-input shuffles, we must account for splitting the 2 inputs into
  // many.
  if (Kind == TTI::SK_PermuteTwoSrc && LT.first != 1) {
    // We assume that source and destination have the same vector type.
    int NumOfDests = LT.first;
    int NumOfShufflesPerDest = LT.first * 2 - 1;
    LT.first = NumOfDests * NumOfShufflesPerDest;
  }

  static const CostTblEntry AVX512VBMIShuffleTbl[] = {
      {TTI::SK_Reverse, MVT::v64i8, 1}, // vpermb
      {TTI::SK_Reverse, MVT::v32i8, 1}, // vpermb

      {TTI::SK_PermuteSingleSrc, MVT::v64i8, 1}, // vpermb
      {TTI::SK_PermuteSingleSrc, MVT::v32i8, 1}, // vpermb

      {TTI::SK_PermuteTwoSrc, MVT::v64i8, 1}, // vpermt2b
      {TTI::SK_PermuteTwoSrc, MVT::v32i8, 1}, // vpermt2b
      {TTI::SK_PermuteTwoSrc, MVT::v16i8, 1}  // vpermt2b
  };

  if (ST->hasVBMI())
    if (const auto *Entry =
            CostTableLookup(AVX512VBMIShuffleTbl, Kind, LT.second))
      return LT.first * Entry->Cost;

  static const CostTblEntry AVX512BWShuffleTbl[] = {
      {TTI::SK_Broadcast, MVT::v32i16, 1}, // vpbroadcastw
      {TTI::SK_Broadcast, MVT::v64i8, 1},  // vpbroadcastb

      {TTI::SK_Reverse, MVT::v32i16, 1}, // vpermw
      {TTI::SK_Reverse, MVT::v16i16, 1}, // vpermw
      {TTI::SK_Reverse, MVT::v64i8, 2},  // pshufb + vshufi64x2

      {TTI::SK_PermuteSingleSrc, MVT::v32i16, 1}, // vpermw
      {TTI::SK_PermuteSingleSrc, MVT::v16i16, 1}, // vpermw
      {TTI::SK_PermuteSingleSrc, MVT::v8i16, 1},  // vpermw
      {TTI::SK_PermuteSingleSrc, MVT::v64i8, 8},  // extend to v32i16
      {TTI::SK_PermuteSingleSrc, MVT::v32i8, 3},  // vpermw + zext/trunc

      {TTI::SK_PermuteTwoSrc, MVT::v32i16, 1}, // vpermt2w
      {TTI::SK_PermuteTwoSrc, MVT::v16i16, 1}, // vpermt2w
      {TTI::SK_PermuteTwoSrc, MVT::v8i16, 1},  // vpermt2w
      {TTI::SK_PermuteTwoSrc, MVT::v32i8, 3},  // zext + vpermt2w + trunc
      {TTI::SK_PermuteTwoSrc, MVT::v64i8, 19}, // 6 * v32i8 + 1
      {TTI::SK_PermuteTwoSrc, MVT::v16i8, 3}   // zext + vpermt2w + trunc
  };

  if (ST->hasBWI())
    if (const auto *Entry =
            CostTableLookup(AVX512BWShuffleTbl, Kind, LT.second))
      return LT.first * Entry->Cost;

  static const CostTblEntry AVX512ShuffleTbl[] = {
      {TTI::SK_Broadcast, MVT::v8f64, 1},  // vbroadcastpd
      {TTI::SK_Broadcast, MVT::v16f32, 1}, // vbroadcastps
      {TTI::SK_Broadcast, MVT::v8i64, 1},  // vpbroadcastq
      {TTI::SK_Broadcast, MVT::v16i32, 1}, // vpbroadcastd

      {TTI::SK_Reverse, MVT::v8f64, 1},  // vpermpd
      {TTI::SK_Reverse, MVT::v16f32, 1}, // vpermps
      {TTI::SK_Reverse, MVT::v8i64, 1},  // vpermq
      {TTI::SK_Reverse, MVT::v16i32, 1}, // vpermd

      {TTI::SK_PermuteSingleSrc, MVT::v8f64, 1},  // vpermpd
      {TTI::SK_PermuteSingleSrc, MVT::v4f64, 1},  // vpermpd
      {TTI::SK_PermuteSingleSrc, MVT::v2f64, 1},  // vpermpd
      {TTI::SK_PermuteSingleSrc, MVT::v16f32, 1}, // vpermps
      {TTI::SK_PermuteSingleSrc, MVT::v8f32, 1},  // vpermps
      {TTI::SK_PermuteSingleSrc, MVT::v4f32, 1},  // vpermps
      {TTI::SK_PermuteSingleSrc, MVT::v8i64, 1},  // vpermq
      {TTI::SK_PermuteSingleSrc, MVT::v4i64, 1},  // vpermq
      {TTI::SK_PermuteSingleSrc, MVT::v2i64, 1},  // vpermq
      {TTI::SK_PermuteSingleSrc, MVT::v16i32, 1}, // vpermd
      {TTI::SK_PermuteSingleSrc, MVT::v8i32, 1},  // vpermd
      {TTI::SK_PermuteSingleSrc, MVT::v4i32, 1},  // vpermd
      {TTI::SK_PermuteSingleSrc, MVT::v16i8, 1},  // pshufb

      {TTI::SK_PermuteTwoSrc, MVT::v8f64, 1},  // vpermt2pd
      {TTI::SK_PermuteTwoSrc, MVT::v16f32, 1}, // vpermt2ps
      {TTI::SK_PermuteTwoSrc, MVT::v8i64, 1},  // vpermt2q
      {TTI::SK_PermuteTwoSrc, MVT::v16i32, 1}, // vpermt2d
      {TTI::SK_PermuteTwoSrc, MVT::v4f64, 1},  // vpermt2pd
      {TTI::SK_PermuteTwoSrc, MVT::v8f32, 1},  // vpermt2ps
      {TTI::SK_PermuteTwoSrc, MVT::v4i64, 1},  // vpermt2q
      {TTI::SK_PermuteTwoSrc, MVT::v8i32, 1},  // vpermt2d
      {TTI::SK_PermuteTwoSrc, MVT::v2f64, 1},  // vpermt2pd
      {TTI::SK_PermuteTwoSrc, MVT::v4f32, 1},  // vpermt2ps
      {TTI::SK_PermuteTwoSrc, MVT::v2i64, 1},  // vpermt2q
      {TTI::SK_PermuteTwoSrc, MVT::v4i32, 1}   // vpermt2d
  };

  if (ST->hasAVX512())
    if (const auto *Entry = CostTableLookup(AVX512ShuffleTbl, Kind, LT.second))
      return LT.first * Entry->Cost;

  static const CostTblEntry AVX2ShuffleTbl[] = {
      {TTI::SK_Broadcast, MVT::v4f64, 1},  // vbroadcastpd
      {TTI::SK_Broadcast, MVT::v8f32, 1},  // vbroadcastps
      {TTI::SK_Broadcast, MVT::v4i64, 1},  // vpbroadcastq
      {TTI::SK_Broadcast, MVT::v8i32, 1},  // vpbroadcastd
      {TTI::SK_Broadcast, MVT::v16i16, 1}, // vpbroadcastw
      {TTI::SK_Broadcast, MVT::v32i8, 1},  // vpbroadcastb

      {TTI::SK_Reverse, MVT::v4f64, 1},  // vpermpd
      {TTI::SK_Reverse, MVT::v8f32, 1},  // vpermps
      {TTI::SK_Reverse, MVT::v4i64, 1},  // vpermq
      {TTI::SK_Reverse, MVT::v8i32, 1},  // vpermd
      {TTI::SK_Reverse, MVT::v16i16, 2}, // vperm2i128 + pshufb
      {TTI::SK_Reverse, MVT::v32i8, 2},  // vperm2i128 + pshufb

      {TTI::SK_Select, MVT::v16i16, 1}, // vpblendvb
      {TTI::SK_Select, MVT::v32i8, 1},  // vpblendvb

      {TTI::SK_PermuteSingleSrc, MVT::v4f64, 1},  // vpermpd
      {TTI::SK_PermuteSingleSrc, MVT::v8f32, 1},  // vpermps
      {TTI::SK_PermuteSingleSrc, MVT::v4i64, 1},  // vpermq
      {TTI::SK_PermuteSingleSrc, MVT::v8i32, 1},  // vpermd
      {TTI::SK_PermuteSingleSrc, MVT::v16i16, 4}, // vperm2i128 + 2*vpshufb
                                                  // + vpblendvb
      {TTI::SK_PermuteSingleSrc, MVT::v32i8, 4},  // vperm2i128 + 2*vpshufb
                                                  // + vpblendvb

      {TTI::SK_PermuteTwoSrc, MVT::v4f64, 3},  // 2*vpermpd + vblendpd
      {TTI::SK_PermuteTwoSrc, MVT::v8f32, 3},  // 2*vpermps + vblendps
      {TTI::SK_PermuteTwoSrc, MVT::v4i64, 3},  // 2*vpermq + vpblendd
      {TTI::SK_PermuteTwoSrc, MVT::v8i32, 3},  // 2*vpermd + vpblendd
      {TTI::SK_PermuteTwoSrc, MVT::v16i16, 7}, // 2*vperm2i128 + 4*vpshufb
                                               // + vpblendvb
      {TTI::SK_PermuteTwoSrc, MVT::v32i8, 7},  // 2*vperm2i128 + 4*vpshufb
                                               // + vpblendvb
  };

  if (ST->hasAVX2())
    if (const auto *Entry = CostTableLookup(AVX2ShuffleTbl, Kind, LT.second))
      return LT.first * Entry->Cost;

  static const CostTblEntry XOPShuffleTbl[] = {
      {TTI::SK_PermuteSingleSrc, MVT::v4f64, 2},  // vperm2f128 + vpermil2pd
      {TTI::SK_PermuteSingleSrc, MVT::v8f32, 2},  // vperm2f128 + vpermil2ps
      {TTI::SK_PermuteSingleSrc, MVT::v4i64, 2},  // vperm2f128 + vpermil2pd
      {TTI::SK_PermuteSingleSrc, MVT::v8i32, 2},  // vperm2f128 + vpermil2ps
      {TTI::SK_PermuteSingleSrc, MVT::v16i16, 4}, // vextractf128 + 2*vpperm
                                                  // + vinsertf128
      {TTI::SK_PermuteSingleSrc, MVT::v32i8, 4},  // vextractf128 + 2*vpperm
                                                  // + vinsertf128

      {TTI::SK_PermuteTwoSrc, MVT::v16i16, 9}, // 2*vextractf128 + 6*vpperm
                                               // + vinsertf128
      {TTI::SK_PermuteTwoSrc, MVT::v8i16, 1},  // vpperm
      {TTI::SK_PermuteTwoSrc, MVT::v32i8, 9},  // 2*vextractf128 + 6*vpperm
                                               // + vinsertf128
      {TTI::SK_PermuteTwoSrc, MVT::v16i8, 1},  // vpperm
  };

  if (ST->hasXOP())
    if (const auto *Entry = CostTableLookup(XOPShuffleTbl, Kind, LT.second))
      return LT.first * Entry->Cost;

  static const CostTblEntry AVX1ShuffleTbl[] = {
      {TTI::SK_Broadcast, MVT::v4f64, 2},  // vperm2f128 + vpermilpd
      {TTI::SK_Broadcast, MVT::v8f32, 2},  // vperm2f128 + vpermilps
      {TTI::SK_Broadcast, MVT::v4i64, 2},  // vperm2f128 + vpermilpd
      {TTI::SK_Broadcast, MVT::v8i32, 2},  // vperm2f128 + vpermilps
      {TTI::SK_Broadcast, MVT::v16i16, 3}, // vpshuflw + vpshufd + vinsertf128
      {TTI::SK_Broadcast, MVT::v32i8, 2},  // vpshufb + vinsertf128

      {TTI::SK_Reverse, MVT::v4f64, 2},  // vperm2f128 + vpermilpd
      {TTI::SK_Reverse, MVT::v8f32, 2},  // vperm2f128 + vpermilps
      {TTI::SK_Reverse, MVT::v4i64, 2},  // vperm2f128 + vpermilpd
      {TTI::SK_Reverse, MVT::v8i32, 2},  // vperm2f128 + vpermilps
      {TTI::SK_Reverse, MVT::v16i16, 4}, // vextractf128 + 2*pshufb
                                         // + vinsertf128
      {TTI::SK_Reverse, MVT::v32i8, 4},  // vextractf128 + 2*pshufb
                                         // + vinsertf128

      {TTI::SK_Select, MVT::v4i64, 1},  // vblendpd
      {TTI::SK_Select, MVT::v4f64, 1},  // vblendpd
      {TTI::SK_Select, MVT::v8i32, 1},  // vblendps
      {TTI::SK_Select, MVT::v8f32, 1},  // vblendps
      {TTI::SK_Select, MVT::v16i16, 3}, // vpand + vpandn + vpor
      {TTI::SK_Select, MVT::v32i8, 3},  // vpand + vpandn + vpor

      {TTI::SK_PermuteSingleSrc, MVT::v4f64, 2}, // vperm2f128 + vshufpd
      {TTI::SK_PermuteSingleSrc, MVT::v4i64, 2}, // vperm2f128 + vshufpd
      {TTI::SK_PermuteSingleSrc, MVT::v8f32, 4}, // 2*vperm2f128 + 2*vshufps
      {TTI::SK_PermuteSingleSrc, MVT::v8i32, 4}, // 2*vperm2f128 + 2*vshufps
1166 | {TTI::SK_PermuteSingleSrc, MVT::v16i16, 8}, // vextractf128 + 4*pshufb | |||
1167 | // + 2*por + vinsertf128 | |||
1168 | {TTI::SK_PermuteSingleSrc, MVT::v32i8, 8}, // vextractf128 + 4*pshufb | |||
1169 | // + 2*por + vinsertf128 | |||
1170 | ||||
1171 | {TTI::SK_PermuteTwoSrc, MVT::v4f64, 3}, // 2*vperm2f128 + vshufpd | |||
1172 | {TTI::SK_PermuteTwoSrc, MVT::v4i64, 3}, // 2*vperm2f128 + vshufpd | |||
1173 | {TTI::SK_PermuteTwoSrc, MVT::v8f32, 4}, // 2*vperm2f128 + 2*vshufps | |||
1174 | {TTI::SK_PermuteTwoSrc, MVT::v8i32, 4}, // 2*vperm2f128 + 2*vshufps | |||
1175 | {TTI::SK_PermuteTwoSrc, MVT::v16i16, 15}, // 2*vextractf128 + 8*pshufb | |||
1176 | // + 4*por + vinsertf128 | |||
1177 | {TTI::SK_PermuteTwoSrc, MVT::v32i8, 15}, // 2*vextractf128 + 8*pshufb | |||
1178 | // + 4*por + vinsertf128 | |||
1179 | }; | |||
1180 | ||||
1181 | if (ST->hasAVX()) | |||
1182 | if (const auto *Entry = CostTableLookup(AVX1ShuffleTbl, Kind, LT.second)) | |||
1183 | return LT.first * Entry->Cost; | |||
1184 | ||||
1185 | static const CostTblEntry SSE41ShuffleTbl[] = { | |||
1186 | {TTI::SK_Select, MVT::v2i64, 1}, // pblendw | |||
1187 | {TTI::SK_Select, MVT::v2f64, 1}, // movsd | |||
1188 | {TTI::SK_Select, MVT::v4i32, 1}, // pblendw | |||
1189 | {TTI::SK_Select, MVT::v4f32, 1}, // blendps | |||
1190 | {TTI::SK_Select, MVT::v8i16, 1}, // pblendw | |||
1191 | {TTI::SK_Select, MVT::v16i8, 1} // pblendvb | |||
1192 | }; | |||
1193 | ||||
1194 | if (ST->hasSSE41()) | |||
1195 | if (const auto *Entry = CostTableLookup(SSE41ShuffleTbl, Kind, LT.second)) | |||
1196 | return LT.first * Entry->Cost; | |||
1197 | ||||
1198 | static const CostTblEntry SSSE3ShuffleTbl[] = { | |||
1199 | {TTI::SK_Broadcast, MVT::v8i16, 1}, // pshufb | |||
1200 | {TTI::SK_Broadcast, MVT::v16i8, 1}, // pshufb | |||
1201 | ||||
1202 | {TTI::SK_Reverse, MVT::v8i16, 1}, // pshufb | |||
1203 | {TTI::SK_Reverse, MVT::v16i8, 1}, // pshufb | |||
1204 | ||||
1205 | {TTI::SK_Select, MVT::v8i16, 3}, // 2*pshufb + por | |||
1206 | {TTI::SK_Select, MVT::v16i8, 3}, // 2*pshufb + por | |||
1207 | ||||
1208 | {TTI::SK_PermuteSingleSrc, MVT::v8i16, 1}, // pshufb | |||
1209 | {TTI::SK_PermuteSingleSrc, MVT::v16i8, 1}, // pshufb | |||
1210 | ||||
1211 | {TTI::SK_PermuteTwoSrc, MVT::v8i16, 3}, // 2*pshufb + por | |||
1212 | {TTI::SK_PermuteTwoSrc, MVT::v16i8, 3}, // 2*pshufb + por | |||
1213 | }; | |||
1214 | ||||
1215 | if (ST->hasSSSE3()) | |||
1216 | if (const auto *Entry = CostTableLookup(SSSE3ShuffleTbl, Kind, LT.second)) | |||
1217 | return LT.first * Entry->Cost; | |||
1218 | ||||
1219 | static const CostTblEntry SSE2ShuffleTbl[] = { | |||
1220 | {TTI::SK_Broadcast, MVT::v2f64, 1}, // shufpd | |||
1221 | {TTI::SK_Broadcast, MVT::v2i64, 1}, // pshufd | |||
1222 | {TTI::SK_Broadcast, MVT::v4i32, 1}, // pshufd | |||
1223 | {TTI::SK_Broadcast, MVT::v8i16, 2}, // pshuflw + pshufd | |||
1224 | {TTI::SK_Broadcast, MVT::v16i8, 3}, // unpck + pshuflw + pshufd | |||
1225 | ||||
1226 | {TTI::SK_Reverse, MVT::v2f64, 1}, // shufpd | |||
1227 | {TTI::SK_Reverse, MVT::v2i64, 1}, // pshufd | |||
1228 | {TTI::SK_Reverse, MVT::v4i32, 1}, // pshufd | |||
1229 | {TTI::SK_Reverse, MVT::v8i16, 3}, // pshuflw + pshufhw + pshufd | |||
1230 | {TTI::SK_Reverse, MVT::v16i8, 9}, // 2*pshuflw + 2*pshufhw | |||
1231 | // + 2*pshufd + 2*unpck + packus | |||
1232 | ||||
1233 | {TTI::SK_Select, MVT::v2i64, 1}, // movsd | |||
1234 | {TTI::SK_Select, MVT::v2f64, 1}, // movsd | |||
1235 | {TTI::SK_Select, MVT::v4i32, 2}, // 2*shufps | |||
1236 | {TTI::SK_Select, MVT::v8i16, 3}, // pand + pandn + por | |||
1237 | {TTI::SK_Select, MVT::v16i8, 3}, // pand + pandn + por | |||
1238 | ||||
1239 | {TTI::SK_PermuteSingleSrc, MVT::v2f64, 1}, // shufpd | |||
1240 | {TTI::SK_PermuteSingleSrc, MVT::v2i64, 1}, // pshufd | |||
1241 | {TTI::SK_PermuteSingleSrc, MVT::v4i32, 1}, // pshufd | |||
1242 | {TTI::SK_PermuteSingleSrc, MVT::v8i16, 5}, // 2*pshuflw + 2*pshufhw | |||
1243 | // + pshufd/unpck | |||
1244 | {TTI::SK_PermuteSingleSrc, MVT::v16i8, 10}, // 2*pshuflw + 2*pshufhw | |||
1245 | // + 2*pshufd + 2*unpck + 2*packus | |||
1246 | ||||
1247 | {TTI::SK_PermuteTwoSrc, MVT::v2f64, 1}, // shufpd | |||
1248 | {TTI::SK_PermuteTwoSrc, MVT::v2i64, 1}, // shufpd | |||
1249 | {TTI::SK_PermuteTwoSrc, MVT::v4i32, 2}, // 2*{unpck,movsd,pshufd} | |||
1250 | {TTI::SK_PermuteTwoSrc, MVT::v8i16, 8}, // blend+permute | |||
1251 | {TTI::SK_PermuteTwoSrc, MVT::v16i8, 13}, // blend+permute | |||
1252 | }; | |||
1253 | ||||
1254 | if (ST->hasSSE2()) | |||
1255 | if (const auto *Entry = CostTableLookup(SSE2ShuffleTbl, Kind, LT.second)) | |||
1256 | return LT.first * Entry->Cost; | |||
1257 | ||||
1258 | static const CostTblEntry SSE1ShuffleTbl[] = { | |||
1259 | { TTI::SK_Broadcast, MVT::v4f32, 1 }, // shufps | |||
1260 | { TTI::SK_Reverse, MVT::v4f32, 1 }, // shufps | |||
1261 | { TTI::SK_Select, MVT::v4f32, 2 }, // 2*shufps | |||
1262 | { TTI::SK_PermuteSingleSrc, MVT::v4f32, 1 }, // shufps | |||
1263 | { TTI::SK_PermuteTwoSrc, MVT::v4f32, 2 }, // 2*shufps | |||
1264 | }; | |||
1265 | ||||
1266 | if (ST->hasSSE1()) | |||
1267 | if (const auto *Entry = CostTableLookup(SSE1ShuffleTbl, Kind, LT.second)) | |||
1268 | return LT.first * Entry->Cost; | |||
1269 | ||||
1270 | return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); | |||
1271 | } | |||
1272 | ||||
1273 | int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, | |||
1274 | const Instruction *I) { | |||
1275 | int ISD = TLI->InstructionOpcodeToISD(Opcode); | |||
1276 | assert(ISD && "Invalid opcode"); | |||
1277 | ||||
1278 | // FIXME: Need a better design of the cost table to handle non-simple types with | |||
1279 | // potentially massive combinations (elem_num x src_type x dst_type). | |||
1280 | ||||
1281 | static const TypeConversionCostTblEntry AVX512BWConversionTbl[] { | |||
1282 | { ISD::SIGN_EXTEND, MVT::v32i16, MVT::v32i8, 1 }, | |||
1283 | { ISD::ZERO_EXTEND, MVT::v32i16, MVT::v32i8, 1 }, | |||
1284 | ||||
1285 | // Mask sign extend has an instruction. | |||
1286 | { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i1, 1 }, | |||
1287 | { ISD::SIGN_EXTEND, MVT::v16i8, MVT::v16i1, 1 }, | |||
1288 | { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1, 1 }, | |||
1289 | { ISD::SIGN_EXTEND, MVT::v32i8, MVT::v32i1, 1 }, | |||
1290 | { ISD::SIGN_EXTEND, MVT::v32i16, MVT::v32i1, 1 }, | |||
1291 | { ISD::SIGN_EXTEND, MVT::v64i8, MVT::v64i1, 1 }, | |||
1292 | ||||
1293 | // Mask zero extend is a load + broadcast. | |||
1294 | { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i1, 2 }, | |||
1295 | { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v16i1, 2 }, | |||
1296 | { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1, 2 }, | |||
1297 | { ISD::ZERO_EXTEND, MVT::v32i8, MVT::v32i1, 2 }, | |||
1298 | { ISD::ZERO_EXTEND, MVT::v32i16, MVT::v32i1, 2 }, | |||
1299 | { ISD::ZERO_EXTEND, MVT::v64i8, MVT::v64i1, 2 }, | |||
1300 | }; | |||
1301 | ||||
1302 | static const TypeConversionCostTblEntry AVX512DQConversionTbl[] = { | |||
1303 | { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 1 }, | |||
1304 | { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 }, | |||
1305 | { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i64, 1 }, | |||
1306 | { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i64, 1 }, | |||
1307 | { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i64, 1 }, | |||
1308 | { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i64, 1 }, | |||
1309 | ||||
1310 | { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 1 }, | |||
1311 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 }, | |||
1312 | { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i64, 1 }, | |||
1313 | { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 1 }, | |||
1314 | { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i64, 1 }, | |||
1315 | { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 1 }, | |||
1316 | ||||
1317 | { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 1 }, | |||
1318 | { ISD::FP_TO_SINT, MVT::v4i64, MVT::v4f32, 1 }, | |||
1319 | { ISD::FP_TO_SINT, MVT::v8i64, MVT::v8f32, 1 }, | |||
1320 | { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 }, | |||
1321 | { ISD::FP_TO_SINT, MVT::v4i64, MVT::v4f64, 1 }, | |||
1322 | { ISD::FP_TO_SINT, MVT::v8i64, MVT::v8f64, 1 }, | |||
1323 | ||||
1324 | { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 1 }, | |||
1325 | { ISD::FP_TO_UINT, MVT::v4i64, MVT::v4f32, 1 }, | |||
1326 | { ISD::FP_TO_UINT, MVT::v8i64, MVT::v8f32, 1 }, | |||
1327 | { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 }, | |||
1328 | { ISD::FP_TO_UINT, MVT::v4i64, MVT::v4f64, 1 }, | |||
1329 | { ISD::FP_TO_UINT, MVT::v8i64, MVT::v8f64, 1 }, | |||
1330 | }; | |||
1331 | ||||
1332 | // TODO: For AVX512DQ + AVX512VL, we also have cheap casts for 128-bit and | |||
1333 | // 256-bit wide vectors. | |||
1334 | ||||
1335 | static const TypeConversionCostTblEntry AVX512FConversionTbl[] = { | |||
1336 | { ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, 1 }, | |||
1337 | { ISD::FP_EXTEND, MVT::v8f64, MVT::v16f32, 3 }, | |||
1338 | { ISD::FP_ROUND, MVT::v8f32, MVT::v8f64, 1 }, | |||
1339 | ||||
1340 | { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 1 }, | |||
1341 | { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 1 }, | |||
1342 | { ISD::TRUNCATE, MVT::v8i16, MVT::v8i64, 1 }, | |||
1343 | { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 1 }, | |||
1344 | ||||
1345 | // v16i1 -> v16i32 - load + broadcast | |||
1346 | { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i1, 2 }, | |||
1347 | { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i1, 2 }, | |||
1348 | { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 1 }, | |||
1349 | { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 1 }, | |||
1350 | { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 1 }, | |||
1351 | { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 1 }, | |||
1352 | { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 1 }, | |||
1353 | { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 1 }, | |||
1354 | { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 1 }, | |||
1355 | { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 1 }, | |||
1356 | { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i32, 1 }, | |||
1357 | { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i32, 1 }, | |||
1358 | ||||
1359 | { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i1, 4 }, | |||
1360 | { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i1, 3 }, | |||
1361 | { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i8, 2 }, | |||
1362 | { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 2 }, | |||
1363 | { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i16, 2 }, | |||
1364 | { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 2 }, | |||
1365 | { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 1 }, | |||
1366 | { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i32, 1 }, | |||
1367 | ||||
1368 | { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i1, 4 }, | |||
1369 | { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i1, 3 }, | |||
1370 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 2 }, | |||
1371 | { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i8, 2 }, | |||
1372 | { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 2 }, | |||
1373 | { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i8, 2 }, | |||
1374 | { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 2 }, | |||
1375 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 5 }, | |||
1376 | { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i16, 2 }, | |||
1377 | { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 2 }, | |||
1378 | { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i16, 2 }, | |||
1379 | { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i16, 2 }, | |||
1380 | { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 2 }, | |||
1381 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 }, | |||
1382 | { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, | |||
1383 | { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 1 }, | |||
1384 | { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 1 }, | |||
1385 | { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i32, 1 }, | |||
1386 | { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, 1 }, | |||
1387 | { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 5 }, | |||
1388 | { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i64, 26 }, | |||
1389 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 5 }, | |||
1390 | { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 5 }, | |||
1391 | { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 5 }, | |||
1392 | ||||
1393 | { ISD::UINT_TO_FP, MVT::f32, MVT::i64, 1 }, | |||
1394 | { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 1 }, | |||
1395 | { ISD::FP_TO_UINT, MVT::i64, MVT::f32, 1 }, | |||
1396 | { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 1 }, | |||
1397 | ||||
1398 | { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 }, | |||
1399 | { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 }, | |||
1400 | { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f64, 1 }, | |||
1401 | { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f32, 1 }, | |||
1402 | { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f64, 1 }, | |||
1403 | { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f64, 2 }, | |||
1404 | { ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f64, 2 }, | |||
1405 | { ISD::FP_TO_UINT, MVT::v16i32, MVT::v16f32, 1 }, | |||
1406 | { ISD::FP_TO_UINT, MVT::v16i16, MVT::v16f32, 2 }, | |||
1407 | { ISD::FP_TO_UINT, MVT::v16i8, MVT::v16f32, 2 }, | |||
1408 | }; | |||
1409 | ||||
1410 | static const TypeConversionCostTblEntry AVX2ConversionTbl[] = { | |||
1411 | { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 3 }, | |||
1412 | { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 3 }, | |||
1413 | { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 3 }, | |||
1414 | { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 3 }, | |||
1415 | { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 1 }, | |||
1416 | { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 1 }, | |||
1417 | { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 1 }, | |||
1418 | { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 1 }, | |||
1419 | { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 1 }, | |||
1420 | { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 1 }, | |||
1421 | { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 1 }, | |||
1422 | { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 1 }, | |||
1423 | { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, | |||
1424 | { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, | |||
1425 | { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, | |||
1426 | { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, | |||
1427 | { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 3 }, | |||
1428 | { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 3 }, | |||
1429 | ||||
1430 | { ISD::TRUNCATE, MVT::v4i8, MVT::v4i64, 2 }, | |||
1431 | { ISD::TRUNCATE, MVT::v4i16, MVT::v4i64, 2 }, | |||
1432 | { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 2 }, | |||
1433 | { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 2 }, | |||
1434 | { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 2 }, | |||
1435 | { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 4 }, | |||
1436 | ||||
1437 | { ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, 3 }, | |||
1438 | { ISD::FP_ROUND, MVT::v8f32, MVT::v8f64, 3 }, | |||
1439 | ||||
1440 | { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 8 }, | |||
1441 | }; | |||
1442 | ||||
1443 | static const TypeConversionCostTblEntry AVXConversionTbl[] = { | |||
1444 | { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 6 }, | |||
1445 | { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 4 }, | |||
1446 | { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 7 }, | |||
1447 | { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 4 }, | |||
1448 | { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 4 }, | |||
1449 | { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 4 }, | |||
1450 | { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 4 }, | |||
1451 | { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 4 }, | |||
1452 | { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 4 }, | |||
1453 | { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 4 }, | |||
1454 | { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 4 }, | |||
1455 | { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, | |||
1456 | { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 4 }, | |||
1457 | { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 4 }, | |||
1458 | { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 4 }, | |||
1459 | { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 4 }, | |||
1460 | ||||
1461 | { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 4 }, | |||
1462 | { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 4 }, | |||
1463 | { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 5 }, | |||
1464 | { ISD::TRUNCATE, MVT::v4i8, MVT::v4i64, 4 }, | |||
1465 | { ISD::TRUNCATE, MVT::v4i16, MVT::v4i64, 4 }, | |||
1466 | { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 4 }, | |||
1467 | { ISD::TRUNCATE, MVT::v8i8, MVT::v8i64, 11 }, | |||
1468 | { ISD::TRUNCATE, MVT::v8i16, MVT::v8i64, 9 }, | |||
1469 | { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 9 }, | |||
1470 | { ISD::TRUNCATE, MVT::v16i8, MVT::v16i64, 11 }, | |||
1471 | ||||
1472 | { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 }, | |||
1473 | { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i1, 3 }, | |||
1474 | { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i1, 8 }, | |||
1475 | { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 }, | |||
1476 | { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i8, 3 }, | |||
1477 | { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 8 }, | |||
1478 | { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 3 }, | |||
1479 | { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i16, 3 }, | |||
1480 | { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 }, | |||
1481 | { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, | |||
1482 | { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i32, 1 }, | |||
1483 | { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 1 }, | |||
1484 | ||||
1485 | { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 7 }, | |||
1486 | { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i1, 7 }, | |||
1487 | { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i1, 6 }, | |||
1488 | { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 2 }, | |||
1489 | { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i8, 2 }, | |||
1490 | { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 5 }, | |||
1491 | { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 }, | |||
1492 | { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i16, 2 }, | |||
1493 | { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 }, | |||
1494 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 6 }, | |||
1495 | { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 6 }, | |||
1496 | { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 6 }, | |||
1497 | { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 9 }, | |||
1498 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 5 }, | |||
1499 | { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 6 }, | |||
1500 | // The generic code to compute the scalar overhead is currently broken. | |||
1501 | // Work around this limitation by estimating the scalarization overhead | |||
1502 | // here. We have roughly 10 instructions per scalar element. | |||
1503 | // Multiply that by the vector width. | |||
1504 | // FIXME: remove this when PR19268 is fixed. | |||
1505 | { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i64, 13 }, | |||
1507 | ||||
1508 | { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 }, | |||
1509 | { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 7 }, | |||
1510 | // This node is expanded into scalarized operations but BasicTTI is overly | |||
1511 | // optimistic in estimating its cost. It computes 3 per element (one | |||
1512 | // vector-extract, one scalar conversion and one vector-insert). The | |||
1513 | // problem is that the inserts form a read-modify-write chain, so latency | |||
1514 | // should be factored in too. We therefore inflate the cost per element by 1. | |||
1515 | { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f32, 8*4 }, | |||
1516 | { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f64, 4*4 }, | |||
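// Worked example of the inflation above (illustrative): v8i32 <- v8f32 is
// 8 elements x (3 + 1) = 32, which is exactly the 8*4 entry.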
1517 | ||||
1518 | { ISD::FP_EXTEND, MVT::v4f64, MVT::v4f32, 1 }, | |||
1519 | { ISD::FP_ROUND, MVT::v4f32, MVT::v4f64, 1 }, | |||
1520 | }; | |||
1521 | ||||
1522 | static const TypeConversionCostTblEntry SSE41ConversionTbl[] = { | |||
1523 | { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 2 }, | |||
1524 | { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 2 }, | |||
1525 | { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 2 }, | |||
1526 | { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 2 }, | |||
1527 | { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 2 }, | |||
1528 | { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 2 }, | |||
1529 | ||||
1530 | { ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i8, 1 }, | |||
1531 | { ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i8, 2 }, | |||
1532 | { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 1 }, | |||
1533 | { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 1 }, | |||
1534 | { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1 }, | |||
1535 | { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 1 }, | |||
1536 | { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 2 }, | |||
1537 | { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 2 }, | |||
1538 | { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 2 }, | |||
1539 | { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 2 }, | |||
1540 | { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 4 }, | |||
1541 | { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 4 }, | |||
1542 | { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1 }, | |||
1543 | { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 1 }, | |||
1544 | { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 2 }, | |||
1545 | { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 2 }, | |||
1546 | { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 4 }, | |||
1547 | { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 4 }, | |||
1548 | ||||
1549 | { ISD::TRUNCATE, MVT::v4i8, MVT::v4i16, 2 }, | |||
1550 | { ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 1 }, | |||
1551 | { ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 1 }, | |||
1552 | { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 }, | |||
1553 | { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 }, | |||
1554 | { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 3 }, | |||
1555 | { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 6 }, | |||
1556 | { ISD::TRUNCATE, MVT::v2i8, MVT::v2i64, 1 }, // PSHUFB | |||
1557 | ||||
1558 | { ISD::UINT_TO_FP, MVT::f32, MVT::i64, 4 }, | |||
1559 | { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 4 }, | |||
1560 | }; | |||
1561 | ||||
1562 | static const TypeConversionCostTblEntry SSE2ConversionTbl[] = { | |||
1563 | // These are somewhat magic numbers, justified by looking at the output of | |||
1564 | // Intel's IACA and by running some kernels to check that, once legalization | |||
1565 | // is taken into account, the throughput is overestimated (see the note after this table). | |||
1566 | { ISD::SINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 }, | |||
1567 | { ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 }, | |||
1568 | { ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 }, | |||
1569 | { ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 }, | |||
1570 | { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 5 }, | |||
1571 | { ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 2*10 }, | |||
1572 | { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2*10 }, | |||
1573 | { ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 }, | |||
1574 | { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 }, | |||
1575 | ||||
1576 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 }, | |||
1577 | { ISD::UINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 }, | |||
1578 | { ISD::UINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 }, | |||
1579 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 }, | |||
1580 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 }, | |||
1581 | { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 8 }, | |||
1582 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 6 }, | |||
1583 | { ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 }, | |||
1584 | ||||
1585 | { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 }, | |||
1586 | { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 }, | |||
1587 | ||||
1588 | { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 3 }, | |||
1589 | ||||
1590 | { ISD::UINT_TO_FP, MVT::f32, MVT::i64, 6 }, | |||
1591 | { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 6 }, | |||
1592 | ||||
1593 | { ISD::FP_TO_UINT, MVT::i64, MVT::f32, 4 }, | |||
1594 | { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 4 }, | |||
1595 | ||||
1596 | { ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i8, 1 }, | |||
1597 | { ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i8, 6 }, | |||
1598 | { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 2 }, | |||
1599 | { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 3 }, | |||
1600 | { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 4 }, | |||
1601 | { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 8 }, | |||
1602 | { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1 }, | |||
1603 | { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 2 }, | |||
1604 | { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 6 }, | |||
1605 | { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 6 }, | |||
1606 | { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 3 }, | |||
1607 | { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 4 }, | |||
1608 | { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 9 }, | |||
1609 | { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 12 }, | |||
1610 | { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1 }, | |||
1611 | { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 2 }, | |||
1612 | { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, | |||
1613 | { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 10 }, | |||
1614 | { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 3 }, | |||
1615 | { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 4 }, | |||
1616 | { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 6 }, | |||
1617 | { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 8 }, | |||
1618 | { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 3 }, | |||
1619 | { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 5 }, | |||
1620 | ||||
1621 | { ISD::TRUNCATE, MVT::v2i8, MVT::v2i16, 2 }, // PAND+PACKUSWB | |||
1622 | { ISD::TRUNCATE, MVT::v4i8, MVT::v4i16, 4 }, | |||
1623 | { ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 2 }, | |||
1624 | { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 3 }, | |||
1625 | { ISD::TRUNCATE, MVT::v2i8, MVT::v2i32, 3 }, // PAND+3*PACKUSWB | |||
1626 | { ISD::TRUNCATE, MVT::v2i16, MVT::v2i32, 1 }, | |||
1627 | { ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 3 }, | |||
1628 | { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 3 }, | |||
1629 | { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 4 }, | |||
1630 | { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 7 }, | |||
1631 | { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 5 }, | |||
1632 | { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 10 }, | |||
1633 | { ISD::TRUNCATE, MVT::v2i8, MVT::v2i64, 4 }, // PAND+3*PACKUSWB | |||
1634 | { ISD::TRUNCATE, MVT::v2i16, MVT::v2i64, 2 }, // PSHUFD+PSHUFLW | |||
1635 | { ISD::TRUNCATE, MVT::v2i32, MVT::v2i64, 1 }, // PSHUFD | |||
1636 | }; | |||
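// Note on the table above (a reading aid): the N*10 entries model full
// scalarization at roughly 10 instructions per scalar element, matching the
// PR19268 workaround in the AVX table; e.g. v2f64 <- v16i8 is
// 16 elements x 10 = 160.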
1637 | ||||
1638 | std::pair<int, MVT> LTSrc = TLI->getTypeLegalizationCost(DL, Src); | |||
1639 | std::pair<int, MVT> LTDest = TLI->getTypeLegalizationCost(DL, Dst); | |||
1640 | ||||
1641 | if (ST->hasSSE2() && !ST->hasAVX()) { | |||
1642 | if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD, | |||
1643 | LTDest.second, LTSrc.second)) | |||
1644 | return LTSrc.first * Entry->Cost; | |||
1645 | } | |||
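// This SSE2-only lookup deliberately uses the legalized types and scales by
// LTSrc.first, so conversions on types that must be split are charged once
// per legalized piece; the lookups below use the original (simple) types and
// return the table cost unscaled.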
1646 | ||||
1647 | EVT SrcTy = TLI->getValueType(DL, Src); | |||
1648 | EVT DstTy = TLI->getValueType(DL, Dst); | |||
1649 | ||||
1650 | // The function getSimpleVT only handles simple value types. | |||
1651 | if (!SrcTy.isSimple() || !DstTy.isSimple()) | |||
1652 | return BaseT::getCastInstrCost(Opcode, Dst, Src); | |||
1653 | ||||
1654 | MVT SimpleSrcTy = SrcTy.getSimpleVT(); | |||
1655 | MVT SimpleDstTy = DstTy.getSimpleVT(); | |||
1656 | ||||
1657 | // Make sure that neither type is going to be split before using the | |||
1658 | // AVX512 tables. This handles -mprefer-vector-width=256 | |||
1659 | // with -min-legal-vector-width<=256 | |||
1660 | if (TLI->getTypeAction(SimpleSrcTy) != TargetLowering::TypeSplitVector && | |||
1661 | TLI->getTypeAction(SimpleDstTy) != TargetLowering::TypeSplitVector) { | |||
1662 | if (ST->hasBWI()) | |||
1663 | if (const auto *Entry = ConvertCostTableLookup(AVX512BWConversionTbl, ISD, | |||
1664 | SimpleDstTy, SimpleSrcTy)) | |||
1665 | return Entry->Cost; | |||
1666 | ||||
1667 | if (ST->hasDQI()) | |||
1668 | if (const auto *Entry = ConvertCostTableLookup(AVX512DQConversionTbl, ISD, | |||
1669 | SimpleDstTy, SimpleSrcTy)) | |||
1670 | return Entry->Cost; | |||
1671 | ||||
1672 | if (ST->hasAVX512()) | |||
1673 | if (const auto *Entry = ConvertCostTableLookup(AVX512FConversionTbl, ISD, | |||
1674 | SimpleDstTy, SimpleSrcTy)) | |||
1675 | return Entry->Cost; | |||
1676 | } | |||
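// Illustrative case for the guard above: with -mprefer-vector-width=256,
// v16i32 is split into 2 x v8i32, so consulting the 512-bit AVX512 entries
// directly would understate the real cost.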
1677 | ||||
1678 | if (ST->hasAVX2()) { | |||
1679 | if (const auto *Entry = ConvertCostTableLookup(AVX2ConversionTbl, ISD, | |||
1680 | SimpleDstTy, SimpleSrcTy)) | |||
1681 | return Entry->Cost; | |||
1682 | } | |||
1683 | ||||
1684 | if (ST->hasAVX()) { | |||
1685 | if (const auto *Entry = ConvertCostTableLookup(AVXConversionTbl, ISD, | |||
1686 | SimpleDstTy, SimpleSrcTy)) | |||
1687 | return Entry->Cost; | |||
1688 | } | |||
1689 | ||||
1690 | if (ST->hasSSE41()) { | |||
1691 | if (const auto *Entry = ConvertCostTableLookup(SSE41ConversionTbl, ISD, | |||
1692 | SimpleDstTy, SimpleSrcTy)) | |||
1693 | return Entry->Cost; | |||
1694 | } | |||
1695 | ||||
1696 | if (ST->hasSSE2()) { | |||
1697 | if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD, | |||
1698 | SimpleDstTy, SimpleSrcTy)) | |||
1699 | return Entry->Cost; | |||
1700 | } | |||
1701 | ||||
1702 | return BaseT::getCastInstrCost(Opcode, Dst, Src, I); | |||
1703 | } | |||
1704 | ||||
1705 | int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, | |||
1706 | const Instruction *I) { | |||
1707 | // Legalize the type. | |||
1708 | std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy); | |||
1709 | ||||
1710 | MVT MTy = LT.second; | |||
1711 | ||||
1712 | int ISD = TLI->InstructionOpcodeToISD(Opcode); | |||
1713 | assert(ISD && "Invalid opcode"); | |||
1714 | ||||
1715 | unsigned ExtraCost = 0; | |||
1716 | if (I && (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp)) { | |||
1717 | // Some vector comparison predicates cost extra instructions. | |||
1718 | if (MTy.isVector() && | |||
1719 | !((ST->hasXOP() && (!ST->hasAVX2() || MTy.is128BitVector())) || | |||
1720 | (ST->hasAVX512() && 32 <= MTy.getScalarSizeInBits()) || | |||
1721 | ST->hasBWI())) { | |||
1722 | switch (cast<CmpInst>(I)->getPredicate()) { | |||
1723 | case CmpInst::Predicate::ICMP_NE: | |||
1724 | // xor(cmpeq(x,y),-1) | |||
1725 | ExtraCost = 1; | |||
1726 | break; | |||
1727 | case CmpInst::Predicate::ICMP_SGE: | |||
1728 | case CmpInst::Predicate::ICMP_SLE: | |||
1729 | // xor(cmpgt(x,y),-1) | |||
1730 | ExtraCost = 1; | |||
1731 | break; | |||
1732 | case CmpInst::Predicate::ICMP_ULT: | |||
1733 | case CmpInst::Predicate::ICMP_UGT: | |||
1734 | // cmpgt(xor(x,signbit),xor(y,signbit)) | |||
1735 | // xor(cmpeq(pmaxu(x,y),x),-1) | |||
1736 | ExtraCost = 2; | |||
1737 | break; | |||
1738 | case CmpInst::Predicate::ICMP_ULE: | |||
1739 | case CmpInst::Predicate::ICMP_UGE: | |||
1740 | if ((ST->hasSSE41() && MTy.getScalarSizeInBits() == 32) || | |||
1741 | (ST->hasSSE2() && MTy.getScalarSizeInBits() < 32)) { | |||
1742 | // cmpeq(psubus(x,y),0) | |||
1743 | // cmpeq(pminu(x,y),x) | |||
1744 | ExtraCost = 1; | |||
1745 | } else { | |||
1746 | // xor(cmpgt(xor(x,signbit),xor(y,signbit)),-1) | |||
1747 | ExtraCost = 3; | |||
1748 | } | |||
1749 | break; | |||
1750 | default: | |||
1751 | break; | |||
1752 | } | |||
1753 | } | |||
1754 | } | |||
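// Worked example (illustrative): an ICMP_UGE on v4i32 without SSE41 takes
// the ExtraCost = 3 path above, modelling
// xor(cmpgt(xor(x,signbit),xor(y,signbit)),-1) on top of the base SETCC cost.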
1755 | ||||
1756 | static const CostTblEntry SLMCostTbl[] = { | |||
1757 | // SLM pcmpeq/pcmpgt throughput is 2 | |||
1758 | { ISD::SETCC, MVT::v2i64, 2 }, | |||
1759 | }; | |||
1760 | ||||
1761 | static const CostTblEntry AVX512BWCostTbl[] = { | |||
1762 | { ISD::SETCC, MVT::v32i16, 1 }, | |||
1763 | { ISD::SETCC, MVT::v64i8, 1 }, | |||
1764 | ||||
1765 | { ISD::SELECT, MVT::v32i16, 1 }, | |||
1766 | { ISD::SELECT, MVT::v64i8, 1 }, | |||
1767 | }; | |||
1768 | ||||
1769 | static const CostTblEntry AVX512CostTbl[] = { | |||
1770 | { ISD::SETCC, MVT::v8i64, 1 }, | |||
1771 | { ISD::SETCC, MVT::v16i32, 1 }, | |||
1772 | { ISD::SETCC, MVT::v8f64, 1 }, | |||
1773 | { ISD::SETCC, MVT::v16f32, 1 }, | |||
1774 | ||||
1775 | { ISD::SELECT, MVT::v8i64, 1 }, | |||
1776 | { ISD::SELECT, MVT::v16i32, 1 }, | |||
1777 | { ISD::SELECT, MVT::v8f64, 1 }, | |||
1778 | { ISD::SELECT, MVT::v16f32, 1 }, | |||
1779 | }; | |||
1780 | ||||
1781 | static const CostTblEntry AVX2CostTbl[] = { | |||
1782 | { ISD::SETCC, MVT::v4i64, 1 }, | |||
1783 | { ISD::SETCC, MVT::v8i32, 1 }, | |||
1784 | { ISD::SETCC, MVT::v16i16, 1 }, | |||
1785 | { ISD::SETCC, MVT::v32i8, 1 }, | |||
1786 | ||||
1787 | { ISD::SELECT, MVT::v4i64, 1 }, // pblendvb | |||
1788 | { ISD::SELECT, MVT::v8i32, 1 }, // pblendvb | |||
1789 | { ISD::SELECT, MVT::v16i16, 1 }, // pblendvb | |||
1790 | { ISD::SELECT, MVT::v32i8, 1 }, // pblendvb | |||
1791 | }; | |||
1792 | ||||
1793 | static const CostTblEntry AVX1CostTbl[] = { | |||
1794 | { ISD::SETCC, MVT::v4f64, 1 }, | |||
1795 | { ISD::SETCC, MVT::v8f32, 1 }, | |||
1796 | // AVX1 does not support 8-wide integer compare. | |||
1797 | { ISD::SETCC, MVT::v4i64, 4 }, | |||
1798 | { ISD::SETCC, MVT::v8i32, 4 }, | |||
1799 | { ISD::SETCC, MVT::v16i16, 4 }, | |||
1800 | { ISD::SETCC, MVT::v32i8, 4 }, | |||
1801 | ||||
1802 | { ISD::SELECT, MVT::v4f64, 1 }, // vblendvpd | |||
1803 | { ISD::SELECT, MVT::v8f32, 1 }, // vblendvps | |||
1804 | { ISD::SELECT, MVT::v4i64, 1 }, // vblendvpd | |||
1805 | { ISD::SELECT, MVT::v8i32, 1 }, // vblendvps | |||
1806 | { ISD::SELECT, MVT::v16i16, 3 }, // vandps + vandnps + vorps | |||
1807 | { ISD::SELECT, MVT::v32i8, 3 }, // vandps + vandnps + vorps | |||
1808 | }; | |||
1809 | ||||
1810 | static const CostTblEntry SSE42CostTbl[] = { | |||
1811 | { ISD::SETCC, MVT::v2f64, 1 }, | |||
1812 | { ISD::SETCC, MVT::v4f32, 1 }, | |||
1813 | { ISD::SETCC, MVT::v2i64, 1 }, | |||
1814 | }; | |||
1815 | ||||
1816 | static const CostTblEntry SSE41CostTbl[] = { | |||
1817 | { ISD::SELECT, MVT::v2f64, 1 }, // blendvpd | |||
1818 | { ISD::SELECT, MVT::v4f32, 1 }, // blendvps | |||
1819 | { ISD::SELECT, MVT::v2i64, 1 }, // pblendvb | |||
1820 | { ISD::SELECT, MVT::v4i32, 1 }, // pblendvb | |||
1821 | { ISD::SELECT, MVT::v8i16, 1 }, // pblendvb | |||
1822 | { ISD::SELECT, MVT::v16i8, 1 }, // pblendvb | |||
1823 | }; | |||
1824 | ||||
1825 | static const CostTblEntry SSE2CostTbl[] = { | |||
1826 | { ISD::SETCC, MVT::v2f64, 2 }, | |||
1827 | { ISD::SETCC, MVT::f64, 1 }, | |||
1828 | { ISD::SETCC, MVT::v2i64, 8 }, | |||
1829 | { ISD::SETCC, MVT::v4i32, 1 }, | |||
1830 | { ISD::SETCC, MVT::v8i16, 1 }, | |||
1831 | { ISD::SETCC, MVT::v16i8, 1 }, | |||
1832 | ||||
1833 | { ISD::SELECT, MVT::v2f64, 3 }, // andpd + andnpd + orpd | |||
1834 | { ISD::SELECT, MVT::v2i64, 3 }, // pand + pandn + por | |||
1835 | { ISD::SELECT, MVT::v4i32, 3 }, // pand + pandn + por | |||
1836 | { ISD::SELECT, MVT::v8i16, 3 }, // pand + pandn + por | |||
1837 | { ISD::SELECT, MVT::v16i8, 3 }, // pand + pandn + por | |||
1838 | }; | |||
1839 | ||||
1840 | static const CostTblEntry SSE1CostTbl[] = { | |||
1841 | { ISD::SETCC, MVT::v4f32, 2 }, | |||
1842 | { ISD::SETCC, MVT::f32, 1 }, | |||
1843 | ||||
1844 | { ISD::SELECT, MVT::v4f32, 3 }, // andps + andnps + orps | |||
1845 | }; | |||
1846 | ||||
1847 | if (ST->isSLM()) | |||
1848 | if (const auto *Entry = CostTableLookup(SLMCostTbl, ISD, MTy)) | |||
1849 | return LT.first * (ExtraCost + Entry->Cost); | |||
1850 | ||||
1851 | if (ST->hasBWI()) | |||
1852 | if (const auto *Entry = CostTableLookup(AVX512BWCostTbl, ISD, MTy)) | |||
1853 | return LT.first * (ExtraCost + Entry->Cost); | |||
1854 | ||||
1855 | if (ST->hasAVX512()) | |||
1856 | if (const auto *Entry = CostTableLookup(AVX512CostTbl, ISD, MTy)) | |||
1857 | return LT.first * (ExtraCost + Entry->Cost); | |||
1858 | ||||
1859 | if (ST->hasAVX2()) | |||
1860 | if (const auto *Entry = CostTableLookup(AVX2CostTbl, ISD, MTy)) | |||
1861 | return LT.first * (ExtraCost + Entry->Cost); | |||
1862 | ||||
1863 | if (ST->hasAVX()) | |||
1864 | if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy)) | |||
1865 | return LT.first * (ExtraCost + Entry->Cost); | |||
1866 | ||||
1867 | if (ST->hasSSE42()) | |||
1868 | if (const auto *Entry = CostTableLookup(SSE42CostTbl, ISD, MTy)) | |||
1869 | return LT.first * (ExtraCost + Entry->Cost); | |||
1870 | ||||
1871 | if (ST->hasSSE41()) | |||
1872 | if (const auto *Entry = CostTableLookup(SSE41CostTbl, ISD, MTy)) | |||
1873 | return LT.first * (ExtraCost + Entry->Cost); | |||
1874 | ||||
1875 | if (ST->hasSSE2()) | |||
1876 | if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy)) | |||
1877 | return LT.first * (ExtraCost + Entry->Cost); | |||
1878 | ||||
1879 | if (ST->hasSSE1()) | |||
1880 | if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy)) | |||
1881 | return LT.first * (ExtraCost + Entry->Cost); | |||
1882 | ||||
1883 | return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I); | |||
1884 | } | |||
1885 | ||||
1886 | unsigned X86TTIImpl::getAtomicMemIntrinsicMaxElementSize() const { return 16; } | |||
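// An editorial note: the 16-byte cap presumably reflects the widest single
// (128-bit) memory access usable when expanding the element-unordered-atomic
// memory intrinsics; the override itself just returns the constant.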
1887 | ||||
1888 | int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, | |||
1889 | ArrayRef<Type *> Tys, FastMathFlags FMF, | |||
1890 | unsigned ScalarizationCostPassed) { | |||
1891 | // Costs should match the codegen from: | |||
1892 | // BITREVERSE: llvm\test\CodeGen\X86\vector-bitreverse.ll | |||
1893 | // BSWAP: llvm\test\CodeGen\X86\bswap-vector.ll | |||
1894 | // CTLZ: llvm\test\CodeGen\X86\vector-lzcnt-*.ll | |||
1895 | // CTPOP: llvm\test\CodeGen\X86\vector-popcnt-*.ll | |||
1896 | // CTTZ: llvm\test\CodeGen\X86\vector-tzcnt-*.ll | |||
1897 | static const CostTblEntry AVX512CDCostTbl[] = { | |||
1898 | { ISD::CTLZ, MVT::v8i64, 1 }, | |||
1899 | { ISD::CTLZ, MVT::v16i32, 1 }, | |||
1900 | { ISD::CTLZ, MVT::v32i16, 8 }, | |||
1901 | { ISD::CTLZ, MVT::v64i8, 20 }, | |||
1902 | { ISD::CTLZ, MVT::v4i64, 1 }, | |||
1903 | { ISD::CTLZ, MVT::v8i32, 1 }, | |||
1904 | { ISD::CTLZ, MVT::v16i16, 4 }, | |||
1905 | { ISD::CTLZ, MVT::v32i8, 10 }, | |||
1906 | { ISD::CTLZ, MVT::v2i64, 1 }, | |||
1907 | { ISD::CTLZ, MVT::v4i32, 1 }, | |||
1908 | { ISD::CTLZ, MVT::v8i16, 4 }, | |||
1909 | { ISD::CTLZ, MVT::v16i8, 4 }, | |||
1910 | }; | |||
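// The unit i32/i64 CTLZ costs above come from AVX512CD's native
// vplzcntd/vplzcntq leading-zero-count instructions.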
1911 | static const CostTblEntry AVX512BWCostTbl[] = { | |||
1912 | { ISD::BITREVERSE, MVT::v8i64, 5 }, | |||
1913 | { ISD::BITREVERSE, MVT::v16i32, 5 }, | |||
1914 | { ISD::BITREVERSE, MVT::v32i16, 5 }, | |||
1915 | { ISD::BITREVERSE, MVT::v64i8, 5 }, | |||
1916 | { ISD::CTLZ, MVT::v8i64, 23 }, | |||
1917 | { ISD::CTLZ, MVT::v16i32, 22 }, | |||
1918 | { ISD::CTLZ, MVT::v32i16, 18 }, | |||
1919 | { ISD::CTLZ, MVT::v64i8, 17 }, | |||
1920 | { ISD::CTPOP, MVT::v8i64, 7 }, | |||
1921 | { ISD::CTPOP, MVT::v16i32, 11 }, | |||
1922 | { ISD::CTPOP, MVT::v32i16, 9 }, | |||
1923 | { ISD::CTPOP, MVT::v64i8, 6 }, | |||
1924 | { ISD::CTTZ, MVT::v8i64, 10 }, | |||
1925 | { ISD::CTTZ, MVT::v16i32, 14 }, | |||
1926 | { ISD::CTTZ, MVT::v32i16, 12 }, | |||
1927 | { ISD::CTTZ, MVT::v64i8, 9 }, | |||
1928 | { ISD::SADDSAT, MVT::v32i16, 1 }, | |||
1929 | { ISD::SADDSAT, MVT::v64i8, 1 }, | |||
1930 | { ISD::SSUBSAT, MVT::v32i16, 1 }, | |||
1931 | { ISD::SSUBSAT, MVT::v64i8, 1 }, | |||
1932 | { ISD::UADDSAT, MVT::v32i16, 1 }, | |||
1933 | { ISD::UADDSAT, MVT::v64i8, 1 }, | |||
1934 | { ISD::USUBSAT, MVT::v32i16, 1 }, | |||
1935 | { ISD::USUBSAT, MVT::v64i8, 1 }, | |||
1936 | }; | |||
1937 | static const CostTblEntry AVX512CostTbl[] = { | |||
1938 | { ISD::BITREVERSE, MVT::v8i64, 36 }, | |||
1939 | { ISD::BITREVERSE, MVT::v16i32, 24 }, | |||
1940 | { ISD::CTLZ, MVT::v8i64, 29 }, | |||
1941 | { ISD::CTLZ, MVT::v16i32, 35 }, | |||
1942 | { ISD::CTPOP, MVT::v8i64, 16 }, | |||
1943 | { ISD::CTPOP, MVT::v16i32, 24 }, | |||
1944 | { ISD::CTTZ, MVT::v8i64, 20 }, | |||
1945 | { ISD::CTTZ, MVT::v16i32, 28 }, | |||
1946 | { ISD::USUBSAT, MVT::v16i32, 2 }, // pmaxud + psubd | |||
1947 | { ISD::USUBSAT, MVT::v2i64, 2 }, // pmaxuq + psubq | |||
1948 | { ISD::USUBSAT, MVT::v4i64, 2 }, // pmaxuq + psubq | |||
1949 | { ISD::USUBSAT, MVT::v8i64, 2 }, // pmaxuq + psubq | |||
1950 | { ISD::UADDSAT, MVT::v16i32, 3 }, // not + pminud + paddd | |||
1951 | { ISD::UADDSAT, MVT::v2i64, 3 }, // not + pminuq + paddq | |||
1952 | { ISD::UADDSAT, MVT::v4i64, 3 }, // not + pminuq + paddq | |||
1953 | { ISD::UADDSAT, MVT::v8i64, 3 }, // not + pminuq + paddq | |||
1954 | }; | |||
1955 | static const CostTblEntry XOPCostTbl[] = { | |||
1956 | { ISD::BITREVERSE, MVT::v4i64, 4 }, | |||
1957 | { ISD::BITREVERSE, MVT::v8i32, 4 }, | |||
1958 | { ISD::BITREVERSE, MVT::v16i16, 4 }, | |||
1959 | { ISD::BITREVERSE, MVT::v32i8, 4 }, | |||
1960 | { ISD::BITREVERSE, MVT::v2i64, 1 }, | |||
1961 | { ISD::BITREVERSE, MVT::v4i32, 1 }, | |||
1962 | { ISD::BITREVERSE, MVT::v8i16, 1 }, | |||
1963 | { ISD::BITREVERSE, MVT::v16i8, 1 }, | |||
1964 | { ISD::BITREVERSE, MVT::i64, 3 }, | |||
1965 | { ISD::BITREVERSE, MVT::i32, 3 }, | |||
1966 | { ISD::BITREVERSE, MVT::i16, 3 }, | |||
1967 | { ISD::BITREVERSE, MVT::i8, 3 } | |||
1968 | }; | |||
1969 | static const CostTblEntry AVX2CostTbl[] = { | |||
1970 | { ISD::BITREVERSE, MVT::v4i64, 5 }, | |||
1971 | { ISD::BITREVERSE, MVT::v8i32, 5 }, | |||
1972 | { ISD::BITREVERSE, MVT::v16i16, 5 }, | |||
1973 | { ISD::BITREVERSE, MVT::v32i8, 5 }, | |||
1974 | { ISD::BSWAP, MVT::v4i64, 1 }, | |||
1975 | { ISD::BSWAP, MVT::v8i32, 1 }, | |||
1976 | { ISD::BSWAP, MVT::v16i16, 1 }, | |||
1977 | { ISD::CTLZ, MVT::v4i64, 23 }, | |||
1978 | { ISD::CTLZ, MVT::v8i32, 18 }, | |||
1979 | { ISD::CTLZ, MVT::v16i16, 14 }, | |||
1980 | { ISD::CTLZ, MVT::v32i8, 9 }, | |||
1981 | { ISD::CTPOP, MVT::v4i64, 7 }, | |||
1982 | { ISD::CTPOP, MVT::v8i32, 11 }, | |||
1983 | { ISD::CTPOP, MVT::v16i16, 9 }, | |||
1984 | { ISD::CTPOP, MVT::v32i8, 6 }, | |||
1985 | { ISD::CTTZ, MVT::v4i64, 10 }, | |||
1986 | { ISD::CTTZ, MVT::v8i32, 14 }, | |||
1987 | { ISD::CTTZ, MVT::v16i16, 12 }, | |||
1988 | { ISD::CTTZ, MVT::v32i8, 9 }, | |||
1989 | { ISD::SADDSAT, MVT::v16i16, 1 }, | |||
1990 | { ISD::SADDSAT, MVT::v32i8, 1 }, | |||
1991 | { ISD::SSUBSAT, MVT::v16i16, 1 }, | |||
1992 | { ISD::SSUBSAT, MVT::v32i8, 1 }, | |||
1993 | { ISD::UADDSAT, MVT::v16i16, 1 }, | |||
1994 | { ISD::UADDSAT, MVT::v32i8, 1 }, | |||
1995 | { ISD::UADDSAT, MVT::v8i32, 3 }, // not + pminud + paddd | |||
1996 | { ISD::USUBSAT, MVT::v16i16, 1 }, | |||
1997 | { ISD::USUBSAT, MVT::v32i8, 1 }, | |||
1998 | { ISD::USUBSAT, MVT::v8i32, 2 }, // pmaxud + psubd | |||
1999 | { ISD::FSQRT, MVT::f32, 7 }, // Haswell from http://www.agner.org/ | |||
2000 | { ISD::FSQRT, MVT::v4f32, 7 }, // Haswell from http://www.agner.org/ | |||
2001 | { ISD::FSQRT, MVT::v8f32, 14 }, // Haswell from http://www.agner.org/ | |||
2002 | { ISD::FSQRT, MVT::f64, 14 }, // Haswell from http://www.agner.org/ | |||
2003 | { ISD::FSQRT, MVT::v2f64, 14 }, // Haswell from http://www.agner.org/ | |||
2004 | { ISD::FSQRT, MVT::v4f64, 28 }, // Haswell from http://www.agner.org/ | |||
2005 | }; | |||
2006 | static const CostTblEntry AVX1CostTbl[] = { | |||
2007 | { ISD::BITREVERSE, MVT::v4i64, 12 }, // 2 x 128-bit Op + extract/insert | |||
2008 | { ISD::BITREVERSE, MVT::v8i32, 12 }, // 2 x 128-bit Op + extract/insert | |||
2009 | { ISD::BITREVERSE, MVT::v16i16, 12 }, // 2 x 128-bit Op + extract/insert | |||
2010 | { ISD::BITREVERSE, MVT::v32i8, 12 }, // 2 x 128-bit Op + extract/insert | |||
2011 | { ISD::BSWAP, MVT::v4i64, 4 }, | |||
2012 | { ISD::BSWAP, MVT::v8i32, 4 }, | |||
2013 | { ISD::BSWAP, MVT::v16i16, 4 }, | |||
2014 | { ISD::CTLZ, MVT::v4i64, 48 }, // 2 x 128-bit Op + extract/insert | |||
2015 | { ISD::CTLZ, MVT::v8i32, 38 }, // 2 x 128-bit Op + extract/insert | |||
2016 | { ISD::CTLZ, MVT::v16i16, 30 }, // 2 x 128-bit Op + extract/insert | |||
2017 | { ISD::CTLZ, MVT::v32i8, 20 }, // 2 x 128-bit Op + extract/insert | |||
2018 | { ISD::CTPOP, MVT::v4i64, 16 }, // 2 x 128-bit Op + extract/insert | |||
2019 | { ISD::CTPOP, MVT::v8i32, 24 }, // 2 x 128-bit Op + extract/insert | |||
2020 | { ISD::CTPOP, MVT::v16i16, 20 }, // 2 x 128-bit Op + extract/insert | |||
2021 | { ISD::CTPOP, MVT::v32i8, 14 }, // 2 x 128-bit Op + extract/insert | |||
2022 | { ISD::CTTZ, MVT::v4i64, 22 }, // 2 x 128-bit Op + extract/insert | |||
2023 | { ISD::CTTZ, MVT::v8i32, 30 }, // 2 x 128-bit Op + extract/insert | |||
2024 | { ISD::CTTZ, MVT::v16i16, 26 }, // 2 x 128-bit Op + extract/insert | |||
2025 | { ISD::CTTZ, MVT::v32i8, 20 }, // 2 x 128-bit Op + extract/insert | |||
2026 | { ISD::SADDSAT, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert | |||
2027 | { ISD::SADDSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert | |||
2028 | { ISD::SSUBSAT, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert | |||
2029 | { ISD::SSUBSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert | |||
2030 | { ISD::UADDSAT, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert | |||
2031 | { ISD::UADDSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert | |||
2032 | { ISD::UADDSAT, MVT::v8i32, 8 }, // 2 x 128-bit Op + extract/insert | |||
2033 | { ISD::USUBSAT, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert | |||
2034 | { ISD::USUBSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert | |||
2035 | { ISD::USUBSAT, MVT::v8i32, 6 }, // 2 x 128-bit Op + extract/insert | |||
2036 | { ISD::FSQRT, MVT::f32, 14 }, // SNB from http://www.agner.org/ | |||
2037 | { ISD::FSQRT, MVT::v4f32, 14 }, // SNB from http://www.agner.org/ | |||
2038 | { ISD::FSQRT, MVT::v8f32, 28 }, // SNB from http://www.agner.org/ | |||
2039 | { ISD::FSQRT, MVT::f64, 21 }, // SNB from http://www.agner.org/ | |||
2040 | { ISD::FSQRT, MVT::v2f64, 21 }, // SNB from http://www.agner.org/ | |||
2041 | { ISD::FSQRT, MVT::v4f64, 43 }, // SNB from http://www.agner.org/ | |||
2042 | }; | |||
2043 | static const CostTblEntry GLMCostTbl[] = { | |||
2044 | { ISD::FSQRT, MVT::f32, 19 }, // sqrtss | |||
2045 | { ISD::FSQRT, MVT::v4f32, 37 }, // sqrtps | |||
2046 | { ISD::FSQRT, MVT::f64, 34 }, // sqrtsd | |||
2047 | { ISD::FSQRT, MVT::v2f64, 67 }, // sqrtpd | |||
2048 | }; | |||
2049 | static const CostTblEntry SLMCostTbl[] = { | |||
2050 | { ISD::FSQRT, MVT::f32, 20 }, // sqrtss | |||
2051 | { ISD::FSQRT, MVT::v4f32, 40 }, // sqrtps | |||
2052 | { ISD::FSQRT, MVT::f64, 35 }, // sqrtsd | |||
2053 | { ISD::FSQRT, MVT::v2f64, 70 }, // sqrtpd | |||
2054 | }; | |||
2055 | static const CostTblEntry SSE42CostTbl[] = { | |||
2056 | { ISD::USUBSAT, MVT::v4i32, 2 }, // pmaxud + psubd | |||
2057 | { ISD::UADDSAT, MVT::v4i32, 3 }, // not + pminud + paddd | |||
2058 | { ISD::FSQRT, MVT::f32, 18 }, // Nehalem from http://www.agner.org/ | |||
2059 | { ISD::FSQRT, MVT::v4f32, 18 }, // Nehalem from http://www.agner.org/ | |||
2060 | }; | |||
2061 | static const CostTblEntry SSSE3CostTbl[] = { | |||
2062 | { ISD::BITREVERSE, MVT::v2i64, 5 }, | |||
2063 | { ISD::BITREVERSE, MVT::v4i32, 5 }, | |||
2064 | { ISD::BITREVERSE, MVT::v8i16, 5 }, | |||
2065 | { ISD::BITREVERSE, MVT::v16i8, 5 }, | |||
2066 | { ISD::BSWAP, MVT::v2i64, 1 }, | |||
2067 | { ISD::BSWAP, MVT::v4i32, 1 }, | |||
2068 | { ISD::BSWAP, MVT::v8i16, 1 }, | |||
2069 | { ISD::CTLZ, MVT::v2i64, 23 }, | |||
2070 | { ISD::CTLZ, MVT::v4i32, 18 }, | |||
2071 | { ISD::CTLZ, MVT::v8i16, 14 }, | |||
2072 | { ISD::CTLZ, MVT::v16i8, 9 }, | |||
2073 | { ISD::CTPOP, MVT::v2i64, 7 }, | |||
2074 | { ISD::CTPOP, MVT::v4i32, 11 }, | |||
2075 | { ISD::CTPOP, MVT::v8i16, 9 }, | |||
2076 | { ISD::CTPOP, MVT::v16i8, 6 }, | |||
2077 | { ISD::CTTZ, MVT::v2i64, 10 }, | |||
2078 | { ISD::CTTZ, MVT::v4i32, 14 }, | |||
2079 | { ISD::CTTZ, MVT::v8i16, 12 }, | |||
2080 | { ISD::CTTZ, MVT::v16i8, 9 } | |||
2081 | }; | |||
2082 | static const CostTblEntry SSE2CostTbl[] = { | |||
2083 | { ISD::BITREVERSE, MVT::v2i64, 29 }, | |||
2084 | { ISD::BITREVERSE, MVT::v4i32, 27 }, | |||
2085 | { ISD::BITREVERSE, MVT::v8i16, 27 }, | |||
2086 | { ISD::BITREVERSE, MVT::v16i8, 20 }, | |||
2087 | { ISD::BSWAP, MVT::v2i64, 7 }, | |||
2088 | { ISD::BSWAP, MVT::v4i32, 7 }, | |||
2089 | { ISD::BSWAP, MVT::v8i16, 7 }, | |||
2090 | { ISD::CTLZ, MVT::v2i64, 25 }, | |||
2091 | { ISD::CTLZ, MVT::v4i32, 26 }, | |||
2092 | { ISD::CTLZ, MVT::v8i16, 20 }, | |||
2093 | { ISD::CTLZ, MVT::v16i8, 17 }, | |||
2094 | { ISD::CTPOP, MVT::v2i64, 12 }, | |||
2095 | { ISD::CTPOP, MVT::v4i32, 15 }, | |||
2096 | { ISD::CTPOP, MVT::v8i16, 13 }, | |||
2097 | { ISD::CTPOP, MVT::v16i8, 10 }, | |||
2098 | { ISD::CTTZ, MVT::v2i64, 14 }, | |||
2099 | { ISD::CTTZ, MVT::v4i32, 18 }, | |||
2100 | { ISD::CTTZ, MVT::v8i16, 16 }, | |||
2101 | { ISD::CTTZ, MVT::v16i8, 13 }, | |||
2102 | { ISD::SADDSAT, MVT::v8i16, 1 }, | |||
2103 | { ISD::SADDSAT, MVT::v16i8, 1 }, | |||
2104 | { ISD::SSUBSAT, MVT::v8i16, 1 }, | |||
2105 | { ISD::SSUBSAT, MVT::v16i8, 1 }, | |||
2106 | { ISD::UADDSAT, MVT::v8i16, 1 }, | |||
2107 | { ISD::UADDSAT, MVT::v16i8, 1 }, | |||
2108 | { ISD::USUBSAT, MVT::v8i16, 1 }, | |||
2109 | { ISD::USUBSAT, MVT::v16i8, 1 }, | |||
2110 | { ISD::FSQRT, MVT::f64, 32 }, // Nehalem from http://www.agner.org/ | |||
2111 | { ISD::FSQRT, MVT::v2f64, 32 }, // Nehalem from http://www.agner.org/ | |||
2112 | }; | |||
2113 | static const CostTblEntry SSE1CostTbl[] = { | |||
2114 | { ISD::FSQRT, MVT::f32, 28 }, // Pentium III from http://www.agner.org/ | |||
2115 | { ISD::FSQRT, MVT::v4f32, 56 }, // Pentium III from http://www.agner.org/ | |||
2116 | }; | |||
2117 | static const CostTblEntry LZCNT64CostTbl[] = { // 64-bit targets | |||
2118 | { ISD::CTLZ, MVT::i64, 1 }, | |||
2119 | }; | |||
2120 | static const CostTblEntry LZCNT32CostTbl[] = { // 32 or 64-bit targets | |||
2121 | { ISD::CTLZ, MVT::i32, 1 }, | |||
2122 | { ISD::CTLZ, MVT::i16, 1 }, | |||
2123 | { ISD::CTLZ, MVT::i8, 1 }, | |||
2124 | }; | |||
2125 | static const CostTblEntry POPCNT64CostTbl[] = { // 64-bit targets | |||
2126 | { ISD::CTPOP, MVT::i64, 1 }, | |||
2127 | }; | |||
2128 | static const CostTblEntry POPCNT32CostTbl[] = { // 32 or 64-bit targets | |||
2129 | { ISD::CTPOP, MVT::i32, 1 }, | |||
2130 | { ISD::CTPOP, MVT::i16, 1 }, | |||
2131 | { ISD::CTPOP, MVT::i8, 1 }, | |||
2132 | }; | |||
2133 | static const CostTblEntry X64CostTbl[] = { // 64-bit targets | |||
2134 | { ISD::BITREVERSE, MVT::i64, 14 }, | |||
2135 | { ISD::CTLZ, MVT::i64, 4 }, // BSR+XOR or BSR+XOR+CMOV | |||
2136 | { ISD::CTPOP, MVT::i64, 10 }, | |||
2137 | { ISD::SADDO, MVT::i64, 1 }, | |||
2138 | { ISD::UADDO, MVT::i64, 1 }, | |||
2139 | }; | |||
2140 | static const CostTblEntry X86CostTbl[] = { // 32 or 64-bit targets | |||
2141 | { ISD::BITREVERSE, MVT::i32, 14 }, | |||
2142 | { ISD::BITREVERSE, MVT::i16, 14 }, | |||
2143 | { ISD::BITREVERSE, MVT::i8, 11 }, | |||
2144 | { ISD::CTLZ, MVT::i32, 4 }, // BSR+XOR or BSR+XOR+CMOV | |||
2145 | { ISD::CTLZ, MVT::i16, 4 }, // BSR+XOR or BSR+XOR+CMOV | |||
2146 | { ISD::CTLZ, MVT::i8, 4 }, // BSR+XOR or BSR+XOR+CMOV | |||
2147 | { ISD::CTPOP, MVT::i32, 8 }, | |||
2148 | { ISD::CTPOP, MVT::i16, 9 }, | |||
2149 | { ISD::CTPOP, MVT::i8, 7 }, | |||
2150 | { ISD::SADDO, MVT::i32, 1 }, | |||
2151 | { ISD::SADDO, MVT::i16, 1 }, | |||
2152 | { ISD::SADDO, MVT::i8, 1 }, | |||
2153 | { ISD::UADDO, MVT::i32, 1 }, | |||
2154 | { ISD::UADDO, MVT::i16, 1 }, | |||
2155 | { ISD::UADDO, MVT::i8, 1 }, | |||
2156 | }; | |||
2157 | ||||
2158 | Type *OpTy = RetTy; | |||
2159 | unsigned ISD = ISD::DELETED_NODE; | |||
2160 | switch (IID) { | |||
2161 | default: | |||
2162 | break; | |||
2163 | case Intrinsic::bitreverse: | |||
2164 | ISD = ISD::BITREVERSE; | |||
2165 | break; | |||
2166 | case Intrinsic::bswap: | |||
2167 | ISD = ISD::BSWAP; | |||
2168 | break; | |||
2169 | case Intrinsic::ctlz: | |||
2170 | ISD = ISD::CTLZ; | |||
2171 | break; | |||
2172 | case Intrinsic::ctpop: | |||
2173 | ISD = ISD::CTPOP; | |||
2174 | break; | |||
2175 | case Intrinsic::cttz: | |||
2176 | ISD = ISD::CTTZ; | |||
2177 | break; | |||
2178 | case Intrinsic::sadd_sat: | |||
2179 | ISD = ISD::SADDSAT; | |||
2180 | break; | |||
2181 | case Intrinsic::ssub_sat: | |||
2182 | ISD = ISD::SSUBSAT; | |||
2183 | break; | |||
2184 | case Intrinsic::uadd_sat: | |||
2185 | ISD = ISD::UADDSAT; | |||
2186 | break; | |||
2187 | case Intrinsic::usub_sat: | |||
2188 | ISD = ISD::USUBSAT; | |||
2189 | break; | |||
2190 | case Intrinsic::sqrt: | |||
2191 | ISD = ISD::FSQRT; | |||
2192 | break; | |||
2193 | case Intrinsic::sadd_with_overflow: | |||
2194 | case Intrinsic::ssub_with_overflow: | |||
2195 | // SSUBO has the same costs, so don't duplicate. | |||
2196 | ISD = ISD::SADDO; | |||
2197 | OpTy = RetTy->getContainedType(0); | |||
2198 | break; | |||
2199 | case Intrinsic::uadd_with_overflow: | |||
2200 | case Intrinsic::usub_with_overflow: | |||
2201 | // USUBO has the same costs, so don't duplicate. | |||
2202 | ISD = ISD::UADDO; | |||
2203 | OpTy = RetTy->getContainedType(0); | |||
2204 | break; | |||
2205 | } | |||
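// For the *_with_overflow intrinsics, RetTy is the {result, overflow bit}
// struct type, so getContainedType(0) above selects the arithmetic result
// type for legalization.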
2206 | ||||
2207 | if (ISD != ISD::DELETED_NODE) { | |||
2208 | // Legalize the type. | |||
2209 | std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, OpTy); | |||
2210 | MVT MTy = LT.second; | |||
2211 | ||||
2212 | // Attempt to lookup cost. | |||
2213 | if (ST->useGLMDivSqrtCosts()) | |||
2214 | if (const auto *Entry = CostTableLookup(GLMCostTbl, ISD, MTy)) | |||
2215 | return LT.first * Entry->Cost; | |||
2216 | ||||
2217 | if (ST->isSLM()) | |||
2218 | if (const auto *Entry = CostTableLookup(SLMCostTbl, ISD, MTy)) | |||
2219 | return LT.first * Entry->Cost; | |||
2220 | ||||
2221 | if (ST->hasCDI()) | |||
2222 | if (const auto *Entry = CostTableLookup(AVX512CDCostTbl, ISD, MTy)) | |||
2223 | return LT.first * Entry->Cost; | |||
2224 | ||||
2225 | if (ST->hasBWI()) | |||
2226 | if (const auto *Entry = CostTableLookup(AVX512BWCostTbl, ISD, MTy)) | |||
2227 | return LT.first * Entry->Cost; | |||
2228 | ||||
2229 | if (ST->hasAVX512()) | |||
2230 | if (const auto *Entry = CostTableLookup(AVX512CostTbl, ISD, MTy)) | |||
2231 | return LT.first * Entry->Cost; | |||
2232 | ||||
2233 | if (ST->hasXOP()) | |||
2234 | if (const auto *Entry = CostTableLookup(XOPCostTbl, ISD, MTy)) | |||
2235 | return LT.first * Entry->Cost; | |||
2236 | ||||
2237 | if (ST->hasAVX2()) | |||
2238 | if (const auto *Entry = CostTableLookup(AVX2CostTbl, ISD, MTy)) | |||
2239 | return LT.first * Entry->Cost; | |||
2240 | ||||
2241 | if (ST->hasAVX()) | |||
2242 | if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy)) | |||
2243 | return LT.first * Entry->Cost; | |||
2244 | ||||
2245 | if (ST->hasSSE42()) | |||
2246 | if (const auto *Entry = CostTableLookup(SSE42CostTbl, ISD, MTy)) | |||
2247 | return LT.first * Entry->Cost; | |||
2248 | ||||
2249 | if (ST->hasSSSE3()) | |||
2250 | if (const auto *Entry = CostTableLookup(SSSE3CostTbl, ISD, MTy)) | |||
2251 | return LT.first * Entry->Cost; | |||
2252 | ||||
2253 | if (ST->hasSSE2()) | |||
2254 | if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy)) | |||
2255 | return LT.first * Entry->Cost; | |||
2256 | ||||
2257 | if (ST->hasSSE1()) | |||
2258 | if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy)) | |||
2259 | return LT.first * Entry->Cost; | |||
2260 | ||||
2261 | if (ST->hasLZCNT()) { | |||
2262 | if (ST->is64Bit()) | |||
2263 | if (const auto *Entry = CostTableLookup(LZCNT64CostTbl, ISD, MTy)) | |||
2264 | return LT.first * Entry->Cost; | |||
2265 | ||||
2266 | if (const auto *Entry = CostTableLookup(LZCNT32CostTbl, ISD, MTy)) | |||
2267 | return LT.first * Entry->Cost; | |||
2268 | } | |||
2269 | ||||
2270 | if (ST->hasPOPCNT()) { | |||
2271 | if (ST->is64Bit()) | |||
2272 | if (const auto *Entry = CostTableLookup(POPCNT64CostTbl, ISD, MTy)) | |||
2273 | return LT.first * Entry->Cost; | |||
2274 | ||||
2275 | if (const auto *Entry = CostTableLookup(POPCNT32CostTbl, ISD, MTy)) | |||
2276 | return LT.first * Entry->Cost; | |||
2277 | } | |||
2278 | ||||
2279 | // TODO - add BMI (TZCNT) scalar handling | |||
2280 | ||||
2281 | if (ST->is64Bit()) | |||
2282 | if (const auto *Entry = CostTableLookup(X64CostTbl, ISD, MTy)) | |||
2283 | return LT.first * Entry->Cost; | |||
2284 | ||||
2285 | if (const auto *Entry = CostTableLookup(X86CostTbl, ISD, MTy)) | |||
2286 | return LT.first * Entry->Cost; | |||
2287 | } | |||
2288 | ||||
2289 | return BaseT::getIntrinsicInstrCost(IID, RetTy, Tys, FMF, ScalarizationCostPassed); | |||
2290 | } | |||
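// Illustrative sketch (hypothetical table and type names): the cascade above
// is a first-hit-wins lookup ordered from the most specific subtarget
// feature down to the most generic, so a CPU with AVX512BW takes the
// AVX512BW entry for an (ISD, MVT) pair even when an SSE2 entry for the
// same pair also exists. A minimal standalone model of that policy:
#include <optional>
#include <vector>

struct Entry { int ISD; int MVT; int Cost; };

static std::optional<int> lookup(const std::vector<Entry> &Tbl, int ISD, int MVT) {
  for (const Entry &E : Tbl)            // linear scan, first match wins,
    if (E.ISD == ISD && E.MVT == MVT)   // mirroring the CostTableLookup calls
      return E.Cost;
  return std::nullopt;
}

static int tieredCost(bool HasAVX512, const std::vector<Entry> &AVX512Tbl,
                      const std::vector<Entry> &SSE2Tbl,
                      int ISD, int MVT, int SplitFactor, int Fallback) {
  if (HasAVX512)                        // newest feature tier first
    if (auto C = lookup(AVX512Tbl, ISD, MVT))
      return SplitFactor * *C;          // LT.first * Entry->Cost above
  if (auto C = lookup(SSE2Tbl, ISD, MVT))
    return SplitFactor * *C;
  return Fallback;                      // BaseT::getIntrinsicInstrCost above
}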
2291 | ||||
2292 | int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, | |||
2293 | ArrayRef<Value *> Args, FastMathFlags FMF, | |||
2294 | unsigned VF) { | |||
2295 | static const CostTblEntry AVX512CostTbl[] = { | |||
2296 | { ISD::ROTL, MVT::v8i64, 1 }, | |||
2297 | { ISD::ROTL, MVT::v4i64, 1 }, | |||
2298 | { ISD::ROTL, MVT::v2i64, 1 }, | |||
2299 | { ISD::ROTL, MVT::v16i32, 1 }, | |||
2300 | { ISD::ROTL, MVT::v8i32, 1 }, | |||
2301 | { ISD::ROTL, MVT::v4i32, 1 }, | |||
2302 | { ISD::ROTR, MVT::v8i64, 1 }, | |||
2303 | { ISD::ROTR, MVT::v4i64, 1 }, | |||
2304 | { ISD::ROTR, MVT::v2i64, 1 }, | |||
2305 | { ISD::ROTR, MVT::v16i32, 1 }, | |||
2306 | { ISD::ROTR, MVT::v8i32, 1 }, | |||
2307 | { ISD::ROTR, MVT::v4i32, 1 } | |||
2308 | }; | |||
2309 | // XOP: ROTL = VPROT(X,Y), ROTR = VPROT(X,SUB(0,Y)) | |||
2310 | static const CostTblEntry XOPCostTbl[] = { | |||
2311 | { ISD::ROTL, MVT::v4i64, 4 }, | |||
2312 | { ISD::ROTL, MVT::v8i32, 4 }, | |||
2313 | { ISD::ROTL, MVT::v16i16, 4 }, | |||
2314 | { ISD::ROTL, MVT::v32i8, 4 }, | |||
2315 | { ISD::ROTL, MVT::v2i64, 1 }, | |||
2316 | { ISD::ROTL, MVT::v4i32, 1 }, | |||
2317 | { ISD::ROTL, MVT::v8i16, 1 }, | |||
2318 | { ISD::ROTL, MVT::v16i8, 1 }, | |||
2319 | { ISD::ROTR, MVT::v4i64, 6 }, | |||
2320 | { ISD::ROTR, MVT::v8i32, 6 }, | |||
2321 | { ISD::ROTR, MVT::v16i16, 6 }, | |||
2322 | { ISD::ROTR, MVT::v32i8, 6 }, | |||
2323 | { ISD::ROTR, MVT::v2i64, 2 }, | |||
2324 | { ISD::ROTR, MVT::v4i32, 2 }, | |||
2325 | { ISD::ROTR, MVT::v8i16, 2 }, | |||
2326 | { ISD::ROTR, MVT::v16i8, 2 } | |||
2327 | }; | |||
2328 | static const CostTblEntry X64CostTbl[] = { // 64-bit targets | |||
2329 | { ISD::ROTL, MVT::i64, 1 }, | |||
2330 | { ISD::ROTR, MVT::i64, 1 }, | |||
2331 | { ISD::FSHL, MVT::i64, 4 } | |||
2332 | }; | |||
2333 | static const CostTblEntry X86CostTbl[] = { // 32 or 64-bit targets | |||
2334 | { ISD::ROTL, MVT::i32, 1 }, | |||
2335 | { ISD::ROTL, MVT::i16, 1 }, | |||
2336 | { ISD::ROTL, MVT::i8, 1 }, | |||
2337 | { ISD::ROTR, MVT::i32, 1 }, | |||
2338 | { ISD::ROTR, MVT::i16, 1 }, | |||
2339 | { ISD::ROTR, MVT::i8, 1 }, | |||
2340 | { ISD::FSHL, MVT::i32, 4 }, | |||
2341 | { ISD::FSHL, MVT::i16, 4 }, | |||
2342 | { ISD::FSHL, MVT::i8, 4 } | |||
2343 | }; | |||
2344 | ||||
2345 | unsigned ISD = ISD::DELETED_NODE; | |||
2346 | switch (IID) { | |||
2347 | default: | |||
2348 | break; | |||
2349 | case Intrinsic::fshl: | |||
2350 | ISD = ISD::FSHL; | |||
2351 | if (Args[0] == Args[1]) | |||
2352 | ISD = ISD::ROTL; | |||
2353 | break; | |||
2354 | case Intrinsic::fshr: | |||
2355 | // FSHR has the same costs, so don't duplicate. | |||
2356 | ISD = ISD::FSHL; | |||
2357 | if (Args[0] == Args[1]) | |||
2358 | ISD = ISD::ROTR; | |||
2359 | break; | |||
2360 | } | |||
2361 | ||||
2362 | if (ISD != ISD::DELETED_NODE) { | |||
2363 | // Legalize the type. | |||
2364 | std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy); | |||
2365 | MVT MTy = LT.second; | |||
2366 | ||||
2367 | // Attempt to lookup cost. | |||
2368 | if (ST->hasAVX512()) | |||
2369 | if (const auto *Entry = CostTableLookup(AVX512CostTbl, ISD, MTy)) | |||
2370 | return LT.first * Entry->Cost; | |||
2371 | ||||
2372 | if (ST->hasXOP()) | |||
2373 | if (const auto *Entry = CostTableLookup(XOPCostTbl, ISD, MTy)) | |||
2374 | return LT.first * Entry->Cost; | |||
2375 | ||||
2376 | if (ST->is64Bit()) | |||
2377 | if (const auto *Entry = CostTableLookup(X64CostTbl, ISD, MTy)) | |||
2378 | return LT.first * Entry->Cost; | |||
2379 | ||||
2380 | if (const auto *Entry = CostTableLookup(X86CostTbl, ISD, MTy)) | |||
2381 | return LT.first * Entry->Cost; | |||
2382 | } | |||
2383 | ||||
2384 | return BaseT::getIntrinsicInstrCost(IID, RetTy, Args, FMF, VF); | |||
2385 | } | |||
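// Illustrative note: a funnel shift with both value operands equal is a
// rotate, which is why the switch above remaps fshl/fshr to ROTL/ROTR when
// Args[0] == Args[1]. In IR:
//   %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %c)  ; == rotl(%x, %c)
// A plain-C++ rotate of the kind x86 can lower to a single ROL (a sketch,
// not the backend's actual lowering code):
#include <cstdint>

static uint32_t rotl32(uint32_t X, uint32_t C) {
  C &= 31;                                   // rotate count modulo bit width
  return (X << C) | (X >> ((32 - C) & 31));  // masking avoids UB when C == 0
}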
2386 | ||||
2387 | int X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { | |||
2388 | static const CostTblEntry SLMCostTbl[] = { | |||
2389 | { ISD::EXTRACT_VECTOR_ELT, MVT::i8, 4 }, | |||
2390 | { ISD::EXTRACT_VECTOR_ELT, MVT::i16, 4 }, | |||
2391 | { ISD::EXTRACT_VECTOR_ELT, MVT::i32, 4 }, | |||
2392 | { ISD::EXTRACT_VECTOR_ELT, MVT::i64, 7 } | |||
2393 | }; | |||
2394 | ||||
2395 | assert(Val->isVectorTy() && "This must be a vector type"); | |||
2396 | ||||
2397 | Type *ScalarType = Val->getScalarType(); | |||
2398 | ||||
2399 | if (Index != -1U) { | |||
2400 | // Legalize the type. | |||
2401 | std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Val); | |||
2402 | ||||
2403 | // This type is legalized to a scalar type. | |||
2404 | if (!LT.second.isVector()) | |||
2405 | return 0; | |||
2406 | ||||
2407 | // The type may be split. Normalize the index to the new type. | |||
2408 | unsigned Width = LT.second.getVectorNumElements(); | |||
2409 | Index = Index % Width; | |||
2410 | ||||
2411 | if (Index == 0) { | |||
2412 | // Floating point scalars are already located in index #0. | |||
2413 | if (ScalarType->isFloatingPointTy()) | |||
2414 | return 0; | |||
2415 | ||||
2416 | // Assume movd/movq XMM <-> GPR is relatively cheap on all targets. | |||
2417 | if (ScalarType->isIntegerTy()) | |||
2418 | return 1; | |||
2419 | } | |||
2420 | ||||
2421 | int ISD = TLI->InstructionOpcodeToISD(Opcode); | |||
2422 | assert(ISD && "Unexpected vector opcode"); | |||
2423 | MVT MScalarTy = LT.second.getScalarType(); | |||
2424 | if (ST->isSLM()) | |||
2425 | if (auto *Entry = CostTableLookup(SLMCostTbl, ISD, MScalarTy)) | |||
2426 | return Entry->Cost; | |||
2427 | } | |||
2428 | ||||
2429 | // Add to the base cost if we know that the extracted element of a vector is | |||
2430 | // destined to be moved to and used in the integer register file. | |||
2431 | int RegisterFileMoveCost = 0; | |||
2432 | if (Opcode == Instruction::ExtractElement && ScalarType->isPointerTy()) | |||
2433 | RegisterFileMoveCost = 1; | |||
2434 | ||||
2435 | return BaseT::getVectorInstrCost(Opcode, Val, Index) + RegisterFileMoveCost; | |||
2436 | } | |||
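// Worked example (illustrative): on SSE2 a v8i32 is legalized into two
// v4i32 registers, so Width == 4 above and an extract at Index 6 is
// normalized to 6 % 4 == 2 within its half; Index 4 normalizes to 0 and
// takes the cheap movd/movq path. A hypothetical helper mirroring that step:
static unsigned normalizeExtractIndex(unsigned Index, unsigned LegalWidth) {
  return Index % LegalWidth; // e.g. lane 6 of v8i32 -> lane 2 of the high v4i32
}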
2437 | ||||
2438 | int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, | |||
2439 | MaybeAlign Alignment, unsigned AddressSpace, | |||
2440 | const Instruction *I) { | |||
2441 | // Handle non-power-of-two vectors such as <3 x float> | |||
2442 | if (VectorType *VTy = dyn_cast<VectorType>(Src)) { | |||
2443 | unsigned NumElem = VTy->getVectorNumElements(); | |||
2444 | ||||
2445 | // Handle a few common cases: | |||
2446 | // <3 x float> | |||
2447 | if (NumElem == 3 && VTy->getScalarSizeInBits() == 32) | |||
2448 | // Cost = 64 bit store + extract + 32 bit store. | |||
2449 | return 3; | |||
2450 | ||||
2451 | // <3 x double> | |||
2452 | if (NumElem == 3 && VTy->getScalarSizeInBits() == 64) | |||
2453 | // Cost = 128 bit store + unpack + 64 bit store. | |||
2454 | return 3; | |||
2455 | ||||
2456 | // Assume that all other non-power-of-two numbers are scalarized. | |||
2457 | if (!isPowerOf2_32(NumElem)) { | |||
2458 | int Cost = BaseT::getMemoryOpCost(Opcode, VTy->getScalarType(), Alignment, | |||
2459 | AddressSpace); | |||
2460 | int SplitCost = getScalarizationOverhead(Src, Opcode == Instruction::Load, | |||
2461 | Opcode == Instruction::Store); | |||
2462 | return NumElem * Cost + SplitCost; | |||
2463 | } | |||
2464 | } | |||
2465 | ||||
2466 | // Legalize the type. | |||
2467 | std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src); | |||
2468 | assert((Opcode == Instruction::Load || Opcode == Instruction::Store) && | |||
2469 | "Invalid Opcode"); | |||
2470 | ||||
2471 | // Each load/store unit costs 1. | |||
2472 | int Cost = LT.first * 1; | |||
2473 | ||||
2474 | // This isn't exactly right. We're using slow unaligned 32-byte accesses as a | |||
2475 | // proxy for a double-pumped AVX memory interface such as on Sandybridge. | |||
2476 | if (LT.second.getStoreSize() == 32 && ST->isUnalignedMem32Slow()) | |||
2477 | Cost *= 2; | |||
2478 | ||||
2479 | return Cost; | |||
2480 | } | |||
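// Illustrative sketch of the <3 x float> store modeled as cost 3 above
// (64-bit store + extract + 32-bit store); this assumes that lowering shape
// and is not the exact code the backend emits:
#include <xmmintrin.h>

static void store3f(float *P, __m128 V) {
  _mm_storel_pi((__m64 *)P, V);             // 64-bit store of lanes 0..1
  __m128 Lane2 = _mm_shuffle_ps(V, V, 2);   // move lane 2 into lane 0
  _mm_store_ss(P + 2, Lane2);               // 32-bit store of the last element
}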
2481 | ||||
2482 | int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy, | |||
2483 | unsigned Alignment, | |||
2484 | unsigned AddressSpace) { | |||
2485 | bool IsLoad = (Instruction::Load == Opcode); | |||
2486 | bool IsStore = (Instruction::Store == Opcode); | |||
2487 | ||||
2488 | VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy); | |||
2489 | if (!SrcVTy) | |||
2490 | // For a scalar, take the regular cost without the mask. | |||
2491 | return getMemoryOpCost(Opcode, SrcTy, MaybeAlign(Alignment), AddressSpace); | |||
2492 | ||||
2493 | unsigned NumElem = SrcVTy->getVectorNumElements(); | |||
2494 | VectorType *MaskTy = | |||
2495 | VectorType::get(Type::getInt8Ty(SrcVTy->getContext()), NumElem); | |||
2496 | if ((IsLoad && !isLegalMaskedLoad(SrcVTy, MaybeAlign(Alignment))) || | |||
2497 | (IsStore && !isLegalMaskedStore(SrcVTy, MaybeAlign(Alignment))) || | |||
2498 | !isPowerOf2_32(NumElem)) { | |||
2499 | // Scalarization | |||
2500 | int MaskSplitCost = getScalarizationOverhead(MaskTy, false, true); | |||
2501 | int ScalarCompareCost = getCmpSelInstrCost( | |||
2502 | Instruction::ICmp, Type::getInt8Ty(SrcVTy->getContext()), nullptr); | |||
2503 | int BranchCost = getCFInstrCost(Instruction::Br); | |||
2504 | int MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost); | |||
2505 | ||||
2506 | int ValueSplitCost = getScalarizationOverhead(SrcVTy, IsLoad, IsStore); | |||
2507 | int MemopCost = | |||
2508 | NumElem * BaseT::getMemoryOpCost(Opcode, SrcVTy->getScalarType(), | |||
2509 | MaybeAlign(Alignment), AddressSpace); | |||
2510 | return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost; | |||
2511 | } | |||
2512 | ||||
2513 | // Legalize the type. | |||
2514 | std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, SrcVTy); | |||
2515 | auto VT = TLI->getValueType(DL, SrcVTy); | |||
2516 | int Cost = 0; | |||
2517 | if (VT.isSimple() && LT.second != VT.getSimpleVT() && | |||
2518 | LT.second.getVectorNumElements() == NumElem) | |||
2519 | // Promotion requires expand/truncate for data and a shuffle for mask. | |||
2520 | Cost += getShuffleCost(TTI::SK_PermuteTwoSrc, SrcVTy, 0, nullptr) + | |||
2521 | getShuffleCost(TTI::SK_PermuteTwoSrc, MaskTy, 0, nullptr); | |||
2522 | ||||
2523 | else if (LT.second.getVectorNumElements() > NumElem) { | |||
2524 | VectorType *NewMaskTy = VectorType::get(MaskTy->getVectorElementType(), | |||
2525 | LT.second.getVectorNumElements()); | |||
2526 | // Expanding requires filling the mask with zeroes. | |||
2527 | Cost += getShuffleCost(TTI::SK_InsertSubvector, NewMaskTy, 0, MaskTy); | |||
2528 | } | |||
2529 | ||||
2530 | // Pre-AVX512 - each maskmov load costs 2; each store costs ~8. | |||
2531 | if (!ST->hasAVX512()) | |||
2532 | return Cost + LT.first * (IsLoad ? 2 : 8); | |||
2533 | ||||
2534 | // AVX-512 masked load/store is cheaper. | |||
2535 | return Cost + LT.first; | |||
2536 | } | |||
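// Plain-C++ shape of the scalarization branch above (hypothetical helper):
// one compare + branch per lane plus a scalar memory op, which is exactly
// what MaskCmpCost, MemopCost and the two split costs add up.
static void maskedLoadScalarized(const float *Src, const bool *Mask,
                                 float *Dst, unsigned NumElem) {
  for (unsigned I = 0; I != NumElem; ++I)
    if (Mask[I])       // ScalarCompareCost + BranchCost per element
      Dst[I] = Src[I]; // scalar MemopCost; insert covered by ValueSplitCost
}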
2537 | ||||
2538 | int X86TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE, | |||
2539 | const SCEV *Ptr) { | |||
2540 | // Address computations in vectorized code with non-consecutive addresses will | |||
2541 | // likely result in more instructions compared to scalar code where the | |||
2542 | // computation can more often be merged into the index mode. The resulting | |||
2543 | // extra micro-ops can significantly decrease throughput. | |||
2544 | const unsigned NumVectorInstToHideOverhead = 10; | |||
2545 | ||||
2546 | // The cost of a strided access computation is hidden by X86's indexing | |||
2547 | // modes regardless of the stride value. We don't believe there is a | |||
2548 | // difference between a constant strided access in general and one whose | |||
2549 | // stride value is less than or equal to 64. | |||
2550 | // Even in the case of (loop invariant) stride whose value is not known at | |||
2551 | // compile time, the address computation will not incur more than one extra | |||
2552 | // ADD instruction. | |||
2553 | if (Ty->isVectorTy() && SE) { | |||
2554 | if (!BaseT::isStridedAccess(Ptr)) | |||
2555 | return NumVectorInstToHideOverhead; | |||
2556 | if (!BaseT::getConstantStrideStep(SE, Ptr)) | |||
2557 | return 1; | |||
2558 | } | |||
2559 | ||||
2560 | return BaseT::getAddressComputationCost(Ty, SE, Ptr); | |||
2561 | } | |||
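// Illustrative example of the "at most one extra ADD" claim above: with a
// strided, loop-invariant step the address computation reduces to a pointer
// bumped by a single ADD per iteration, which is why the model returns 1
// (hypothetical helper):
static float sumStrided(const float *A, long Stride, unsigned N) {
  float S = 0;
  for (unsigned I = 0; I != N; ++I, A += Stride) // one ADD per iteration
    S += *A;
  return S;
}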
2562 | ||||
2563 | int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy, | |||
2564 | bool IsPairwise) { | |||
2565 | // We use the Intel Architecture Code Analyzer (IACA) to measure the | |||
2566 | // throughput and use it as the cost. | |||
2567 | ||||
2568 | static const CostTblEntry SLMCostTblPairWise[] = { | |||
2569 | { ISD::FADD, MVT::v2f64, 3 }, | |||
2570 | { ISD::ADD, MVT::v2i64, 5 }, | |||
2571 | }; | |||
2572 | ||||
2573 | static const CostTblEntry SSE2CostTblPairWise[] = { | |||
2574 | { ISD::FADD, MVT::v2f64, 2 }, | |||
2575 | { ISD::FADD, MVT::v4f32, 4 }, | |||
2576 | { ISD::ADD, MVT::v2i64, 2 }, // The data reported by the IACA tool is "1.6". | |||
2577 | { ISD::ADD, MVT::v2i32, 2 }, // FIXME: chosen to be less than v4i32. | |||
2578 | { ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "3.5". | |||
2579 | { ISD::ADD, MVT::v2i16, 3 }, // FIXME: chosen to be less than v4i16 | |||
2580 | { ISD::ADD, MVT::v4i16, 4 }, // FIXME: chosen to be less than v8i16 | |||
2581 | { ISD::ADD, MVT::v8i16, 5 }, | |||
2582 | { ISD::ADD, MVT::v2i8, 2 }, | |||
2583 | { ISD::ADD, MVT::v4i8, 2 }, | |||
2584 | { ISD::ADD, MVT::v8i8, 2 }, | |||
2585 | { ISD::ADD, MVT::v16i8, 3 }, | |||
2586 | }; | |||
2587 | ||||
2588 | static const CostTblEntry AVX1CostTblPairWise[] = { | |||
2589 | { ISD::FADD, MVT::v4f64, 5 }, | |||
2590 | { ISD::FADD, MVT::v8f32, 7 }, | |||
2591 | { ISD::ADD, MVT::v2i64, 1 }, // The data reported by the IACA tool is "1.5". | |||
2592 | { ISD::ADD, MVT::v4i64, 5 }, // The data reported by the IACA tool is "4.8". | |||
2593 | { ISD::ADD, MVT::v8i32, 5 }, | |||
2594 | { ISD::ADD, MVT::v16i16, 6 }, | |||
2595 | { ISD::ADD, MVT::v32i8, 4 }, | |||
2596 | }; | |||
2597 | ||||
2598 | static const CostTblEntry SLMCostTblNoPairWise[] = { | |||
2599 | { ISD::FADD, MVT::v2f64, 3 }, | |||
2600 | { ISD::ADD, MVT::v2i64, 5 }, | |||
2601 | }; | |||
2602 | ||||
2603 | static const CostTblEntry SSE2CostTblNoPairWise[] = { | |||
2604 | { ISD::FADD, MVT::v2f64, 2 }, | |||
2605 | { ISD::FADD, MVT::v4f32, 4 }, | |||
2606 | { ISD::ADD, MVT::v2i64, 2 }, // The data reported by the IACA tool is "1.6". | |||
2607 | { ISD::ADD, MVT::v2i32, 2 }, // FIXME: chosen to be less than v4i32 | |||
2608 | { ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "3.3". | |||
2609 | { ISD::ADD, MVT::v2i16, 2 }, // The data reported by the IACA tool is "4.3". | |||
2610 | { ISD::ADD, MVT::v4i16, 3 }, // The data reported by the IACA tool is "4.3". | |||
2611 | { ISD::ADD, MVT::v8i16, 4 }, // The data reported by the IACA tool is "4.3". | |||
2612 | { ISD::ADD, MVT::v2i8, 2 }, | |||
2613 | { ISD::ADD, MVT::v4i8, 2 }, | |||
2614 | { ISD::ADD, MVT::v8i8, 2 }, | |||
2615 | { ISD::ADD, MVT::v16i8, 3 }, | |||
2616 | }; | |||
2617 | ||||
2618 | static const CostTblEntry AVX1CostTblNoPairWise[] = { | |||
2619 | { ISD::FADD, MVT::v4f64, 3 }, | |||
2620 | { ISD::FADD, MVT::v4f32, 3 }, | |||
2621 | { ISD::FADD, MVT::v8f32, 4 }, | |||
2622 | { ISD::ADD, MVT::v2i64, 1 }, // The data reported by the IACA tool is "1.5". | |||
2623 | { ISD::ADD, MVT::v4i64, 3 }, | |||
2624 | { ISD::ADD, MVT::v8i32, 5 }, | |||
2625 | { ISD::ADD, MVT::v16i16, 5 }, | |||
2626 | { ISD::ADD, MVT::v32i8, 4 }, | |||
2627 | }; | |||
2628 | ||||
2629 | int ISD = TLI->InstructionOpcodeToISD(Opcode); | |||
2630 | assert(ISD && "Invalid opcode"); | |||
2631 | ||||
2632 | // Before legalizing the type, give a chance to look up illegal narrow types | |||
2633 | // in the table. | |||
2634 | // FIXME: Is there a better way to do this? | |||
2635 | EVT VT = TLI->getValueType(DL, ValTy); | |||
2636 | if (VT.isSimple()) { | |||
2637 | MVT MTy = VT.getSimpleVT(); | |||
2638 | if (IsPairwise) { | |||
2639 | if (ST->isSLM()) | |||
2640 | if (const auto *Entry = CostTableLookup(SLMCostTblPairWise, ISD, MTy)) | |||
2641 | return Entry->Cost; | |||
2642 | ||||
2643 | if (ST->hasAVX()) | |||
2644 | if (const auto *Entry = CostTableLookup(AVX1CostTblPairWise, ISD, MTy)) | |||
2645 | return Entry->Cost; | |||
2646 | ||||
2647 | if (ST->hasSSE2()) | |||
2648 | if (const auto *Entry = CostTableLookup(SSE2CostTblPairWise, ISD, MTy)) | |||
2649 | return Entry->Cost; | |||
2650 | } else { | |||
2651 | if (ST->isSLM()) | |||
2652 | if (const auto *Entry = CostTableLookup(SLMCostTblNoPairWise, ISD, MTy)) | |||
2653 | return Entry->Cost; | |||
2654 | ||||
2655 | if (ST->hasAVX()) | |||
2656 | if (const auto *Entry = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy)) | |||
2657 | return Entry->Cost; | |||
2658 | ||||
2659 | if (ST->hasSSE2()) | |||
2660 | if (const auto *Entry = CostTableLookup(SSE2CostTblNoPairWise, ISD, MTy)) | |||
2661 | return Entry->Cost; | |||
2662 | } | |||
2663 | } | |||
2664 | ||||
2665 | std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy); | |||
2666 | ||||
2667 | MVT MTy = LT.second; | |||
2668 | ||||
2669 | if (IsPairwise) { | |||
2670 | if (ST->isSLM()) | |||
2671 | if (const auto *Entry = CostTableLookup(SLMCostTblPairWise, ISD, MTy)) | |||
2672 | return LT.first * Entry->Cost; | |||
2673 | ||||
2674 | if (ST->hasAVX()) | |||
2675 | if (const auto *Entry = CostTableLookup(AVX1CostTblPairWise, ISD, MTy)) | |||
2676 | return LT.first * Entry->Cost; | |||
2677 | ||||
2678 | if (ST->hasSSE2()) | |||
2679 | if (const auto *Entry = CostTableLookup(SSE2CostTblPairWise, ISD, MTy)) | |||
2680 | return LT.first * Entry->Cost; | |||
2681 | } else { | |||
2682 | if (ST->isSLM()) | |||
2683 | if (const auto *Entry = CostTableLookup(SLMCostTblNoPairWise, ISD, MTy)) | |||
2684 | return LT.first * Entry->Cost; | |||
2685 | ||||
2686 | if (ST->hasAVX()) | |||
2687 | if (const auto *Entry = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy)) | |||
2688 | return LT.first * Entry->Cost; | |||
2689 | ||||
2690 | if (ST->hasSSE2()) | |||
2691 | if (const auto *Entry = CostTableLookup(SSE2CostTblNoPairWise, ISD, MTy)) | |||
2692 | return LT.first * Entry->Cost; | |||
2693 | } | |||
2694 | ||||
2695 | // FIXME: These assume a naive kshift+binop lowering, which is probably | |||
2696 | // conservative in most cases. | |||
2697 | // FIXME: This doesn't cost large types like v128i1 correctly. | |||
2698 | static const CostTblEntry AVX512BoolReduction[] = { | |||
2699 | { ISD::AND, MVT::v2i1, 3 }, | |||
2700 | { ISD::AND, MVT::v4i1, 5 }, | |||
2701 | { ISD::AND, MVT::v8i1, 7 }, | |||
2702 | { ISD::AND, MVT::v16i1, 9 }, | |||
2703 | { ISD::AND, MVT::v32i1, 11 }, | |||
2704 | { ISD::AND, MVT::v64i1, 13 }, | |||
2705 | { ISD::OR, MVT::v2i1, 3 }, | |||
2706 | { ISD::OR, MVT::v4i1, 5 }, | |||
2707 | { ISD::OR, MVT::v8i1, 7 }, | |||
2708 | { ISD::OR, MVT::v16i1, 9 }, | |||
2709 | { ISD::OR, MVT::v32i1, 11 }, | |||
2710 | { ISD::OR, MVT::v64i1, 13 }, | |||
2711 | }; | |||
2712 | ||||
2713 | static const CostTblEntry AVX2BoolReduction[] = { | |||
2714 | { ISD::AND, MVT::v16i16, 2 }, // vpmovmskb + cmp | |||
2715 | { ISD::AND, MVT::v32i8, 2 }, // vpmovmskb + cmp | |||
2716 | { ISD::OR, MVT::v16i16, 2 }, // vpmovmskb + cmp | |||
2717 | { ISD::OR, MVT::v32i8, 2 }, // vpmovmskb + cmp | |||
2718 | }; | |||
2719 | ||||
2720 | static const CostTblEntry AVX1BoolReduction[] = { | |||
2721 | { ISD::AND, MVT::v4i64, 2 }, // vmovmskpd + cmp | |||
2722 | { ISD::AND, MVT::v8i32, 2 }, // vmovmskps + cmp | |||
2723 | { ISD::AND, MVT::v16i16, 4 }, // vextractf128 + vpand + vpmovmskb + cmp | |||
2724 | { ISD::AND, MVT::v32i8, 4 }, // vextractf128 + vpand + vpmovmskb + cmp | |||
2725 | { ISD::OR, MVT::v4i64, 2 }, // vmovmskpd + cmp | |||
2726 | { ISD::OR, MVT::v8i32, 2 }, // vmovmskps + cmp | |||
2727 | { ISD::OR, MVT::v16i16, 4 }, // vextractf128 + vpor + vpmovmskb + cmp | |||
2728 | { ISD::OR, MVT::v32i8, 4 }, // vextractf128 + vpor + vpmovmskb + cmp | |||
2729 | }; | |||
2730 | ||||
2731 | static const CostTblEntry SSE2BoolReduction[] = { | |||
2732 | { ISD::AND, MVT::v2i64, 2 }, // movmskpd + cmp | |||
2733 | { ISD::AND, MVT::v4i32, 2 }, // movmskps + cmp | |||
2734 | { ISD::AND, MVT::v8i16, 2 }, // pmovmskb + cmp | |||
2735 | { ISD::AND, MVT::v16i8, 2 }, // pmovmskb + cmp | |||
2736 | { ISD::OR, MVT::v2i64, 2 }, // movmskpd + cmp | |||
2737 | { ISD::OR, MVT::v4i32, 2 }, // movmskps + cmp | |||
2738 | { ISD::OR, MVT::v8i16, 2 }, // pmovmskb + cmp | |||
2739 | { ISD::OR, MVT::v16i8, 2 }, // pmovmskb + cmp | |||
2740 | }; | |||
2741 | ||||
2742 | // Handle bool allof/anyof patterns. | |||
2743 | if (!IsPairwise && ValTy->getVectorElementType()->isIntegerTy(1)) { | |||
2744 | if (ST->hasAVX512()) | |||
2745 | if (const auto *Entry = CostTableLookup(AVX512BoolReduction, ISD, MTy)) | |||
2746 | return LT.first * Entry->Cost; | |||
2747 | if (ST->hasAVX2()) | |||
2748 | if (const auto *Entry = CostTableLookup(AVX2BoolReduction, ISD, MTy)) | |||
2749 | return LT.first * Entry->Cost; | |||
2750 | if (ST->hasAVX()) | |||
2751 | if (const auto *Entry = CostTableLookup(AVX1BoolReduction, ISD, MTy)) | |||
2752 | return LT.first * Entry->Cost; | |||
2753 | if (ST->hasSSE2()) | |||
2754 | if (const auto *Entry = CostTableLookup(SSE2BoolReduction, ISD, MTy)) | |||
2755 | return LT.first * Entry->Cost; | |||
2756 | } | |||
2757 | ||||
2758 | return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwise); | |||
2759 | } | |||
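// Illustrative sketch of the SSE2 bool-reduction pattern costed at 2 above
// (pmovmskb + cmp), assuming the i1 lanes arrive as 0x00/0xFF bytes in an
// XMM register:
#include <emmintrin.h>

static bool allOf16(__m128i Mask) {   // AND-reduction of <16 x i1>
  return _mm_movemask_epi8(Mask) == 0xFFFF;
}
static bool anyOf16(__m128i Mask) {   // OR-reduction of <16 x i1>
  return _mm_movemask_epi8(Mask) != 0;
}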
2760 | ||||
2761 | int X86TTIImpl::getMinMaxReductionCost(Type *ValTy, Type *CondTy, | |||
2762 | bool IsPairwise, bool IsUnsigned) { | |||
2763 | std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy); | |||
2764 | ||||
2765 | MVT MTy = LT.second; | |||
2766 | ||||
2767 | int ISD; | |||
2768 | if (ValTy->isIntOrIntVectorTy()) { | |||
2769 | ISD = IsUnsigned ? ISD::UMIN : ISD::SMIN; | |||
2770 | } else { | |||
2771 | assert(ValTy->isFPOrFPVectorTy() && | |||
2772 | "Expected floating point or integer vector type."); | |||
2773 | ISD = ISD::FMINNUM; | |||
2774 | } | |||
2775 | ||||
2776 | // We use the Intel Architecture Code Analyzer (IACA) to measure the | |||
2777 | // throughput and use it as the cost. | |||
2778 | ||||
2779 | static const CostTblEntry SSE1CostTblPairWise[] = { | |||
2780 | {ISD::FMINNUM, MVT::v4f32, 4}, | |||
2781 | }; | |||
2782 | ||||
2783 | static const CostTblEntry SSE2CostTblPairWise[] = { | |||
2784 | {ISD::FMINNUM, MVT::v2f64, 3}, | |||
2785 | {ISD::SMIN, MVT::v2i64, 6}, | |||
2786 | {ISD::UMIN, MVT::v2i64, 8}, | |||
2787 | {ISD::SMIN, MVT::v4i32, 6}, | |||
2788 | {ISD::UMIN, MVT::v4i32, 8}, | |||
2789 | {ISD::SMIN, MVT::v8i16, 4}, | |||
2790 | {ISD::UMIN, MVT::v8i16, 6}, | |||
2791 | {ISD::SMIN, MVT::v16i8, 8}, | |||
2792 | {ISD::UMIN, MVT::v16i8, 6}, | |||
2793 | }; | |||
2794 | ||||
2795 | static const CostTblEntry SSE41CostTblPairWise[] = { | |||
2796 | {ISD::FMINNUM, MVT::v4f32, 2}, | |||
2797 | {ISD::SMIN, MVT::v2i64, 9}, | |||
2798 | {ISD::UMIN, MVT::v2i64,10}, | |||
2799 | {ISD::SMIN, MVT::v4i32, 1}, // The data reported by the IACA is "1.5" | |||
2800 | {ISD::UMIN, MVT::v4i32, 2}, // The data reported by the IACA is "1.8" | |||
2801 | {ISD::SMIN, MVT::v8i16, 2}, | |||
2802 | {ISD::UMIN, MVT::v8i16, 2}, | |||
2803 | {ISD::SMIN, MVT::v16i8, 3}, | |||
2804 | {ISD::UMIN, MVT::v16i8, 3}, | |||
2805 | }; | |||
2806 | ||||
2807 | static const CostTblEntry SSE42CostTblPairWise[] = { | |||
2808 | {ISD::SMIN, MVT::v2i64, 7}, // The data reported by the IACA is "6.8" | |||
2809 | {ISD::UMIN, MVT::v2i64, 8}, // The data reported by the IACA is "8.6" | |||
2810 | }; | |||
2811 | ||||
2812 | static const CostTblEntry AVX1CostTblPairWise[] = { | |||
2813 | {ISD::FMINNUM, MVT::v4f32, 1}, | |||
2814 | {ISD::FMINNUM, MVT::v4f64, 1}, | |||
2815 | {ISD::FMINNUM, MVT::v8f32, 2}, | |||
2816 | {ISD::SMIN, MVT::v2i64, 3}, | |||
2817 | {ISD::UMIN, MVT::v2i64, 3}, | |||
2818 | {ISD::SMIN, MVT::v4i32, 1}, | |||
2819 | {ISD::UMIN, MVT::v4i32, 1}, | |||
2820 | {ISD::SMIN, MVT::v8i16, 1}, | |||
2821 | {ISD::UMIN, MVT::v8i16, 1}, | |||
2822 | {ISD::SMIN, MVT::v16i8, 2}, | |||
2823 | {ISD::UMIN, MVT::v16i8, 2}, | |||
2824 | {ISD::SMIN, MVT::v4i64, 7}, | |||
2825 | {ISD::UMIN, MVT::v4i64, 7}, | |||
2826 | {ISD::SMIN, MVT::v8i32, 3}, | |||
2827 | {ISD::UMIN, MVT::v8i32, 3}, | |||
2828 | {ISD::SMIN, MVT::v16i16, 3}, | |||
2829 | {ISD::UMIN, MVT::v16i16, 3}, | |||
2830 | {ISD::SMIN, MVT::v32i8, 3}, | |||
2831 | {ISD::UMIN, MVT::v32i8, 3}, | |||
2832 | }; | |||
2833 | ||||
2834 | static const CostTblEntry AVX2CostTblPairWise[] = { | |||
2835 | {ISD::SMIN, MVT::v4i64, 2}, | |||
2836 | {ISD::UMIN, MVT::v4i64, 2}, | |||
2837 | {ISD::SMIN, MVT::v8i32, 1}, | |||
2838 | {ISD::UMIN, MVT::v8i32, 1}, | |||
2839 | {ISD::SMIN, MVT::v16i16, 1}, | |||
2840 | {ISD::UMIN, MVT::v16i16, 1}, | |||
2841 | {ISD::SMIN, MVT::v32i8, 2}, | |||
2842 | {ISD::UMIN, MVT::v32i8, 2}, | |||
2843 | }; | |||
2844 | ||||
2845 | static const CostTblEntry AVX512CostTblPairWise[] = { | |||
2846 | {ISD::FMINNUM, MVT::v8f64, 1}, | |||
2847 | {ISD::FMINNUM, MVT::v16f32, 2}, | |||
2848 | {ISD::SMIN, MVT::v8i64, 2}, | |||
2849 | {ISD::UMIN, MVT::v8i64, 2}, | |||
2850 | {ISD::SMIN, MVT::v16i32, 1}, | |||
2851 | {ISD::UMIN, MVT::v16i32, 1}, | |||
2852 | }; | |||
2853 | ||||
2854 | static const CostTblEntry SSE1CostTblNoPairWise[] = { | |||
2855 | {ISD::FMINNUM, MVT::v4f32, 4}, | |||
2856 | }; | |||
2857 | ||||
2858 | static const CostTblEntry SSE2CostTblNoPairWise[] = { | |||
2859 | {ISD::FMINNUM, MVT::v2f64, 3}, | |||
2860 | {ISD::SMIN, MVT::v2i64, 6}, | |||
2861 | {ISD::UMIN, MVT::v2i64, 8}, | |||
2862 | {ISD::SMIN, MVT::v4i32, 6}, | |||
2863 | {ISD::UMIN, MVT::v4i32, 8}, | |||
2864 | {ISD::SMIN, MVT::v8i16, 4}, | |||
2865 | {ISD::UMIN, MVT::v8i16, 6}, | |||
2866 | {ISD::SMIN, MVT::v16i8, 8}, | |||
2867 | {ISD::UMIN, MVT::v16i8, 6}, | |||
2868 | }; | |||
2869 | ||||
2870 | static const CostTblEntry SSE41CostTblNoPairWise[] = { | |||
2871 | {ISD::FMINNUM, MVT::v4f32, 3}, | |||
2872 | {ISD::SMIN, MVT::v2i64, 9}, | |||
2873 | {ISD::UMIN, MVT::v2i64,11}, | |||
2874 | {ISD::SMIN, MVT::v4i32, 1}, // The data reported by the IACA is "1.5" | |||
2875 | {ISD::UMIN, MVT::v4i32, 2}, // The data reported by the IACA is "1.8" | |||
2876 | {ISD::SMIN, MVT::v8i16, 1}, // The data reported by the IACA is "1.5" | |||
2877 | {ISD::UMIN, MVT::v8i16, 2}, // The data reported by the IACA is "1.8" | |||
2878 | {ISD::SMIN, MVT::v16i8, 3}, | |||
2879 | {ISD::UMIN, MVT::v16i8, 3}, | |||
2880 | }; | |||
2881 | ||||
2882 | static const CostTblEntry SSE42CostTblNoPairWise[] = { | |||
2883 | {ISD::SMIN, MVT::v2i64, 7}, // The data reported by the IACA is "6.8" | |||
2884 | {ISD::UMIN, MVT::v2i64, 9}, // The data reported by the IACA is "8.6" | |||
2885 | }; | |||
2886 | ||||
2887 | static const CostTblEntry AVX1CostTblNoPairWise[] = { | |||
2888 | {ISD::FMINNUM, MVT::v4f32, 1}, | |||
2889 | {ISD::FMINNUM, MVT::v4f64, 1}, | |||
2890 | {ISD::FMINNUM, MVT::v8f32, 1}, | |||
2891 | {ISD::SMIN, MVT::v2i64, 3}, | |||
2892 | {ISD::UMIN, MVT::v2i64, 3}, | |||
2893 | {ISD::SMIN, MVT::v4i32, 1}, | |||
2894 | {ISD::UMIN, MVT::v4i32, 1}, | |||
2895 | {ISD::SMIN, MVT::v8i16, 1}, | |||
2896 | {ISD::UMIN, MVT::v8i16, 1}, | |||
2897 | {ISD::SMIN, MVT::v16i8, 2}, | |||
2898 | {ISD::UMIN, MVT::v16i8, 2}, | |||
2899 | {ISD::SMIN, MVT::v4i64, 7}, | |||
2900 | {ISD::UMIN, MVT::v4i64, 7}, | |||
2901 | {ISD::SMIN, MVT::v8i32, 2}, | |||
2902 | {ISD::UMIN, MVT::v8i32, 2}, | |||
2903 | {ISD::SMIN, MVT::v16i16, 2}, | |||
2904 | {ISD::UMIN, MVT::v16i16, 2}, | |||
2905 | {ISD::SMIN, MVT::v32i8, 2}, | |||
2906 | {ISD::UMIN, MVT::v32i8, 2}, | |||
2907 | }; | |||
2908 | ||||
2909 | static const CostTblEntry AVX2CostTblNoPairWise[] = { | |||
2910 | {ISD::SMIN, MVT::v4i64, 1}, | |||
2911 | {ISD::UMIN, MVT::v4i64, 1}, | |||
2912 | {ISD::SMIN, MVT::v8i32, 1}, | |||
2913 | {ISD::UMIN, MVT::v8i32, 1}, | |||
2914 | {ISD::SMIN, MVT::v16i16, 1}, | |||
2915 | {ISD::UMIN, MVT::v16i16, 1}, | |||
2916 | {ISD::SMIN, MVT::v32i8, 1}, | |||
2917 | {ISD::UMIN, MVT::v32i8, 1}, | |||
2918 | }; | |||
2919 | ||||
2920 | static const CostTblEntry AVX512CostTblNoPairWise[] = { | |||
2921 | {ISD::FMINNUM, MVT::v8f64, 1}, | |||
2922 | {ISD::FMINNUM, MVT::v16f32, 2}, | |||
2923 | {ISD::SMIN, MVT::v8i64, 1}, | |||
2924 | {ISD::UMIN, MVT::v8i64, 1}, | |||
2925 | {ISD::SMIN, MVT::v16i32, 1}, | |||
2926 | {ISD::UMIN, MVT::v16i32, 1}, | |||
2927 | }; | |||
2928 | ||||
2929 | if (IsPairwise) { | |||
2930 | if (ST->hasAVX512()) | |||
2931 | if (const auto *Entry = CostTableLookup(AVX512CostTblPairWise, ISD, MTy)) | |||
2932 | return LT.first * Entry->Cost; | |||
2933 | ||||
2934 | if (ST->hasAVX2()) | |||
2935 | if (const auto *Entry = CostTableLookup(AVX2CostTblPairWise, ISD, MTy)) | |||
2936 | return LT.first * Entry->Cost; | |||
2937 | ||||
2938 | if (ST->hasAVX()) | |||
2939 | if (const auto *Entry = CostTableLookup(AVX1CostTblPairWise, ISD, MTy)) | |||
2940 | return LT.first * Entry->Cost; | |||
2941 | ||||
2942 | if (ST->hasSSE42()) | |||
2943 | if (const auto *Entry = CostTableLookup(SSE42CostTblPairWise, ISD, MTy)) | |||
2944 | return LT.first * Entry->Cost; | |||
2945 | ||||
2946 | if (ST->hasSSE41()) | |||
2947 | if (const auto *Entry = CostTableLookup(SSE41CostTblPairWise, ISD, MTy)) | |||
2948 | return LT.first * Entry->Cost; | |||
2949 | ||||
2950 | if (ST->hasSSE2()) | |||
2951 | if (const auto *Entry = CostTableLookup(SSE2CostTblPairWise, ISD, MTy)) | |||
2952 | return LT.first * Entry->Cost; | |||
2953 | ||||
2954 | if (ST->hasSSE1()) | |||
2955 | if (const auto *Entry = CostTableLookup(SSE1CostTblPairWise, ISD, MTy)) | |||
2956 | return LT.first * Entry->Cost; | |||
2957 | } else { | |||
2958 | if (ST->hasAVX512()) | |||
2959 | if (const auto *Entry = | |||
2960 | CostTableLookup(AVX512CostTblNoPairWise, ISD, MTy)) | |||
2961 | return LT.first * Entry->Cost; | |||
2962 | ||||
2963 | if (ST->hasAVX2()) | |||
2964 | if (const auto *Entry = CostTableLookup(AVX2CostTblNoPairWise, ISD, MTy)) | |||
2965 | return LT.first * Entry->Cost; | |||
2966 | ||||
2967 | if (ST->hasAVX()) | |||
2968 | if (const auto *Entry = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy)) | |||
2969 | return LT.first * Entry->Cost; | |||
2970 | ||||
2971 | if (ST->hasSSE42()) | |||
2972 | if (const auto *Entry = CostTableLookup(SSE42CostTblNoPairWise, ISD, MTy)) | |||
2973 | return LT.first * Entry->Cost; | |||
2974 | ||||
2975 | if (ST->hasSSE41()) | |||
2976 | if (const auto *Entry = CostTableLookup(SSE41CostTblNoPairWise, ISD, MTy)) | |||
2977 | return LT.first * Entry->Cost; | |||
2978 | ||||
2979 | if (ST->hasSSE2()) | |||
2980 | if (const auto *Entry = CostTableLookup(SSE2CostTblNoPairWise, ISD, MTy)) | |||
2981 | return LT.first * Entry->Cost; | |||
2982 | ||||
2983 | if (ST->hasSSE1()) | |||
2984 | if (const auto *Entry = CostTableLookup(SSE1CostTblNoPairWise, ISD, MTy)) | |||
2985 | return LT.first * Entry->Cost; | |||
2986 | } | |||
2987 | ||||
2988 | return BaseT::getMinMaxReductionCost(ValTy, CondTy, IsPairwise, IsUnsigned); | |||
2989 | } | |||
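// Illustrative shape of the v4i32 SMIN reduction these tables price, using
// SSE4.1 pminsd plus shuffles (a sketch of one possible lowering, not
// necessarily the exact one the backend picks):
#include <smmintrin.h>

static int reduceSMin4(__m128i V) {
  __m128i T = _mm_min_epi32(V, _mm_shuffle_epi32(V, _MM_SHUFFLE(1, 0, 3, 2)));
  T = _mm_min_epi32(T, _mm_shuffle_epi32(T, _MM_SHUFFLE(2, 3, 0, 1)));
  return _mm_cvtsi128_si32(T);         // lane 0 now holds the minimum
}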
2990 | ||||
2991 | /// Calculate the cost of materializing a 64-bit value. This helper | |||
2992 | /// method might only calculate a fraction of a larger immediate. Therefore it | |||
2993 | /// is valid to return a cost of ZERO. | |||
2994 | int X86TTIImpl::getIntImmCost(int64_t Val) { | |||
2995 | if (Val == 0) | |||
2996 | return TTI::TCC_Free; | |||
2997 | ||||
2998 | if (isInt<32>(Val)) | |||
2999 | return TTI::TCC_Basic; | |||
3000 | ||||
3001 | return 2 * TTI::TCC_Basic; | |||
3002 | } | |||
3003 | ||||
3004 | int X86TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { | |||
3005 | assert(Ty->isIntegerTy()); | |||
3006 | ||||
3007 | unsigned BitSize = Ty->getPrimitiveSizeInBits(); | |||
3008 | if (BitSize == 0) | |||
3009 | return ~0U; | |||
3010 | ||||
3011 | // Never hoist constants larger than 128 bits, because this might lead to | |||
3012 | // incorrect code generation or assertions in codegen. | |||
3013 | // FIXME: Create a cost model for types larger than i128 once the codegen | |||
3014 | // issues have been fixed. | |||
3015 | if (BitSize > 128) | |||
3016 | return TTI::TCC_Free; | |||
3017 | ||||
3018 | if (Imm == 0) | |||
3019 | return TTI::TCC_Free; | |||
3020 | ||||
3021 | // Sign-extend all constants to a multiple of 64-bit. | |||
3022 | APInt ImmVal = Imm; | |||
3023 | if (BitSize % 64 != 0) | |||
3024 | ImmVal = Imm.sext(alignTo(BitSize, 64)); | |||
3025 | ||||
3026 | // Split the constant into 64-bit chunks and calculate the cost for each | |||
3027 | // chunk. | |||
3028 | int Cost = 0; | |||
3029 | for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) { | |||
3030 | APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64); | |||
3031 | int64_t Val = Tmp.getSExtValue(); | |||
3032 | Cost += getIntImmCost(Val); | |||
3033 | } | |||
3034 | // We need at least one instruction to materialize the constant. | |||
3035 | return std::max(1, Cost); | |||
3036 | } | |||
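// Worked example (illustrative): for an i128 constant whose low 64-bit
// chunk is 0 and whose high chunk is 1, the loop above charges
// TCC_Free + TCC_Basic and returns max(1, 1) == 1. A standalone restatement
// of the per-chunk tiers and the chunk walk (hypothetical helpers):
#include <cstdint>

static int immCost64(int64_t Val) {
  if (Val == 0) return 0;                              // TCC_Free
  if (Val >= INT32_MIN && Val <= INT32_MAX) return 1;  // one mov imm32
  return 2;                                            // movabs-class imm64
}

static int immCostChunks(const int64_t *Chunks, unsigned NumChunks) {
  int Cost = 0;
  for (unsigned I = 0; I != NumChunks; ++I)
    Cost += immCost64(Chunks[I]);
  return Cost > 0 ? Cost : 1; // at least one instruction to materialize
}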
3037 | ||||
3038 | int X86TTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, | |||
3039 | Type *Ty) { | |||
3040 | assert(Ty->isIntegerTy()); | |||
3041 | ||||
3042 | unsigned BitSize = Ty->getPrimitiveSizeInBits(); | |||
3043 | // There is no cost model for constants with a bit size of 0. Return TCC_Free | |||
3044 | // here, so that constant hoisting will ignore this constant. | |||
3045 | if (BitSize == 0) | |||
3046 | return TTI::TCC_Free; | |||
3047 | ||||
3048 | unsigned ImmIdx = ~0U; | |||
3049 | switch (Opcode) { | |||
3050 | default: | |||
3051 | return TTI::TCC_Free; | |||
3052 | case Instruction::GetElementPtr: | |||
3053 | // Always hoist the base address of a GetElementPtr. This prevents the | |||
3054 | // creation of new constants for every base constant that gets constant | |||
3055 | // folded with the offset. | |||
3056 | if (Idx == 0) | |||
3057 | return 2 * TTI::TCC_Basic; | |||
3058 | return TTI::TCC_Free; | |||
3059 | case Instruction::Store: | |||
3060 | ImmIdx = 0; | |||
3061 | break; | |||
3062 | case Instruction::ICmp: | |||
3063 | // This is an imperfect hack to prevent constant hoisting of | |||
3064 | // compares that might be trying to check if a 64-bit value fits in | |||
3065 | // 32 bits. The backend can optimize these cases using a right shift by 32. | |||
3066 | // Ideally we would check the compare predicate here. There are also other | |||
3067 | // similar immediates the backend can use shifts for. | |||
3068 | if (Idx == 1 && Imm.getBitWidth() == 64) { | |||
3069 | uint64_t ImmVal = Imm.getZExtValue(); | |||
3070 | if (ImmVal == 0x100000000ULL || ImmVal == 0xffffffff) | |||
3071 | return TTI::TCC_Free; | |||
3072 | } | |||
3073 | ImmIdx = 1; | |||
3074 | break; | |||
3075 | case Instruction::And: | |||
3076 | // We support 64-bit ANDs with immediates with 32-bits of leading zeroes | |||
3077 | // by using a 32-bit operation with implicit zero extension. Detect such | |||
3078 | // immediates here as the normal path expects bit 31 to be sign extended. | |||
3079 | if (Idx == 1 && Imm.getBitWidth() == 64 && isUInt<32>(Imm.getZExtValue())) | |||
3080 | return TTI::TCC_Free; | |||
3081 | ImmIdx = 1; | |||
3082 | break; | |||
3083 | case Instruction::Add: | |||
3084 | case Instruction::Sub: | |||
3085 | // For add/sub, we can use the opposite instruction for INT32_MIN. | |||
3086 | if (Idx == 1 && Imm.getBitWidth() == 64 && Imm.getZExtValue() == 0x80000000) | |||
3087 | return TTI::TCC_Free; | |||
3088 | ImmIdx = 1; | |||
3089 | break; | |||
3090 | case Instruction::UDiv: | |||
3091 | case Instruction::SDiv: | |||
3092 | case Instruction::URem: | |||
3093 | case Instruction::SRem: | |||
3094 | // Division by constant is typically expanded later into a different | |||
3095 | // instruction sequence. This completely changes the constants. | |||
3096 | // Report them as "free" to stop ConstantHoist from marking them as opaque. | |||
3097 | return TTI::TCC_Free; | |||
3098 | case Instruction::Mul: | |||
3099 | case Instruction::Or: | |||
3100 | case Instruction::Xor: | |||
3101 | ImmIdx = 1; | |||
3102 | break; | |||
3103 | // Always return TCC_Free for the shift value of a shift instruction. | |||
3104 | case Instruction::Shl: | |||
3105 | case Instruction::LShr: | |||
3106 | case Instruction::AShr: | |||
3107 | if (Idx == 1) | |||
3108 | return TTI::TCC_Free; | |||
3109 | break; | |||
3110 | case Instruction::Trunc: | |||
3111 | case Instruction::ZExt: | |||
3112 | case Instruction::SExt: | |||
3113 | case Instruction::IntToPtr: | |||
3114 | case Instruction::PtrToInt: | |||
3115 | case Instruction::BitCast: | |||
3116 | case Instruction::PHI: | |||
3117 | case Instruction::Call: | |||
3118 | case Instruction::Select: | |||
3119 | case Instruction::Ret: | |||
3120 | case Instruction::Load: | |||
3121 | break; | |||
3122 | } | |||
3123 | ||||
3124 | if (Idx == ImmIdx) { | |||
3125 | int NumConstants = divideCeil(BitSize, 64); | |||
3126 | int Cost = X86TTIImpl::getIntImmCost(Imm, Ty); | |||
3127 | return (Cost <= NumConstants * TTI::TCC_Basic) | |||
3128 | ? static_cast<int>(TTI::TCC_Free) | |||
3129 | : Cost; | |||
3130 | } | |||
3131 | ||||
3132 | return X86TTIImpl::getIntImmCost(Imm, Ty); | |||
3133 | } | |||
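// Illustrative example of the And special case above: a 64-bit AND whose
// immediate has 32 zero upper bits can use the 32-bit opcode, because
// 32-bit ops implicitly zero-extend their result on x86-64; no 64-bit
// immediate is ever materialized, so hoisting it would only hurt.
static unsigned long long maskLow(unsigned long long X) {
  return X & 0x00000000FF00FF00ull; // emitted as a 32-bit AND (andl)
}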
3134 | ||||
3135 | int X86TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, | |||
3136 | const APInt &Imm, Type *Ty) { | |||
3137 | assert(Ty->isIntegerTy()); | |||
3138 | ||||
3139 | unsigned BitSize = Ty->getPrimitiveSizeInBits(); | |||
3140 | // There is no cost model for constants with a bit size of 0. Return TCC_Free | |||
3141 | // here, so that constant hoisting will ignore this constant. | |||
3142 | if (BitSize == 0) | |||
3143 | return TTI::TCC_Free; | |||
3144 | ||||
3145 | switch (IID) { | |||
3146 | default: | |||
3147 | return TTI::TCC_Free; | |||
3148 | case Intrinsic::sadd_with_overflow: | |||
3149 | case Intrinsic::uadd_with_overflow: | |||
3150 | case Intrinsic::ssub_with_overflow: | |||
3151 | case Intrinsic::usub_with_overflow: | |||
3152 | case Intrinsic::smul_with_overflow: | |||
3153 | case Intrinsic::umul_with_overflow: | |||
3154 | if ((Idx == 1) && Imm.getBitWidth() <= 64 && isInt<32>(Imm.getSExtValue())) | |||
3155 | return TTI::TCC_Free; | |||
3156 | break; | |||
3157 | case Intrinsic::experimental_stackmap: | |||
3158 | if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) | |||
3159 | return TTI::TCC_Free; | |||
3160 | break; | |||
3161 | case Intrinsic::experimental_patchpoint_void: | |||
3162 | case Intrinsic::experimental_patchpoint_i64: | |||
3163 | if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) | |||
3164 | return TTI::TCC_Free; | |||
3165 | break; | |||
3166 | } | |||
3167 | return X86TTIImpl::getIntImmCost(Imm, Ty); | |||
3168 | } | |||
3169 | ||||
3170 | unsigned X86TTIImpl::getUserCost(const User *U, | |||
3171 | ArrayRef<const Value *> Operands) { | |||
3172 | if (isa<StoreInst>(U)) { | |||
| ||||
3173 | Value *Ptr = U->getOperand(1); | |||
3174 | // Store instruction with index and scale costs 2 Uops. | |||
3175 | // Check the preceding GEP to identify non-const indices. | |||
3176 | if (auto GEP = dyn_cast<GetElementPtrInst>(Ptr)) { | |||
3177 | if (!all_of(GEP->indices(), [](Value *V) { return isa<Constant>(V); })) | |||
3178 | return TTI::TCC_Basic * 2; | |||
3179 | } | |||
3180 | return TTI::TCC_Basic; | |||
3181 | } | |||
3182 | return BaseT::getUserCost(U, Operands); | |||
3183 | } | |||
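// Illustrative example of the 2-uop store case above (IR shown as a comment):
//   %p = getelementptr inbounds i32, i32* %base, i64 %i   ; %i non-constant
//   store i32 %v, i32* %p
// lowers to an indexed-addressing store such as mov %edx, (%rdi,%rsi,4),
// which is the "index and scale" case charged TTI::TCC_Basic * 2.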
3184 | ||||
3185 | // Return the average cost of a Gather / Scatter instruction; may be improved later. | |||
3186 | int X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, Value *Ptr, | |||
3187 | unsigned Alignment, unsigned AddressSpace) { | |||
3188 | ||||
3189 | assert(isa<VectorType>(SrcVTy) && "Unexpected type in getGSVectorCost"); | |||
3190 | unsigned VF = SrcVTy->getVectorNumElements(); | |||
3191 | ||||
3192 | // Try to reduce the index size from 64 bits (the default for GEP) to 32. | |||
3193 | // This is essential for VF 16: if the index can't be reduced to 32 bits, | |||
3194 | // the operation will use 16 x 64 indices, which do not fit in a zmm | |||
3195 | // register, and will need to be split. Also check that the base pointer is | |||
3196 | // the same for all lanes, and that there's at most one variable index. | |||
3197 | auto getIndexSizeInBits = [](Value *Ptr, const DataLayout& DL) { | |||
3198 | unsigned IndexSize = DL.getPointerSizeInBits(); | |||
3199 | GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr); | |||
3200 | if (IndexSize < 64 || !GEP) | |||
3201 | return IndexSize; | |||
3202 | ||||
3203 | unsigned NumOfVarIndices = 0; | |||
3204 | Value *Ptrs = GEP->getPointerOperand(); | |||
3205 | if (Ptrs->getType()->isVectorTy() && !getSplatValue(Ptrs)) | |||
3206 | return IndexSize; | |||
3207 | for (unsigned i = 1; i < GEP->getNumOperands(); ++i) { | |||
3208 | if (isa<Constant>(GEP->getOperand(i))) | |||
3209 | continue; | |||
3210 | Type *IndxTy = GEP->getOperand(i)->getType(); | |||
3211 | if (IndxTy->isVectorTy()) | |||
3212 | IndxTy = IndxTy->getVectorElementType(); | |||
3213 | if ((IndxTy->getPrimitiveSizeInBits() == 64 && | |||
3214 | !isa<SExtInst>(GEP->getOperand(i))) || | |||
3215 | ++NumOfVarIndices > 1) | |||
3216 | return IndexSize; // 64 | |||
3217 | } | |||
3218 | return (unsigned)32; | |||
3219 | }; | |||
3220 | ||||
3221 | ||||
3222 | // Try to reduce IndexSize to 32 bits for vector width 16. | |||
3223 | // By default the IndexSize is equal to the pointer size. | |||
3224 | unsigned IndexSize = (ST->hasAVX512() && VF >= 16) | |||
3225 | ? getIndexSizeInBits(Ptr, DL) | |||
3226 | : DL.getPointerSizeInBits(); | |||
3227 | ||||
3228 | Type *IndexVTy = VectorType::get(IntegerType::get(SrcVTy->getContext(), | |||
3229 | IndexSize), VF); | |||
3230 | std::pair<int, MVT> IdxsLT = TLI->getTypeLegalizationCost(DL, IndexVTy); | |||
3231 | std::pair<int, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, SrcVTy); | |||
3232 | int SplitFactor = std::max(IdxsLT.first, SrcLT.first); | |||
3233 | if (SplitFactor > 1) { | |||
3234 | // Handle splitting of vector of pointers | |||
3235 | Type *SplitSrcTy = VectorType::get(SrcVTy->getScalarType(), VF / SplitFactor); | |||
3236 | return SplitFactor * getGSVectorCost(Opcode, SplitSrcTy, Ptr, Alignment, | |||
3237 | AddressSpace); | |||
3238 | } | |||
3239 | ||||
3240 | // The gather / scatter cost is given by Intel architects. It is a rough | |||
3241 | // number since we are looking at one instruction at a time. | |||
3242 | const int GSOverhead = (Opcode == Instruction::Load) | |||
3243 | ? ST->getGatherOverhead() | |||
3244 | : ST->getScatterOverhead(); | |||
3245 | return GSOverhead + VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(), | |||
3246 | MaybeAlign(Alignment), AddressSpace); | |||
3247 | } | |||
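// Illustrative note: with VF 16 a <16 x i64> index vector does not fit in
// one zmm register, forcing a split, whereas 32-bit (or sign-extended)
// indices allow a single gather. The single-instruction form the probe
// above is hoping to prove legal (AVX-512F intrinsic, scale 4 for floats):
#include <immintrin.h>

static __m512 gather16f(const float *Base, __m512i Idx32) {
  return _mm512_i32gather_ps(Idx32, Base, 4); // one vgatherdps for VF 16
}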
3248 | ||||
3249 | /// Return the cost of full scalarization of gather / scatter operation. | |||
3250 | /// | |||
3251 | /// Opcode - Load or Store instruction. | |||
3252 | /// SrcVTy - The type of the data vector that should be gathered or scattered. | |||
3253 | /// VariableMask - The mask is non-constant at compile time. | |||
3254 | /// Alignment - Alignment for one element. | |||
3255 | /// AddressSpace - pointer[s] address space. | |||
3256 | /// | |||
3257 | int X86TTIImpl::getGSScalarCost(unsigned Opcode, Type *SrcVTy, | |||
3258 | bool VariableMask, unsigned Alignment, | |||
3259 | unsigned AddressSpace) { | |||
3260 | unsigned VF = SrcVTy->getVectorNumElements(); | |||
3261 | ||||
3262 | int MaskUnpackCost = 0; | |||
3263 | if (VariableMask) { | |||
3264 | VectorType *MaskTy = | |||
3265 | VectorType::get(Type::getInt1Ty(SrcVTy->getContext()), VF); | |||
3266 | MaskUnpackCost = getScalarizationOverhead(MaskTy, false, true); | |||
3267 | int ScalarCompareCost = | |||
3268 | getCmpSelInstrCost(Instruction::ICmp, Type::getInt1Ty(SrcVTy->getContext()), | |||
3269 | nullptr); | |||
3270 | int BranchCost = getCFInstrCost(Instruction::Br); | |||
3271 | MaskUnpackCost += VF * (BranchCost + ScalarCompareCost); | |||
3272 | } | |||
3273 | ||||
3274 | // The cost of the scalar loads/stores. | |||
3275 | int MemoryOpCost = VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(), | |||
3276 | MaybeAlign(Alignment), AddressSpace); | |||
3277 | ||||
3278 | int InsertExtractCost = 0; | |||
3279 | if (Opcode == Instruction::Load) | |||
3280 | for (unsigned i = 0; i < VF; ++i) | |||
3281 | // Add the cost of inserting each scalar load into the vector | |||
3282 | InsertExtractCost += | |||
3283 | getVectorInstrCost(Instruction::InsertElement, SrcVTy, i); | |||
3284 | else | |||
3285 | for (unsigned i = 0; i < VF; ++i) | |||
3286 | // Add the cost of extracting each element out of the data vector | |||
3287 | InsertExtractCost += | |||
3288 | getVectorInstrCost(Instruction::ExtractElement, SrcVTy, i); | |||
3289 | ||||
3290 | return MemoryOpCost + MaskUnpackCost + InsertExtractCost; | |||
3291 | } | |||
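// Plain-C++ shape of the fully scalarized gather these costs model
// (hypothetical helper): per lane, a mask test and branch (MaskUnpackCost),
// a scalar load (MemoryOpCost) and an insert (InsertExtractCost).
static void gatherScalarized(const float *Base, const int *Idx,
                             const bool *Mask, float *Out, unsigned VF) {
  for (unsigned I = 0; I != VF; ++I)
    if (Mask[I])
      Out[I] = Base[Idx[I]];
}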
3292 | ||||
3293 | /// Calculate the cost of Gather / Scatter operation | |||
3294 | int X86TTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *SrcVTy, | |||
3295 | Value *Ptr, bool VariableMask, | |||
3296 | unsigned Alignment) { | |||
3297 | assert(SrcVTy->isVectorTy() && "Unexpected data type for Gather/Scatter"); | |||
3298 | unsigned VF = SrcVTy->getVectorNumElements(); | |||
3299 | PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType()); | |||
3300 | if (!PtrTy && Ptr->getType()->isVectorTy()) | |||
3301 | PtrTy = dyn_cast<PointerType>(Ptr->getType()->getVectorElementType()); | |||
3302 | assert(PtrTy && "Unexpected type for Ptr argument"); | |||
3303 | unsigned AddressSpace = PtrTy->getAddressSpace(); | |||
3304 | ||||
3305 | bool Scalarize = false; | |||
3306 | if ((Opcode == Instruction::Load && | |||
3307 | !isLegalMaskedGather(SrcVTy, MaybeAlign(Alignment))) || | |||
3308 | (Opcode == Instruction::Store && | |||
3309 | !isLegalMaskedScatter(SrcVTy, MaybeAlign(Alignment)))) | |||
3310 | Scalarize = true; | |||
3311 | // Gather / Scatter with a vector width of 2 is not profitable on KNL / SKX. | |||
3312 | // A vector-4 form of the gather/scatter instruction does not exist on KNL. | |||
3313 | // We could extend it to 8 elements, but zeroing the upper bits of | |||
3314 | // the mask vector will add more instructions. Right now we give the scalar | |||
3315 | // cost of vector-4 for KNL. TODO: check whether the gather/scatter | |||
3316 | // instruction is better in the VariableMask case. | |||
3317 | if (ST->hasAVX512() && (VF == 2 || (VF == 4 && !ST->hasVLX()))) | |||
3318 | Scalarize = true; | |||
3319 | ||||
3320 | if (Scalarize) | |||
3321 | return getGSScalarCost(Opcode, SrcVTy, VariableMask, Alignment, | |||
3322 | AddressSpace); | |||
3323 | ||||
3324 | return getGSVectorCost(Opcode, SrcVTy, Ptr, Alignment, AddressSpace); | |||
3325 | } | |||
3326 | ||||
3327 | bool X86TTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1, | |||
3328 | TargetTransformInfo::LSRCost &C2) { | |||
3329 | // The X86-specific part here is that instruction count gets first priority. | |||
3330 | return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost, | |||
3331 | C1.NumIVMuls, C1.NumBaseAdds, | |||
3332 | C1.ScaleCost, C1.ImmCost, C1.SetupCost) < | |||
3333 | std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost, | |||
3334 | C2.NumIVMuls, C2.NumBaseAdds, | |||
3335 | C2.ScaleCost, C2.ImmCost, C2.SetupCost); | |||
3336 | } | |||
3337 | ||||
3338 | bool X86TTIImpl::canMacroFuseCmp() { | |||
3339 | return ST->hasMacroFusion() || ST->hasBranchFusion(); | |||
3340 | } | |||
3341 | ||||
3342 | bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy, MaybeAlign Alignment) { | |||
3343 | if (!ST->hasAVX()) | |||
3344 | return false; | |||
3345 | ||||
3346 | // The backend can't handle a single element vector. | |||
3347 | if (isa<VectorType>(DataTy) && DataTy->getVectorNumElements() == 1) | |||
3348 | return false; | |||
3349 | Type *ScalarTy = DataTy->getScalarType(); | |||
3350 | ||||
3351 | if (ScalarTy->isPointerTy()) | |||
3352 | return true; | |||
3353 | ||||
3354 | if (ScalarTy->isFloatTy() || ScalarTy->isDoubleTy()) | |||
3355 | return true; | |||
3356 | ||||
3357 | if (!ScalarTy->isIntegerTy()) | |||
3358 | return false; | |||
3359 | ||||
3360 | unsigned IntWidth = ScalarTy->getIntegerBitWidth(); | |||
3361 | return IntWidth == 32 || IntWidth == 64 || | |||
3362 | ((IntWidth == 8 || IntWidth == 16) && ST->hasBWI()); | |||
3363 | } | |||
3364 | ||||
3365 | bool X86TTIImpl::isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) { | |||
3366 | return isLegalMaskedLoad(DataType, Alignment); | |||
3367 | } | |||
3368 | ||||
3369 | bool X86TTIImpl::isLegalNTLoad(Type *DataType, Align Alignment) { | |||
3370 | unsigned DataSize = DL.getTypeStoreSize(DataType); | |||
3371 | // The only supported nontemporal loads are for aligned vectors of 16 or 32 | |||
3372 | // bytes. Note that 32-byte nontemporal vector loads are supported by AVX2 | |||
3373 | // (the equivalent stores only require AVX). | |||
3374 | if (Alignment >= DataSize && (DataSize == 16 || DataSize == 32)) | |||
3375 | return DataSize == 16 ? ST->hasSSE1() : ST->hasAVX2(); | |||
3376 | ||||
3377 | return false; | |||
3378 | } | |||
3379 | ||||
3380 | bool X86TTIImpl::isLegalNTStore(Type *DataType, Align Alignment) { | |||
3381 | unsigned DataSize = DL.getTypeStoreSize(DataType); | |||
3382 | ||||
3383 | // SSE4A supports nontemporal stores of float and double at arbitrary | |||
3384 | // alignment. | |||
3385 | if (ST->hasSSE4A() && (DataType->isFloatTy() || DataType->isDoubleTy())) | |||
3386 | return true; | |||
3387 | ||||
3388 | // Besides the SSE4A subtarget exception above, only aligned stores are | |||
3389 | // available nontemporally on any other subtarget, and only stores with a | |||
3390 | // size of 4..32 bytes (powers of 2 only) are permitted. | |||
3391 | if (Alignment < DataSize || DataSize < 4 || DataSize > 32 || | |||
3392 | !isPowerOf2_32(DataSize)) | |||
3393 | return false; | |||
3394 | ||||
3395 | // 32-byte vector nontemporal stores are supported by AVX (the equivalent | |||
3396 | // loads require AVX2). | |||
3397 | if (DataSize == 32) | |||
3398 | return ST->hasAVX(); | |||
3399 | else if (DataSize == 16) | |||
3400 | return ST->hasSSE1(); | |||
3401 | return true; | |||
3402 | } | |||
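// Illustrative example: the 16-byte aligned case above corresponds to
// SSE1's movntps, and the SSE4A scalar exception to movntss/movntsd:
#include <xmmintrin.h>

static void ntStore4f(float *P /* must be 16-byte aligned */, __m128 V) {
  _mm_stream_ps(P, V);                  // movntps: nontemporal 16-byte store
}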
3403 | ||||
3404 | bool X86TTIImpl::isLegalMaskedExpandLoad(Type *DataTy) { | |||
3405 | if (!isa<VectorType>(DataTy)) | |||
3406 | return false; | |||
3407 | ||||
3408 | if (!ST->hasAVX512()) | |||
3409 | return false; | |||
3410 | ||||
3411 | // The backend can't handle a single element vector. | |||
3412 | if (DataTy->getVectorNumElements() == 1) | |||
3413 | return false; | |||
3414 | ||||
3415 | Type *ScalarTy = DataTy->getVectorElementType(); | |||
3416 | ||||
3417 | if (ScalarTy->isFloatTy() || ScalarTy->isDoubleTy()) | |||
3418 | return true; | |||
3419 | ||||
3420 | if (!ScalarTy->isIntegerTy()) | |||
3421 | return false; | |||
3422 | ||||
3423 | unsigned IntWidth = ScalarTy->getIntegerBitWidth(); | |||
3424 | return IntWidth == 32 || IntWidth == 64 || | |||
3425 | ((IntWidth == 8 || IntWidth == 16) && ST->hasVBMI2()); | |||
3426 | } | |||
3427 | ||||
3428 | bool X86TTIImpl::isLegalMaskedCompressStore(Type *DataTy) { | |||
3429 | return isLegalMaskedExpandLoad(DataTy); | |||
3430 | } | |||
3431 | ||||
3432 | bool X86TTIImpl::isLegalMaskedGather(Type *DataTy, MaybeAlign Alignment) { | |||
3433 | // Some CPUs have better gather performance than others. | |||
3434 | // TODO: Remove the explicit ST->hasAVX512()? That would mean we would only | |||
3435 | // enable gather with a -march. | |||
3436 | if (!(ST->hasAVX512() || (ST->hasFastGather() && ST->hasAVX2()))) | |||
3437 | return false; | |||
3438 | ||||
3439 | // This function is currently called in two cases: from the Loop Vectorizer | |||
3440 | // and from the Scalarizer. | |||
3441 | // When the Loop Vectorizer asks about legality of the feature, | |||
3442 | // the vectorization factor is not calculated yet. The Loop Vectorizer | |||
3443 | // sends a scalar type and the decision is based on the width of the | |||
3444 | // scalar element. | |||
3445 | // Later on, the cost model will estimate the usage of this intrinsic based on | |||
3446 | // the vector type. | |||
3447 | // The Scalarizer asks again about legality. It sends a vector type. | |||
3448 | // In this case we can reject non-power-of-2 vectors. | |||
3449 | // We also reject single element vectors as the type legalizer can't | |||
3450 | // scalarize it. | |||
3451 | if (isa<VectorType>(DataTy)) { | |||
3452 | unsigned NumElts = DataTy->getVectorNumElements(); | |||
3453 | if (NumElts == 1 || !isPowerOf2_32(NumElts)) | |||
3454 | return false; | |||
3455 | } | |||
3456 | Type *ScalarTy = DataTy->getScalarType(); | |||
3457 | if (ScalarTy->isPointerTy()) | |||
3458 | return true; | |||
3459 | ||||
3460 | if (ScalarTy->isFloatTy() || ScalarTy->isDoubleTy()) | |||
3461 | return true; | |||
3462 | ||||
3463 | if (!ScalarTy->isIntegerTy()) | |||
3464 | return false; | |||
3465 | ||||
3466 | unsigned IntWidth = ScalarTy->getIntegerBitWidth(); | |||
3467 | return IntWidth == 32 || IntWidth == 64; | |||
3468 | } | |||
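// The two call shapes described above look roughly like this (a sketch with
// made-up types, assuming an AVX-512 subtarget):
//   Loop Vectorizer (scalar type, VF not chosen yet):
//     isLegalMaskedGather(double, Align(8))  -> true
//   Scalarizer (vector type, so the shape checks also apply):
//     isLegalMaskedGather(<3 x i32>, ...)    -> false (non-power-of-2 count)
//     isLegalMaskedGather(<1 x i32>, ...)    -> false (single element)
//     isLegalMaskedGather(<8 x i32>, ...)    -> true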
3469 | ||||
3470 | bool X86TTIImpl::isLegalMaskedScatter(Type *DataType, MaybeAlign Alignment) { | |||
3471 | // AVX2 doesn't support scatter | |||
3472 | if (!ST->hasAVX512()) | |||
3473 | return false; | |||
3474 | return isLegalMaskedGather(DataType, Alignment); | |||
3475 | } | |||
3476 | ||||
3477 | bool X86TTIImpl::hasDivRemOp(Type *DataType, bool IsSigned) { | |||
3478 | EVT VT = TLI->getValueType(DL, DataType); | |||
3479 | return TLI->isOperationLegal(IsSigned ? ISD::SDIVREM : ISD::UDIVREM, VT); | |||
3480 | } | |||
3481 | ||||
3482 | bool X86TTIImpl::isFCmpOrdCheaperThanFCmpZero(Type *Ty) { | |||
3483 | return false; | |||
3484 | } | |||
3485 | ||||
3486 | bool X86TTIImpl::areInlineCompatible(const Function *Caller, | |||
3487 | const Function *Callee) const { | |||
3488 | const TargetMachine &TM = getTLI()->getTargetMachine(); | |||
3489 | ||||
3490 | // Treat this as a subset check over subtarget features. | |||
3491 | const FeatureBitset &CallerBits = | |||
3492 | TM.getSubtargetImpl(*Caller)->getFeatureBits(); | |||
3493 | const FeatureBitset &CalleeBits = | |||
3494 | TM.getSubtargetImpl(*Callee)->getFeatureBits(); | |||
3495 | ||||
3496 | FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList; | |||
3497 | FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList; | |||
3498 | return (RealCallerBits & RealCalleeBits) == RealCalleeBits; | |||
3499 | } | |||
3500 | ||||
3501 | bool X86TTIImpl::areFunctionArgsABICompatible( | |||
3502 | const Function *Caller, const Function *Callee, | |||
3503 | SmallPtrSetImpl<Argument *> &Args) const { | |||
3504 | if (!BaseT::areFunctionArgsABICompatible(Caller, Callee, Args)) | |||
3505 | return false; | |||
3506 | ||||
3507 | // If we get here, we know the target features match. If one function | |||
3508 | // considers 512-bit vectors legal and the other does not, consider them | |||
3509 | // incompatible. | |||
3510 | // FIXME Look at the arguments and only consider 512 bit or larger vectors? | |||
3511 | const TargetMachine &TM = getTLI()->getTargetMachine(); | |||
3512 | ||||
3513 | return TM.getSubtarget<X86Subtarget>(*Caller).useAVX512Regs() == | |||
3514 | TM.getSubtarget<X86Subtarget>(*Callee).useAVX512Regs(); | |||
3515 | } | |||
3516 | ||||
3517 | X86TTIImpl::TTI::MemCmpExpansionOptions | |||
3518 | X86TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const { | |||
3519 | TTI::MemCmpExpansionOptions Options; | |||
3520 | Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize); | |||
3521 | Options.NumLoadsPerBlock = 2; | |||
3522 | if (IsZeroCmp) { | |||
3523 | // Only enable vector loads for equality comparison. Right now the vector | |||
3524 | // version is not as fast for three way compare (see #33329). | |||
3525 | const unsigned PreferredWidth = ST->getPreferVectorWidth(); | |||
3526 | if (PreferredWidth >= 512 && ST->hasAVX512()) Options.LoadSizes.push_back(64); | |||
3527 | if (PreferredWidth >= 256 && ST->hasAVX()) Options.LoadSizes.push_back(32); | |||
3528 | if (PreferredWidth >= 128 && ST->hasSSE2()) Options.LoadSizes.push_back(16); | |||
3529 | // All GPR and vector loads can be unaligned. | |||
3530 | Options.AllowOverlappingLoads = true; | |||
3531 | } | |||
3532 | if (ST->is64Bit()) { | |||
3533 | Options.LoadSizes.push_back(8); | |||
3534 | } | |||
3535 | Options.LoadSizes.push_back(4); | |||
3536 | Options.LoadSizes.push_back(2); | |||
3537 | Options.LoadSizes.push_back(1); | |||
3538 | return Options; | |||
3539 | } | |||
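// For example, on a hypothetical 64-bit subtarget with AVX2 and a preferred
// vector width of 256, the zero-compare options above come out as:
//   Options.NumLoadsPerBlock      = 2
//   Options.LoadSizes             = {32, 16, 8, 4, 2, 1}  // bytes, descending
//   Options.AllowOverlappingLoads = true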
3540 | ||||
3541 | bool X86TTIImpl::enableInterleavedAccessVectorization() { | |||
3542 | // TODO: We expect this to be beneficial regardless of arch, | |||
3543 | // but there are currently some unexplained performance artifacts on Atom. | |||
3544 | // As a temporary solution, disable on Atom. | |||
3545 | return !(ST->isAtom()); | |||
3546 | } | |||
3547 | ||||
3548 | // Get a cost estimate for interleaved load/store operations for AVX2. | |||
3549 | // \p Factor is the interleaved-access factor (stride) - number of | |||
3550 | // (interleaved) elements in the group. | |||
3551 | // \p Indices contains the indices for a strided load: when the | |||
3552 | // interleaved load has gaps they indicate which elements are used. | |||
3553 | // If Indices is empty (or if the number of indices is equal to the size | |||
3554 | // of the interleaved-access as given in \p Factor) the access has no gaps. | |||
3555 | // | |||
3556 | // As opposed to AVX-512, AVX2 does not have generic shuffles that allow | |||
3557 | // computing the cost using a generic formula as a function of generic | |||
3558 | // shuffles. We therefore use a lookup table instead, filled according to | |||
3559 | // the instruction sequences that codegen currently generates. | |||
3560 | int X86TTIImpl::getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy, | |||
3561 | unsigned Factor, | |||
3562 | ArrayRef<unsigned> Indices, | |||
3563 | unsigned Alignment, | |||
3564 | unsigned AddressSpace, | |||
3565 | bool UseMaskForCond, | |||
3566 | bool UseMaskForGaps) { | |||
3567 | ||||
3568 | if (UseMaskForCond || UseMaskForGaps) | |||
3569 | return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, | |||
3570 | Alignment, AddressSpace, | |||
3571 | UseMaskForCond, UseMaskForGaps); | |||
3572 | ||||
3573 | // We currently support only fully-interleaved groups, with no gaps. | |||
3574 | // TODO: Support also strided loads (interleaved-groups with gaps). | |||
3575 | if (Indices.size() && Indices.size() != Factor) | |||
3576 | return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, | |||
3577 | Alignment, AddressSpace); | |||
3578 | ||||
3579 | // VecTy for interleave memop is <VF*Factor x Elt>. | |||
3580 | // So, for VF=4, Interleave Factor = 3, Element type = i32 we have | |||
3581 | // VecTy = <12 x i32>. | |||
3582 | MVT LegalVT = getTLI()->getTypeLegalizationCost(DL, VecTy).second; | |||
3583 | ||||
3584 | // This function can be called with VecTy=<6xi128>, Factor=3, in which case | |||
3585 | // VF=2, but v2i128 is an unsupported MVT vector type | |||
3586 | // (see MachineValueType.h::getVectorVT()). | |||
3587 | if (!LegalVT.isVector()) | |||
3588 | return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, | |||
3589 | Alignment, AddressSpace); | |||
3590 | ||||
3591 | unsigned VF = VecTy->getVectorNumElements() / Factor; | |||
3592 | Type *ScalarTy = VecTy->getVectorElementType(); | |||
3593 | ||||
3594 | // Calculate the number of memory operations (NumOfMemOps) required | |||
3595 | // to load/store the VecTy. | |||
3596 | unsigned VecTySize = DL.getTypeStoreSize(VecTy); | |||
3597 | unsigned LegalVTSize = LegalVT.getStoreSize(); | |||
3598 | unsigned NumOfMemOps = (VecTySize + LegalVTSize - 1) / LegalVTSize; | |||
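// Worked example (illustrative): for VecTy = <12 x i32> under AVX2,
// VecTySize = 48 bytes and LegalVT = v8i32 (32 bytes), so
// NumOfMemOps = (48 + 32 - 1) / 32 = 2, i.e. a ceiling divide.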
3599 | ||||
3600 | // Get the cost of one memory operation. | |||
3601 | Type *SingleMemOpTy = VectorType::get(VecTy->getVectorElementType(), | |||
3602 | LegalVT.getVectorNumElements()); | |||
3603 | unsigned MemOpCost = getMemoryOpCost(Opcode, SingleMemOpTy, | |||
3604 | MaybeAlign(Alignment), AddressSpace); | |||
3605 | ||||
3606 | VectorType *VT = VectorType::get(ScalarTy, VF); | |||
3607 | EVT ETy = TLI->getValueType(DL, VT); | |||
3608 | if (!ETy.isSimple()) | |||
3609 | return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, | |||
3610 | Alignment, AddressSpace); | |||
3611 | ||||
3612 | // TODO: Complete for other data-types and strides. | |||
3613 | // Each combination of Stride, ElementTy and VF results in a different | |||
3614 | // sequence; the cost tables are therefore accessed with: | |||
3615 | // Factor (stride) and VectorType=VFxElemType. | |||
3616 | // The Cost accounts only for the shuffle sequence; | |||
3617 | // The cost of the loads/stores is accounted for separately. | |||
3618 | // | |||
3619 | static const CostTblEntry AVX2InterleavedLoadTbl[] = { | |||
3620 | { 2, MVT::v4i64, 6 }, //(load 8i64 and) deinterleave into 2 x 4i64 | |||
3621 | { 2, MVT::v4f64, 6 }, //(load 8f64 and) deinterleave into 2 x 4f64 | |||
3622 | ||||
3623 | { 3, MVT::v2i8, 10 }, //(load 6i8 and) deinterleave into 3 x 2i8 | |||
3624 | { 3, MVT::v4i8, 4 }, //(load 12i8 and) deinterleave into 3 x 4i8 | |||
3625 | { 3, MVT::v8i8, 9 }, //(load 24i8 and) deinterleave into 3 x 8i8 | |||
3626 | { 3, MVT::v16i8, 11}, //(load 48i8 and) deinterleave into 3 x 16i8 | |||
3627 | { 3, MVT::v32i8, 13}, //(load 96i8 and) deinterleave into 3 x 32i8 | |||
3628 | { 3, MVT::v8f32, 17 }, //(load 24f32 and) deinterleave into 3 x 8f32 | |||
3629 | ||||
3630 | { 4, MVT::v2i8, 12 }, //(load 8i8 and) deinterleave into 4 x 2i8 | |||
3631 | { 4, MVT::v4i8, 4 }, //(load 16i8 and) deinterleave into 4 x 4i8 | |||
3632 | { 4, MVT::v8i8, 20 }, //(load 32i8 and) deinterleave into 4 x 8i8 | |||
3633 | { 4, MVT::v16i8, 39 }, //(load 64i8 and) deinterleave into 4 x 16i8 | |||
3634 | { 4, MVT::v32i8, 80 }, //(load 128i8 and) deinterleave into 4 x 32i8 | |||
3635 | ||||
3636 | { 8, MVT::v8f32, 40 } //(load 64f32 and) deinterleave into 8 x 8f32 | |||
3637 | }; | |||
3638 | ||||
3639 | static const CostTblEntry AVX2InterleavedStoreTbl[] = { | |||
3640 | { 2, MVT::v4i64, 6 }, //interleave into 2 x 4i64 into 8i64 (and store) | |||
3641 | { 2, MVT::v4f64, 6 }, //interleave into 2 x 4f64 into 8f64 (and store) | |||
3642 | ||||
3643 | { 3, MVT::v2i8, 7 }, //interleave 3 x 2i8 into 6i8 (and store) | |||
3644 | { 3, MVT::v4i8, 8 }, //interleave 3 x 4i8 into 12i8 (and store) | |||
3645 | { 3, MVT::v8i8, 11 }, //interleave 3 x 8i8 into 24i8 (and store) | |||
3646 | { 3, MVT::v16i8, 11 }, //interleave 3 x 16i8 into 48i8 (and store) | |||
3647 | { 3, MVT::v32i8, 13 }, //interleave 3 x 32i8 into 96i8 (and store) | |||
3648 | ||||
3649 | { 4, MVT::v2i8, 12 }, //interleave 4 x 2i8 into 8i8 (and store) | |||
3650 | { 4, MVT::v4i8, 9 }, //interleave 4 x 4i8 into 16i8 (and store) | |||
3651 | { 4, MVT::v8i8, 10 }, //interleave 4 x 8i8 into 32i8 (and store) | |||
3652 | { 4, MVT::v16i8, 10 }, //interleave 4 x 16i8 into 64i8 (and store) | |||
3653 | { 4, MVT::v32i8, 12 } //interleave 4 x 32i8 into 128i8 (and store) | |||
3654 | }; | |||
3655 | ||||
3656 | if (Opcode == Instruction::Load) { | |||
3657 | if (const auto *Entry = | |||
3658 | CostTableLookup(AVX2InterleavedLoadTbl, Factor, ETy.getSimpleVT())) | |||
3659 | return NumOfMemOps * MemOpCost + Entry->Cost; | |||
3660 | } else { | |||
3661 | assert(Opcode == Instruction::Store && | |||
3662 | "Expected Store Instruction at this point"); | |||
3663 | if (const auto *Entry = | |||
3664 | CostTableLookup(AVX2InterleavedStoreTbl, Factor, ETy.getSimpleVT())) | |||
3665 | return NumOfMemOps * MemOpCost + Entry->Cost; | |||
3666 | } | |||
3667 | ||||
3668 | return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, | |||
3669 | Alignment, AddressSpace); | |||
3670 | } | |||
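// Putting the pieces together, a hedged worked example: a Factor = 3 load of
// <24 x i8>, assuming <24 x i8> legalizes to v32i8 and MemOpCost = 1 (both
// assumptions for illustration):
//   VF = 24 / 3 = 8, ETy = v8i8  -> AVX2InterleavedLoadTbl entry {3, v8i8, 9}
//   NumOfMemOps = ceil(24 / 32) = 1
//   Cost = NumOfMemOps * MemOpCost + Entry->Cost = 1 * 1 + 9 = 10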
3671 | ||||
3672 | // Get a cost estimate for interleaved load/store operations and strided loads. | |||
3673 | // \p Indices contains indices for strided load. | |||
3674 | // \p Factor - the factor of interleaving. | |||
3675 | // AVX-512 provides 3-src shuffles that significantly reduce the cost. | |||
3676 | int X86TTIImpl::getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy, | |||
3677 | unsigned Factor, | |||
3678 | ArrayRef<unsigned> Indices, | |||
3679 | unsigned Alignment, | |||
3680 | unsigned AddressSpace, | |||
3681 | bool UseMaskForCond, | |||
3682 | bool UseMaskForGaps) { | |||
3683 | ||||
3684 | if (UseMaskForCond || UseMaskForGaps) | |||
3685 | return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, | |||
3686 | Alignment, AddressSpace, | |||
3687 | UseMaskForCond, UseMaskForGaps); | |||
3688 | ||||
3689 | // VecTy for interleave memop is <VF*Factor x Elt>. | |||
3690 | // So, for VF=4, Interleave Factor = 3, Element type = i32 we have | |||
3691 | // VecTy = <12 x i32>. | |||
3692 | ||||
3693 | // Calculate the number of memory operations (NumOfMemOps) required | |||
3694 | // to load/store the VecTy. | |||
3695 | MVT LegalVT = getTLI()->getTypeLegalizationCost(DL, VecTy).second; | |||
3696 | unsigned VecTySize = DL.getTypeStoreSize(VecTy); | |||
3697 | unsigned LegalVTSize = LegalVT.getStoreSize(); | |||
3698 | unsigned NumOfMemOps = (VecTySize + LegalVTSize - 1) / LegalVTSize; | |||
3699 | ||||
3700 | // Get the cost of one memory operation. | |||
3701 | Type *SingleMemOpTy = VectorType::get(VecTy->getVectorElementType(), | |||
3702 | LegalVT.getVectorNumElements()); | |||
3703 | unsigned MemOpCost = getMemoryOpCost(Opcode, SingleMemOpTy, | |||
3704 | MaybeAlign(Alignment), AddressSpace); | |||
3705 | ||||
3706 | unsigned VF = VecTy->getVectorNumElements() / Factor; | |||
3707 | MVT VT = MVT::getVectorVT(MVT::getVT(VecTy->getScalarType()), VF); | |||
3708 | ||||
3709 | if (Opcode == Instruction::Load) { | |||
3710 | // The tables (AVX512InterleavedLoadTbl and AVX512InterleavedStoreTbl) | |||
3711 | // contain the cost of the optimized shuffle sequence that the | |||
3712 | // X86InterleavedAccess pass will generate. | |||
3713 | // The cost of loads and stores are computed separately from the table. | |||
3714 | ||||
3715 | // X86InterleavedAccess supports only the following interleaved-access groups. | |||
3716 | static const CostTblEntry AVX512InterleavedLoadTbl[] = { | |||
3717 | {3, MVT::v16i8, 12}, //(load 48i8 and) deinterleave into 3 x 16i8 | |||
3718 | {3, MVT::v32i8, 14}, //(load 96i8 and) deinterleave into 3 x 32i8 | |||
3719 | {3, MVT::v64i8, 22}, //(load 192i8 and) deinterleave into 3 x 64i8 | |||
3720 | }; | |||
3721 | ||||
3722 | if (const auto *Entry = | |||
3723 | CostTableLookup(AVX512InterleavedLoadTbl, Factor, VT)) | |||
3724 | return NumOfMemOps * MemOpCost + Entry->Cost; | |||
3725 | // If an entry does not exist, fall back to the default implementation. | |||
3726 | ||||
3727 | // The kind of shuffle depends on the number of loaded values. | |||
3728 | // If we load the entire data in one register, we can use a 1-src shuffle. | |||
3729 | // Otherwise, we'll merge 2 sources in each operation. | |||
3730 | TTI::ShuffleKind ShuffleKind = | |||
3731 | (NumOfMemOps > 1) ? TTI::SK_PermuteTwoSrc : TTI::SK_PermuteSingleSrc; | |||
3732 | ||||
3733 | unsigned ShuffleCost = | |||
3734 | getShuffleCost(ShuffleKind, SingleMemOpTy, 0, nullptr); | |||
3735 | ||||
3736 | unsigned NumOfLoadsInInterleaveGrp = | |||
3737 | Indices.size() ? Indices.size() : Factor; | |||
3738 | Type *ResultTy = VectorType::get(VecTy->getVectorElementType(), | |||
3739 | VecTy->getVectorNumElements() / Factor); | |||
3740 | unsigned NumOfResults = | |||
3741 | getTLI()->getTypeLegalizationCost(DL, ResultTy).first * | |||
3742 | NumOfLoadsInInterleaveGrp; | |||
3743 | ||||
3744 | // About half of the loads may be folded into shuffles when we have only | |||
3745 | // one result. If we have more than one result, we do not fold loads at all. | |||
3746 | unsigned NumOfUnfoldedLoads = | |||
3747 | NumOfResults > 1 ? NumOfMemOps : NumOfMemOps / 2; | |||
3748 | ||||
3749 | // Get a number of shuffle operations per result. | |||
3750 | unsigned NumOfShufflesPerResult = | |||
3751 | std::max((unsigned)1, (unsigned)(NumOfMemOps - 1)); | |||
3752 | ||||
3753 | // The SK_PermuteTwoSrc shuffle clobbers one of its source operands. | |||
3754 | // When we have more than one destination, we need additional instructions | |||
3755 | // to keep sources. | |||
3756 | unsigned NumOfMoves = 0; | |||
3757 | if (NumOfResults > 1 && ShuffleKind == TTI::SK_PermuteTwoSrc) | |||
3758 | NumOfMoves = NumOfResults * NumOfShufflesPerResult / 2; | |||
3759 | ||||
3760 | int Cost = NumOfResults * NumOfShufflesPerResult * ShuffleCost + | |||
3761 | NumOfUnfoldedLoads * MemOpCost + NumOfMoves; | |||
3762 | ||||
3763 | return Cost; | |||
3764 | } | |||
3765 | ||||
3766 | // Store. | |||
3767 | assert(Opcode == Instruction::Store && | |||
3768 | "Expected Store Instruction at this point"); | |||
3769 | // X86InterleavedAccess supports only the following interleaved-access groups. | |||
3770 | static const CostTblEntry AVX512InterleavedStoreTbl[] = { | |||
3771 | {3, MVT::v16i8, 12}, // interleave 3 x 16i8 into 48i8 (and store) | |||
3772 | {3, MVT::v32i8, 14}, // interleave 3 x 32i8 into 96i8 (and store) | |||
3773 | {3, MVT::v64i8, 26}, // interleave 3 x 64i8 into 192i8 (and store) | |||
3774 | ||||
3775 | {4, MVT::v8i8, 10}, // interleave 4 x 8i8 into 32i8 (and store) | |||
3776 | {4, MVT::v16i8, 11}, // interleave 4 x 16i8 into 64i8 (and store) | |||
3777 | {4, MVT::v32i8, 14}, // interleave 4 x 32i8 into 128i8 (and store) | |||
3778 | {4, MVT::v64i8, 24} // interleave 4 x 64i8 into 256i8 (and store) | |||
3779 | }; | |||
3780 | ||||
3781 | if (const auto *Entry = | |||
3782 | CostTableLookup(AVX512InterleavedStoreTbl, Factor, VT)) | |||
3783 | return NumOfMemOps * MemOpCost + Entry->Cost; | |||
3784 | // If an entry does not exist, fall back to the default implementation. | |||
3785 | ||||
3786 | // There are no strided stores at the moment, and a store can't be folded | |||
3787 | // into a shuffle. | |||
3788 | unsigned NumOfSources = Factor; // The number of values to be merged. | |||
3789 | unsigned ShuffleCost = | |||
3790 | getShuffleCost(TTI::SK_PermuteTwoSrc, SingleMemOpTy, 0, nullptr); | |||
3791 | unsigned NumOfShufflesPerStore = NumOfSources - 1; | |||
3792 | ||||
3793 | // The SK_PermuteTwoSrc shuffle clobbers one of its source operands. | |||
3794 | // We need additional instructions to preserve the sources. | |||
3795 | unsigned NumOfMoves = NumOfMemOps * NumOfShufflesPerStore / 2; | |||
3796 | int Cost = NumOfMemOps * (MemOpCost + NumOfShufflesPerStore * ShuffleCost) + | |||
3797 | NumOfMoves; | |||
3798 | return Cost; | |||
3799 | } | |||
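// To make the store fallback concrete, a small worked example with unit costs
// assumed for illustration (Factor = 2, NumOfMemOps = 2):
//   NumOfShufflesPerStore = Factor - 1 = 1
//   NumOfMoves = NumOfMemOps * NumOfShufflesPerStore / 2 = 1
//   Cost = 2 * (MemOpCost + 1 * ShuffleCost) + 1 = 2 * (1 + 1) + 1 = 5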
3800 | ||||
3801 | int X86TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, | |||
3802 | unsigned Factor, | |||
3803 | ArrayRef<unsigned> Indices, | |||
3804 | unsigned Alignment, | |||
3805 | unsigned AddressSpace, | |||
3806 | bool UseMaskForCond, | |||
3807 | bool UseMaskForGaps) { | |||
3808 | auto isSupportedOnAVX512 = [](Type *VecTy, bool HasBW) { | |||
3809 | Type *EltTy = VecTy->getVectorElementType(); | |||
3810 | if (EltTy->isFloatTy() || EltTy->isDoubleTy() || EltTy->isIntegerTy(64) || | |||
3811 | EltTy->isIntegerTy(32) || EltTy->isPointerTy()) | |||
3812 | return true; | |||
3813 | if (EltTy->isIntegerTy(16) || EltTy->isIntegerTy(8)) | |||
3814 | return HasBW; | |||
3815 | return false; | |||
3816 | }; | |||
3817 | if (ST->hasAVX512() && isSupportedOnAVX512(VecTy, ST->hasBWI())) | |||
3818 | return getInterleavedMemoryOpCostAVX512(Opcode, VecTy, Factor, Indices, | |||
3819 | Alignment, AddressSpace, | |||
3820 | UseMaskForCond, UseMaskForGaps); | |||
3821 | if (ST->hasAVX2()) | |||
3822 | return getInterleavedMemoryOpCostAVX2(Opcode, VecTy, Factor, Indices, | |||
3823 | Alignment, AddressSpace, | |||
3824 | UseMaskForCond, UseMaskForGaps); | |||
3825 | ||||
3826 | return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, | |||
3827 | Alignment, AddressSpace, | |||
3828 | UseMaskForCond, UseMaskForGaps); | |||
3829 | } |
1 | //===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===// | ||||||||||||||
2 | // | ||||||||||||||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||||||||||||
4 | // See https://llvm.org/LICENSE.txt for license information. | ||||||||||||||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||||||||||||
6 | // | ||||||||||||||
7 | //===----------------------------------------------------------------------===// | ||||||||||||||
8 | /// \file | ||||||||||||||
9 | /// This file provides helpers for the implementation of | ||||||||||||||
10 | /// a TargetTransformInfo-conforming class. | ||||||||||||||
11 | /// | ||||||||||||||
12 | //===----------------------------------------------------------------------===// | ||||||||||||||
13 | |||||||||||||||
14 | #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H | ||||||||||||||
15 | #define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H | ||||||||||||||
16 | |||||||||||||||
17 | #include "llvm/Analysis/ScalarEvolutionExpressions.h" | ||||||||||||||
18 | #include "llvm/Analysis/TargetTransformInfo.h" | ||||||||||||||
19 | #include "llvm/Analysis/VectorUtils.h" | ||||||||||||||
20 | #include "llvm/IR/CallSite.h" | ||||||||||||||
21 | #include "llvm/IR/DataLayout.h" | ||||||||||||||
22 | #include "llvm/IR/Function.h" | ||||||||||||||
23 | #include "llvm/IR/GetElementPtrTypeIterator.h" | ||||||||||||||
24 | #include "llvm/IR/Operator.h" | ||||||||||||||
25 | #include "llvm/IR/Type.h" | ||||||||||||||
26 | |||||||||||||||
27 | namespace llvm { | ||||||||||||||
28 | |||||||||||||||
29 | /// Base class for use as a mix-in that aids implementing | ||||||||||||||
30 | /// a TargetTransformInfo-compatible class. | ||||||||||||||
31 | class TargetTransformInfoImplBase { | ||||||||||||||
32 | protected: | ||||||||||||||
33 | typedef TargetTransformInfo TTI; | ||||||||||||||
34 | |||||||||||||||
35 | const DataLayout &DL; | ||||||||||||||
36 | |||||||||||||||
37 | explicit TargetTransformInfoImplBase(const DataLayout &DL) : DL(DL) {} | ||||||||||||||
38 | |||||||||||||||
39 | public: | ||||||||||||||
40 | // Provide value semantics. MSVC requires that we spell all of these out. | ||||||||||||||
41 | TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg) | ||||||||||||||
42 | : DL(Arg.DL) {} | ||||||||||||||
43 | TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {} | ||||||||||||||
44 | |||||||||||||||
45 | const DataLayout &getDataLayout() const { return DL; } | ||||||||||||||
46 | |||||||||||||||
47 | unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) { | ||||||||||||||
48 | switch (Opcode) { | ||||||||||||||
49 | default: | ||||||||||||||
50 | // By default, just classify everything as 'basic'. | ||||||||||||||
51 | return TTI::TCC_Basic; | ||||||||||||||
52 | |||||||||||||||
53 | case Instruction::GetElementPtr: | ||||||||||||||
54 | llvm_unreachable("Use getGEPCost for GEP operations!"); | ||||||||||||||
55 | |||||||||||||||
56 | case Instruction::BitCast: | ||||||||||||||
57 | assert(OpTy && "Cast instructions must provide the operand type"); | ||||||||||||||
58 | if (Ty == OpTy || (Ty->isPointerTy() && OpTy->isPointerTy())) | ||||||||||||||
59 | // Identity and pointer-to-pointer casts are free. | ||||||||||||||
60 | return TTI::TCC_Free; | ||||||||||||||
61 | |||||||||||||||
62 | // Otherwise, the default basic cost is used. | ||||||||||||||
63 | return TTI::TCC_Basic; | ||||||||||||||
64 | |||||||||||||||
65 | case Instruction::FDiv: | ||||||||||||||
66 | case Instruction::FRem: | ||||||||||||||
67 | case Instruction::SDiv: | ||||||||||||||
68 | case Instruction::SRem: | ||||||||||||||
69 | case Instruction::UDiv: | ||||||||||||||
70 | case Instruction::URem: | ||||||||||||||
71 | return TTI::TCC_Expensive; | ||||||||||||||
72 | |||||||||||||||
73 | case Instruction::IntToPtr: { | ||||||||||||||
74 | // An inttoptr cast is free so long as the input is a legal integer type | ||||||||||||||
75 | // which doesn't contain values outside the range of a pointer. | ||||||||||||||
76 | unsigned OpSize = OpTy->getScalarSizeInBits(); | ||||||||||||||
77 | if (DL.isLegalInteger(OpSize) && | ||||||||||||||
78 | OpSize <= DL.getPointerTypeSizeInBits(Ty)) | ||||||||||||||
79 | return TTI::TCC_Free; | ||||||||||||||
80 | |||||||||||||||
81 | // Otherwise it's not a no-op. | ||||||||||||||
82 | return TTI::TCC_Basic; | ||||||||||||||
83 | } | ||||||||||||||
84 | case Instruction::PtrToInt: { | ||||||||||||||
85 | // A ptrtoint cast is free so long as the result is large enough to store | ||||||||||||||
86 | // the pointer, and a legal integer type. | ||||||||||||||
87 | unsigned DestSize = Ty->getScalarSizeInBits(); | ||||||||||||||
88 | if (DL.isLegalInteger(DestSize) && | ||||||||||||||
89 | DestSize >= DL.getPointerTypeSizeInBits(OpTy)) | ||||||||||||||
90 | return TTI::TCC_Free; | ||||||||||||||
91 | |||||||||||||||
92 | // Otherwise it's not a no-op. | ||||||||||||||
93 | return TTI::TCC_Basic; | ||||||||||||||
94 | } | ||||||||||||||
95 | case Instruction::Trunc: | ||||||||||||||
96 | // trunc to a native type is free (assuming the target has compare and | ||||||||||||||
97 | // shift-right of the same width). | ||||||||||||||
98 | if (DL.isLegalInteger(DL.getTypeSizeInBits(Ty))) | ||||||||||||||
99 | return TTI::TCC_Free; | ||||||||||||||
100 | |||||||||||||||
101 | return TTI::TCC_Basic; | ||||||||||||||
102 | } | ||||||||||||||
103 | } | ||||||||||||||
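// For instance, with a typical 64-bit DataLayout where i32/i64 are legal (an
// assumption for illustration), the cast cases above resolve as:
//   inttoptr i64 %x to i8*   -> TCC_Free  (legal integer, fits the pointer)
//   inttoptr i128 %x to i8*  -> TCC_Basic (i128 is not a legal integer)
//   ptrtoint i8* %p to i64   -> TCC_Free  (legal and wide enough)
//   ptrtoint i8* %p to i32   -> TCC_Basic (narrower than the pointer)
//   trunc i64 %x to i32      -> TCC_Free  (i32 is legal)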
104 | |||||||||||||||
105 | int getGEPCost(Type *PointeeType, const Value *Ptr, | ||||||||||||||
106 | ArrayRef<const Value *> Operands) { | ||||||||||||||
107 | // In the basic model, we just assume that all-constant GEPs will be folded | ||||||||||||||
108 | // into their uses via addressing modes. | ||||||||||||||
109 | for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx) | ||||||||||||||
110 | if (!isa<Constant>(Operands[Idx])) | ||||||||||||||
111 | return TTI::TCC_Basic; | ||||||||||||||
112 | |||||||||||||||
113 | return TTI::TCC_Free; | ||||||||||||||
114 | } | ||||||||||||||
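// In IR terms the rule above means, for example:
//   getelementptr [4 x i32], [4 x i32]* @g, i64 0, i64 2  -> TCC_Free
//   getelementptr i32, i32* %p, i64 %i                    -> TCC_Basic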
115 | |||||||||||||||
116 | unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, | ||||||||||||||
117 | unsigned &JTSize, | ||||||||||||||
118 | ProfileSummaryInfo *PSI, | ||||||||||||||
119 | BlockFrequencyInfo *BFI) { | ||||||||||||||
120 | (void)PSI; | ||||||||||||||
121 | (void)BFI; | ||||||||||||||
122 | JTSize = 0; | ||||||||||||||
123 | return SI.getNumCases(); | ||||||||||||||
124 | } | ||||||||||||||
125 | |||||||||||||||
126 | int getExtCost(const Instruction *I, const Value *Src) { | ||||||||||||||
127 | return TTI::TCC_Basic; | ||||||||||||||
128 | } | ||||||||||||||
129 | |||||||||||||||
130 | unsigned getCallCost(FunctionType *FTy, int NumArgs, const User *U) { | ||||||||||||||
131 | assert(FTy && "FunctionType must be provided to this routine."); | ||||||||||||||
132 | |||||||||||||||
133 | // The target-independent implementation just measures the size of the | ||||||||||||||
134 | // function by approximating that each argument will take on average one | ||||||||||||||
135 | // instruction to prepare. | ||||||||||||||
136 | |||||||||||||||
137 | if (NumArgs < 0) | ||||||||||||||
138 | // Set the argument number to the number of explicit arguments in the | ||||||||||||||
139 | // function. | ||||||||||||||
140 | NumArgs = FTy->getNumParams(); | ||||||||||||||
141 | |||||||||||||||
142 | return TTI::TCC_Basic * (NumArgs + 1); | ||||||||||||||
143 | } | ||||||||||||||
144 | |||||||||||||||
145 | unsigned getInliningThresholdMultiplier() { return 1; } | ||||||||||||||
146 | |||||||||||||||
147 | int getInlinerVectorBonusPercent() { return 150; } | ||||||||||||||
148 | |||||||||||||||
149 | unsigned getMemcpyCost(const Instruction *I) { | ||||||||||||||
150 | return TTI::TCC_Expensive; | ||||||||||||||
151 | } | ||||||||||||||
152 | |||||||||||||||
153 | bool hasBranchDivergence() { return false; } | ||||||||||||||
154 | |||||||||||||||
155 | bool useGPUDivergenceAnalysis() { return false; } | ||||||||||||||
156 | |||||||||||||||
157 | bool isSourceOfDivergence(const Value *V) { return false; } | ||||||||||||||
158 | |||||||||||||||
159 | bool isAlwaysUniform(const Value *V) { return false; } | ||||||||||||||
160 | |||||||||||||||
161 | unsigned getFlatAddressSpace() { | ||||||||||||||
162 | return -1; | ||||||||||||||
163 | } | ||||||||||||||
164 | |||||||||||||||
165 | bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes, | ||||||||||||||
166 | Intrinsic::ID IID) const { | ||||||||||||||
167 | return false; | ||||||||||||||
168 | } | ||||||||||||||
169 | |||||||||||||||
170 | bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, | ||||||||||||||
171 | Value *OldV, Value *NewV) const { | ||||||||||||||
172 | return false; | ||||||||||||||
173 | } | ||||||||||||||
174 | |||||||||||||||
175 | bool isLoweredToCall(const Function *F) { | ||||||||||||||
176 | assert(F && "A concrete function must be provided to this routine."); | ||||||||||||||
177 | |||||||||||||||
178 | // FIXME: These should almost certainly not be handled here, and instead | ||||||||||||||
179 | // handled with the help of TLI or the target itself. This was largely | ||||||||||||||
180 | // ported from existing analysis heuristics here so that such refactorings | ||||||||||||||
181 | // can take place in the future. | ||||||||||||||
182 | |||||||||||||||
183 | if (F->isIntrinsic()) | ||||||||||||||
184 | return false; | ||||||||||||||
185 | |||||||||||||||
186 | if (F->hasLocalLinkage() || !F->hasName()) | ||||||||||||||
187 | return true; | ||||||||||||||
188 | |||||||||||||||
189 | StringRef Name = F->getName(); | ||||||||||||||
190 | |||||||||||||||
191 | // These will all likely lower to a single selection DAG node. | ||||||||||||||
192 | if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" || | ||||||||||||||
193 | Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" || | ||||||||||||||
194 | Name == "fmin" || Name == "fminf" || Name == "fminl" || | ||||||||||||||
195 | Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" || | ||||||||||||||
196 | Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" || | ||||||||||||||
197 | Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") | ||||||||||||||
198 | return false; | ||||||||||||||
199 | |||||||||||||||
200 | // These are all likely to be optimized into something smaller. | ||||||||||||||
201 | if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" || | ||||||||||||||
202 | Name == "exp2l" || Name == "exp2f" || Name == "floor" || | ||||||||||||||
203 | Name == "floorf" || Name == "ceil" || Name == "round" || | ||||||||||||||
204 | Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" || | ||||||||||||||
205 | Name == "llabs") | ||||||||||||||
206 | return false; | ||||||||||||||
207 | |||||||||||||||
208 | return true; | ||||||||||||||
209 | } | ||||||||||||||
210 | |||||||||||||||
211 | bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, | ||||||||||||||
212 | AssumptionCache &AC, | ||||||||||||||
213 | TargetLibraryInfo *LibInfo, | ||||||||||||||
214 | HardwareLoopInfo &HWLoopInfo) { | ||||||||||||||
215 | return false; | ||||||||||||||
216 | } | ||||||||||||||
217 | |||||||||||||||
218 | bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, | ||||||||||||||
219 | AssumptionCache &AC, TargetLibraryInfo *TLI, | ||||||||||||||
220 | DominatorTree *DT, | ||||||||||||||
221 | const LoopAccessInfo *LAI) const { | ||||||||||||||
222 | return false; | ||||||||||||||
223 | } | ||||||||||||||
224 | |||||||||||||||
225 | void getUnrollingPreferences(Loop *, ScalarEvolution &, | ||||||||||||||
226 | TTI::UnrollingPreferences &) {} | ||||||||||||||
227 | |||||||||||||||
228 | bool isLegalAddImmediate(int64_t Imm) { return false; } | ||||||||||||||
229 | |||||||||||||||
230 | bool isLegalICmpImmediate(int64_t Imm) { return false; } | ||||||||||||||
231 | |||||||||||||||
232 | bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, | ||||||||||||||
233 | bool HasBaseReg, int64_t Scale, | ||||||||||||||
234 | unsigned AddrSpace, Instruction *I = nullptr) { | ||||||||||||||
235 | // Guess that only reg and reg+reg addressing is allowed. This heuristic is | ||||||||||||||
236 | // taken from the implementation of LSR. | ||||||||||||||
237 | return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1); | ||||||||||||||
238 | } | ||||||||||||||
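// Under this default heuristic (examples only):
//   reg          (BaseGV = 0, BaseOffset = 0, Scale = 0)  -> legal
//   reg + reg    (BaseGV = 0, BaseOffset = 0, Scale = 1)  -> legal
//   reg + 4      (BaseOffset = 4)                         -> not legal
//   reg + 2*reg  (Scale = 2)                              -> not legal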
239 | |||||||||||||||
240 | bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) { | ||||||||||||||
241 | return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds, | ||||||||||||||
242 | C1.ScaleCost, C1.ImmCost, C1.SetupCost) < | ||||||||||||||
243 | std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds, | ||||||||||||||
244 | C2.ScaleCost, C2.ImmCost, C2.SetupCost); | ||||||||||||||
245 | } | ||||||||||||||
246 | |||||||||||||||
247 | bool canMacroFuseCmp() { return false; } | ||||||||||||||
248 | |||||||||||||||
249 | bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, | ||||||||||||||
250 | DominatorTree *DT, AssumptionCache *AC, | ||||||||||||||
251 | TargetLibraryInfo *LibInfo) { | ||||||||||||||
252 | return false; | ||||||||||||||
253 | } | ||||||||||||||
254 | |||||||||||||||
255 | bool shouldFavorPostInc() const { return false; } | ||||||||||||||
256 | |||||||||||||||
257 | bool shouldFavorBackedgeIndex(const Loop *L) const { return false; } | ||||||||||||||
258 | |||||||||||||||
259 | bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) { return false; } | ||||||||||||||
260 | |||||||||||||||
261 | bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) { return false; } | ||||||||||||||
262 | |||||||||||||||
263 | bool isLegalNTStore(Type *DataType, Align Alignment) { | ||||||||||||||
264 | // By default, assume nontemporal memory stores are available for stores | ||||||||||||||
265 | // that are aligned and have a size that is a power of 2. | ||||||||||||||
266 | unsigned DataSize = DL.getTypeStoreSize(DataType); | ||||||||||||||
267 | return Alignment >= DataSize && isPowerOf2_32(DataSize); | ||||||||||||||
268 | } | ||||||||||||||
269 | |||||||||||||||
270 | bool isLegalNTLoad(Type *DataType, Align Alignment) { | ||||||||||||||
271 | // By default, assume nontemporal memory loads are available for loads that | ||||||||||||||
272 | // are aligned and have a size that is a power of 2. | ||||||||||||||
273 | unsigned DataSize = DL.getTypeStoreSize(DataType); | ||||||||||||||
274 | return Alignment >= DataSize && isPowerOf2_32(DataSize); | ||||||||||||||
275 | } | ||||||||||||||
276 | |||||||||||||||
277 | bool isLegalMaskedScatter(Type *DataType, MaybeAlign Alignment) { | ||||||||||||||
278 | return false; | ||||||||||||||
279 | } | ||||||||||||||
280 | |||||||||||||||
281 | bool isLegalMaskedGather(Type *DataType, MaybeAlign Alignment) { | ||||||||||||||
282 | return false; | ||||||||||||||
283 | } | ||||||||||||||
284 | |||||||||||||||
285 | bool isLegalMaskedCompressStore(Type *DataType) { return false; } | ||||||||||||||
286 | |||||||||||||||
287 | bool isLegalMaskedExpandLoad(Type *DataType) { return false; } | ||||||||||||||
288 | |||||||||||||||
289 | bool hasDivRemOp(Type *DataType, bool IsSigned) { return false; } | ||||||||||||||
290 | |||||||||||||||
291 | bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) { return false; } | ||||||||||||||
292 | |||||||||||||||
293 | bool prefersVectorizedAddressing() { return true; } | ||||||||||||||
294 | |||||||||||||||
295 | int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, | ||||||||||||||
296 | bool HasBaseReg, int64_t Scale, unsigned AddrSpace) { | ||||||||||||||
297 | // Guess that all legal addressing modes are free. | ||||||||||||||
298 | if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, | ||||||||||||||
299 | Scale, AddrSpace)) | ||||||||||||||
300 | return 0; | ||||||||||||||
301 | return -1; | ||||||||||||||
302 | } | ||||||||||||||
303 | |||||||||||||||
304 | bool LSRWithInstrQueries() { return false; } | ||||||||||||||
305 | |||||||||||||||
306 | bool isTruncateFree(Type *Ty1, Type *Ty2) { return false; } | ||||||||||||||
307 | |||||||||||||||
308 | bool isProfitableToHoist(Instruction *I) { return true; } | ||||||||||||||
309 | |||||||||||||||
310 | bool useAA() { return false; } | ||||||||||||||
311 | |||||||||||||||
312 | bool isTypeLegal(Type *Ty) { return false; } | ||||||||||||||
313 | |||||||||||||||
314 | bool shouldBuildLookupTables() { return true; } | ||||||||||||||
315 | bool shouldBuildLookupTablesForConstant(Constant *C) { return true; } | ||||||||||||||
316 | |||||||||||||||
317 | bool useColdCCForColdCall(Function &F) { return false; } | ||||||||||||||
318 | |||||||||||||||
319 | unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) { | ||||||||||||||
320 | return 0; | ||||||||||||||
321 | } | ||||||||||||||
322 | |||||||||||||||
323 | unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, | ||||||||||||||
324 | unsigned VF) { return 0; } | ||||||||||||||
325 | |||||||||||||||
326 | bool supportsEfficientVectorElementLoadStore() { return false; } | ||||||||||||||
327 | |||||||||||||||
328 | bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; } | ||||||||||||||
329 | |||||||||||||||
330 | TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, | ||||||||||||||
331 | bool IsZeroCmp) const { | ||||||||||||||
332 | return {}; | ||||||||||||||
333 | } | ||||||||||||||
334 | |||||||||||||||
335 | bool enableInterleavedAccessVectorization() { return false; } | ||||||||||||||
336 | |||||||||||||||
337 | bool enableMaskedInterleavedAccessVectorization() { return false; } | ||||||||||||||
338 | |||||||||||||||
339 | bool isFPVectorizationPotentiallyUnsafe() { return false; } | ||||||||||||||
340 | |||||||||||||||
341 | bool allowsMisalignedMemoryAccesses(LLVMContext &Context, | ||||||||||||||
342 | unsigned BitWidth, | ||||||||||||||
343 | unsigned AddressSpace, | ||||||||||||||
344 | unsigned Alignment, | ||||||||||||||
345 | bool *Fast) { return false; } | ||||||||||||||
346 | |||||||||||||||
347 | TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) { | ||||||||||||||
348 | return TTI::PSK_Software; | ||||||||||||||
349 | } | ||||||||||||||
350 | |||||||||||||||
351 | bool haveFastSqrt(Type *Ty) { return false; } | ||||||||||||||
352 | |||||||||||||||
353 | bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) { return true; } | ||||||||||||||
354 | |||||||||||||||
355 | unsigned getFPOpCost(Type *Ty) { return TargetTransformInfo::TCC_Basic; } | ||||||||||||||
356 | |||||||||||||||
357 | int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm, | ||||||||||||||
358 | Type *Ty) { | ||||||||||||||
359 | return 0; | ||||||||||||||
360 | } | ||||||||||||||
361 | |||||||||||||||
362 | unsigned getIntImmCost(const APInt &Imm, Type *Ty) { return TTI::TCC_Basic; } | ||||||||||||||
363 | |||||||||||||||
364 | unsigned getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, | ||||||||||||||
365 | Type *Ty) { | ||||||||||||||
366 | return TTI::TCC_Free; | ||||||||||||||
367 | } | ||||||||||||||
368 | |||||||||||||||
369 | unsigned getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, | ||||||||||||||
370 | const APInt &Imm, Type *Ty) { | ||||||||||||||
371 | return TTI::TCC_Free; | ||||||||||||||
372 | } | ||||||||||||||
373 | |||||||||||||||
374 | unsigned getNumberOfRegisters(unsigned ClassID) const { return 8; } | ||||||||||||||
375 | |||||||||||||||
376 | unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const { | ||||||||||||||
377 | return Vector ? 1 : 0; | ||||||||||||||
378 | } | ||||||||||||||
379 | |||||||||||||||
380 | const char* getRegisterClassName(unsigned ClassID) const { | ||||||||||||||
381 | switch (ClassID) { | ||||||||||||||
382 | default: | ||||||||||||||
383 | return "Generic::Unknown Register Class"; | ||||||||||||||
384 | case 0: return "Generic::ScalarRC"; | ||||||||||||||
385 | case 1: return "Generic::VectorRC"; | ||||||||||||||
386 | } | ||||||||||||||
387 | } | ||||||||||||||
388 | |||||||||||||||
389 | unsigned getRegisterBitWidth(bool Vector) const { return 32; } | ||||||||||||||
390 | |||||||||||||||
391 | unsigned getMinVectorRegisterBitWidth() { return 128; } | ||||||||||||||
392 | |||||||||||||||
393 | bool shouldMaximizeVectorBandwidth(bool OptSize) const { return false; } | ||||||||||||||
394 | |||||||||||||||
395 | unsigned getMinimumVF(unsigned ElemWidth) const { return 0; } | ||||||||||||||
396 | |||||||||||||||
397 | bool | ||||||||||||||
398 | shouldConsiderAddressTypePromotion(const Instruction &I, | ||||||||||||||
399 | bool &AllowPromotionWithoutCommonHeader) { | ||||||||||||||
400 | AllowPromotionWithoutCommonHeader = false; | ||||||||||||||
401 | return false; | ||||||||||||||
402 | } | ||||||||||||||
403 | |||||||||||||||
404 | unsigned getCacheLineSize() const { return 0; } | ||||||||||||||
405 | |||||||||||||||
406 | llvm::Optional<unsigned> getCacheSize(TargetTransformInfo::CacheLevel Level) const { | ||||||||||||||
407 | switch (Level) { | ||||||||||||||
408 | case TargetTransformInfo::CacheLevel::L1D: | ||||||||||||||
409 | LLVM_FALLTHROUGH; | ||||||||||||||
410 | case TargetTransformInfo::CacheLevel::L2D: | ||||||||||||||
411 | return llvm::Optional<unsigned>(); | ||||||||||||||
412 | } | ||||||||||||||
413 | llvm_unreachable("Unknown TargetTransformInfo::CacheLevel"); | ||||||||||||||
414 | } | ||||||||||||||
415 | |||||||||||||||
416 | llvm::Optional<unsigned> getCacheAssociativity( | ||||||||||||||
417 | TargetTransformInfo::CacheLevel Level) const { | ||||||||||||||
418 | switch (Level) { | ||||||||||||||
419 | case TargetTransformInfo::CacheLevel::L1D: | ||||||||||||||
420 | LLVM_FALLTHROUGH; | ||||||||||||||
421 | case TargetTransformInfo::CacheLevel::L2D: | ||||||||||||||
422 | return llvm::Optional<unsigned>(); | ||||||||||||||
423 | } | ||||||||||||||
424 | |||||||||||||||
425 | llvm_unreachable("Unknown TargetTransformInfo::CacheLevel"); | ||||||||||||||
426 | } | ||||||||||||||
427 | |||||||||||||||
428 | unsigned getPrefetchDistance() const { return 0; } | ||||||||||||||
429 | unsigned getMinPrefetchStride() const { return 1; } | ||||||||||||||
430 | unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; } | ||||||||||||||
431 | |||||||||||||||
432 | unsigned getMaxInterleaveFactor(unsigned VF) { return 1; } | ||||||||||||||
433 | |||||||||||||||
434 | unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, | ||||||||||||||
435 | TTI::OperandValueKind Opd1Info, | ||||||||||||||
436 | TTI::OperandValueKind Opd2Info, | ||||||||||||||
437 | TTI::OperandValueProperties Opd1PropInfo, | ||||||||||||||
438 | TTI::OperandValueProperties Opd2PropInfo, | ||||||||||||||
439 | ArrayRef<const Value *> Args, | ||||||||||||||
440 | const Instruction *CxtI = nullptr) { | ||||||||||||||
441 | return 1; | ||||||||||||||
442 | } | ||||||||||||||
443 | |||||||||||||||
444 | unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Ty, int Index, | ||||||||||||||
445 | Type *SubTp) { | ||||||||||||||
446 | return 1; | ||||||||||||||
447 | } | ||||||||||||||
448 | |||||||||||||||
449 | unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, | ||||||||||||||
450 | const Instruction *I) { return 1; } | ||||||||||||||
451 | |||||||||||||||
452 | unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst, | ||||||||||||||
453 | VectorType *VecTy, unsigned Index) { | ||||||||||||||
454 | return 1; | ||||||||||||||
455 | } | ||||||||||||||
456 | |||||||||||||||
457 | unsigned getCFInstrCost(unsigned Opcode) { return 1; } | ||||||||||||||
458 | |||||||||||||||
459 | unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, | ||||||||||||||
460 | const Instruction *I) { | ||||||||||||||
461 | return 1; | ||||||||||||||
462 | } | ||||||||||||||
463 | |||||||||||||||
464 | unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { | ||||||||||||||
465 | return 1; | ||||||||||||||
466 | } | ||||||||||||||
467 | |||||||||||||||
468 | unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, | ||||||||||||||
469 | unsigned AddressSpace, const Instruction *I) { | ||||||||||||||
470 | return 1; | ||||||||||||||
471 | } | ||||||||||||||
472 | |||||||||||||||
473 | unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, | ||||||||||||||
474 | unsigned AddressSpace) { | ||||||||||||||
475 | return 1; | ||||||||||||||
476 | } | ||||||||||||||
477 | |||||||||||||||
478 | unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, | ||||||||||||||
479 | bool VariableMask, | ||||||||||||||
480 | unsigned Alignment) { | ||||||||||||||
481 | return 1; | ||||||||||||||
482 | } | ||||||||||||||
483 | |||||||||||||||
484 | unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, | ||||||||||||||
485 | unsigned Factor, | ||||||||||||||
486 | ArrayRef<unsigned> Indices, | ||||||||||||||
487 | unsigned Alignment, unsigned AddressSpace, | ||||||||||||||
488 | bool UseMaskForCond = false, | ||||||||||||||
489 | bool UseMaskForGaps = false) { | ||||||||||||||
490 | return 1; | ||||||||||||||
491 | } | ||||||||||||||
492 | |||||||||||||||
493 | unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, | ||||||||||||||
494 | ArrayRef<Type *> Tys, FastMathFlags FMF, | ||||||||||||||
495 | unsigned ScalarizationCostPassed) { | ||||||||||||||
496 | return 1; | ||||||||||||||
497 | } | ||||||||||||||
498 | unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, | ||||||||||||||
499 | ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) { | ||||||||||||||
500 | return 1; | ||||||||||||||
501 | } | ||||||||||||||
502 | |||||||||||||||
503 | unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) { | ||||||||||||||
504 | return 1; | ||||||||||||||
505 | } | ||||||||||||||
506 | |||||||||||||||
507 | unsigned getNumberOfParts(Type *Tp) { return 0; } | ||||||||||||||
508 | |||||||||||||||
509 | unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *, | ||||||||||||||
510 | const SCEV *) { | ||||||||||||||
511 | return 0; | ||||||||||||||
512 | } | ||||||||||||||
513 | |||||||||||||||
514 | unsigned getArithmeticReductionCost(unsigned, Type *, bool) { return 1; } | ||||||||||||||
515 | |||||||||||||||
516 | unsigned getMinMaxReductionCost(Type *, Type *, bool, bool) { return 1; } | ||||||||||||||
517 | |||||||||||||||
518 | unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) { return 0; } | ||||||||||||||
519 | |||||||||||||||
520 | bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) { | ||||||||||||||
521 | return false; | ||||||||||||||
522 | } | ||||||||||||||
523 | |||||||||||||||
524 | unsigned getAtomicMemIntrinsicMaxElementSize() const { | ||||||||||||||
525 | // Note for overrides: You must ensure for all element unordered-atomic | ||||||||||||||
526 | // memory intrinsics that all power-of-2 element sizes up to, and | ||||||||||||||
527 | // including, the return value of this method have a corresponding | ||||||||||||||
528 | // runtime lib call. These runtime lib call definitions can be found | ||||||||||||||
529 | // in RuntimeLibcalls.h | ||||||||||||||
530 | return 0; | ||||||||||||||
531 | } | ||||||||||||||
532 | |||||||||||||||
533 | Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, | ||||||||||||||
534 | Type *ExpectedType) { | ||||||||||||||
535 | return nullptr; | ||||||||||||||
536 | } | ||||||||||||||
537 | |||||||||||||||
538 | Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, | ||||||||||||||
539 | unsigned SrcAlign, unsigned DestAlign) const { | ||||||||||||||
540 | return Type::getInt8Ty(Context); | ||||||||||||||
541 | } | ||||||||||||||
542 | |||||||||||||||
543 | void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut, | ||||||||||||||
544 | LLVMContext &Context, | ||||||||||||||
545 | unsigned RemainingBytes, | ||||||||||||||
546 | unsigned SrcAlign, | ||||||||||||||
547 | unsigned DestAlign) const { | ||||||||||||||
548 | for (unsigned i = 0; i != RemainingBytes; ++i) | ||||||||||||||
549 | OpsOut.push_back(Type::getInt8Ty(Context)); | ||||||||||||||
550 | } | ||||||||||||||
551 | |||||||||||||||
552 | bool areInlineCompatible(const Function *Caller, | ||||||||||||||
553 | const Function *Callee) const { | ||||||||||||||
554 | return (Caller->getFnAttribute("target-cpu") == | ||||||||||||||
555 | Callee->getFnAttribute("target-cpu")) && | ||||||||||||||
556 | (Caller->getFnAttribute("target-features") == | ||||||||||||||
557 | Callee->getFnAttribute("target-features")); | ||||||||||||||
558 | } | ||||||||||||||
559 | |||||||||||||||
560 | bool areFunctionArgsABICompatible(const Function *Caller, const Function *Callee, | ||||||||||||||
561 | SmallPtrSetImpl<Argument *> &Args) const { | ||||||||||||||
562 | return (Caller->getFnAttribute("target-cpu") == | ||||||||||||||
563 | Callee->getFnAttribute("target-cpu")) && | ||||||||||||||
564 | (Caller->getFnAttribute("target-features") == | ||||||||||||||
565 | Callee->getFnAttribute("target-features")); | ||||||||||||||
566 | } | ||||||||||||||
567 | |||||||||||||||
568 | bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty, | ||||||||||||||
569 | const DataLayout &DL) const { | ||||||||||||||
570 | return false; | ||||||||||||||
571 | } | ||||||||||||||
572 | |||||||||||||||
573 | bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty, | ||||||||||||||
574 | const DataLayout &DL) const { | ||||||||||||||
575 | return false; | ||||||||||||||
576 | } | ||||||||||||||
577 | |||||||||||||||
578 | unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; } | ||||||||||||||
579 | |||||||||||||||
580 | bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; } | ||||||||||||||
581 | |||||||||||||||
582 | bool isLegalToVectorizeStore(StoreInst *SI) const { return true; } | ||||||||||||||
583 | |||||||||||||||
584 | bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, | ||||||||||||||
585 | unsigned Alignment, | ||||||||||||||
586 | unsigned AddrSpace) const { | ||||||||||||||
587 | return true; | ||||||||||||||
588 | } | ||||||||||||||
589 | |||||||||||||||
590 | bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, | ||||||||||||||
591 | unsigned Alignment, | ||||||||||||||
592 | unsigned AddrSpace) const { | ||||||||||||||
593 | return true; | ||||||||||||||
594 | } | ||||||||||||||
595 | |||||||||||||||
596 | unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, | ||||||||||||||
597 | unsigned ChainSizeInBytes, | ||||||||||||||
598 | VectorType *VecTy) const { | ||||||||||||||
599 | return VF; | ||||||||||||||
600 | } | ||||||||||||||
601 | |||||||||||||||
602 | unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, | ||||||||||||||
603 | unsigned ChainSizeInBytes, | ||||||||||||||
604 | VectorType *VecTy) const { | ||||||||||||||
605 | return VF; | ||||||||||||||
606 | } | ||||||||||||||
607 | |||||||||||||||
608 | bool useReductionIntrinsic(unsigned Opcode, Type *Ty, | ||||||||||||||
609 | TTI::ReductionFlags Flags) const { | ||||||||||||||
610 | return false; | ||||||||||||||
611 | } | ||||||||||||||
612 | |||||||||||||||
613 | bool shouldExpandReduction(const IntrinsicInst *II) const { | ||||||||||||||
614 | return true; | ||||||||||||||
615 | } | ||||||||||||||
616 | |||||||||||||||
617 | unsigned getGISelRematGlobalCost() const { | ||||||||||||||
618 | return 1; | ||||||||||||||
619 | } | ||||||||||||||
620 | |||||||||||||||
621 | protected: | ||||||||||||||
622 | // Obtain the minimum required size to hold the value (without the sign) | ||||||||||||||
623 | // In case of a vector it returns the min required size for one element. | ||||||||||||||
624 | unsigned minRequiredElementSize(const Value* Val, bool &isSigned) { | ||||||||||||||
625 | if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) { | ||||||||||||||
626 | const auto* VectorValue = cast<Constant>(Val); | ||||||||||||||
627 | |||||||||||||||
628 | // For a vector we need to pick the max among the per-element minimum | ||||||||||||||
629 | // required sizes. | ||||||||||||||
630 | auto *VT = cast<VectorType>(Val->getType()); | ||||||||||||||
631 | |||||||||||||||
632 | // Assume unsigned elements | ||||||||||||||
633 | isSigned = false; | ||||||||||||||
634 | |||||||||||||||
635 | // The max required size is the total vector width divided by num | ||||||||||||||
636 | // of elements in the vector | ||||||||||||||
637 | unsigned MaxRequiredSize = VT->getBitWidth() / VT->getNumElements(); | ||||||||||||||
638 | |||||||||||||||
639 | unsigned MinRequiredSize = 0; | ||||||||||||||
640 | for(unsigned i = 0, e = VT->getNumElements(); i < e; ++i) { | ||||||||||||||
641 | if (auto* IntElement = | ||||||||||||||
642 | dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) { | ||||||||||||||
643 | bool signedElement = IntElement->getValue().isNegative(); | ||||||||||||||
644 | // Get the element min required size. | ||||||||||||||
645 | unsigned ElementMinRequiredSize = | ||||||||||||||
646 | IntElement->getValue().getMinSignedBits() - 1; | ||||||||||||||
647 | // If one element is signed then the whole vector is considered signed. | ||||||||||||||
648 | isSigned |= signedElement; | ||||||||||||||
649 | // Save the max required bit size between all the elements. | ||||||||||||||
650 | MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize); | ||||||||||||||
651 | } | ||||||||||||||
652 | else { | ||||||||||||||
653 | // not an int constant element | ||||||||||||||
654 | return MaxRequiredSize; | ||||||||||||||
655 | } | ||||||||||||||
656 | } | ||||||||||||||
657 | return MinRequiredSize; | ||||||||||||||
658 | } | ||||||||||||||
659 | |||||||||||||||
660 | if (const auto* CI = dyn_cast<ConstantInt>(Val)) { | ||||||||||||||
661 | isSigned = CI->getValue().isNegative(); | ||||||||||||||
662 | return CI->getValue().getMinSignedBits() - 1; | ||||||||||||||
663 | } | ||||||||||||||
664 | |||||||||||||||
665 | if (const auto* Cast = dyn_cast<SExtInst>(Val)) { | ||||||||||||||
666 | isSigned = true; | ||||||||||||||
667 | return Cast->getSrcTy()->getScalarSizeInBits() - 1; | ||||||||||||||
668 | } | ||||||||||||||
669 | |||||||||||||||
670 | if (const auto* Cast = dyn_cast<ZExtInst>(Val)) { | ||||||||||||||
671 | isSigned = false; | ||||||||||||||
672 | return Cast->getSrcTy()->getScalarSizeInBits(); | ||||||||||||||
673 | } | ||||||||||||||
674 | |||||||||||||||
675 | isSigned = false; | ||||||||||||||
676 | return Val->getType()->getScalarSizeInBits(); | ||||||||||||||
677 | } | ||||||||||||||
678 | |||||||||||||||
679 | bool isStridedAccess(const SCEV *Ptr) { | ||||||||||||||
680 | return Ptr && isa<SCEVAddRecExpr>(Ptr); | ||||||||||||||
681 | } | ||||||||||||||
682 | |||||||||||||||
683 | const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE, | ||||||||||||||
684 | const SCEV *Ptr) { | ||||||||||||||
685 | if (!isStridedAccess(Ptr)) | ||||||||||||||
686 | return nullptr; | ||||||||||||||
687 | const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr); | ||||||||||||||
688 | return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE)); | ||||||||||||||
689 | } | ||||||||||||||
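// For example: a pointer SCEV of the form {%base,+,4} (advancing 4 bytes per
// loop iteration) is a strided access whose constant step is the SCEVConstant
// 4; isConstantStridedAccessLessThan() below compares that step against a
// given merge distance.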
690 | |||||||||||||||
691 | bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr, | ||||||||||||||
692 | int64_t MergeDistance) { | ||||||||||||||
693 | const SCEVConstant *Step = getConstantStrideStep(SE, Ptr); | ||||||||||||||
694 | if (!Step) | ||||||||||||||
695 | return false; | ||||||||||||||
696 | APInt StrideVal = Step->getAPInt(); | ||||||||||||||
697 | if (StrideVal.getBitWidth() > 64) | ||||||||||||||
698 | return false; | ||||||||||||||
699 | // FIXME: Need to take absolute value for negative stride case. | ||||||||||||||
700 | return StrideVal.getSExtValue() < MergeDistance; | ||||||||||||||
701 | } | ||||||||||||||
702 | }; | ||||||||||||||
703 | |||||||||||||||
704 | /// CRTP base class for use as a mix-in that aids implementing | ||||||||||||||
705 | /// a TargetTransformInfo-compatible class. | ||||||||||||||
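/// A minimal sketch of the intended use (the class name MyTTIImpl is only
/// illustrative): a target's TTI implementation derives, directly or via
/// BasicTTIImplBase, from this class with itself as the template parameter:
/// \code
///   class MyTTIImpl : public BasicTTIImplBase<MyTTIImpl> { ... };
/// \endcode
/// The static_cast<T *>(this) calls below then dispatch to the most derived
/// implementation without any virtual function overhead.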
706 | template <typename T> | ||||||||||||||
707 | class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase { | ||||||||||||||
708 | private: | ||||||||||||||
709 | typedef TargetTransformInfoImplBase BaseT; | ||||||||||||||
710 | |||||||||||||||
711 | protected: | ||||||||||||||
712 | explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {} | ||||||||||||||
713 | |||||||||||||||
714 | public: | ||||||||||||||
715 | using BaseT::getCallCost; | ||||||||||||||
716 | |||||||||||||||
717 | unsigned getCallCost(const Function *F, int NumArgs, const User *U) { | ||||||||||||||
718 | assert(F && "A concrete function must be provided to this routine."); | ||||||||||||||
719 | |||||||||||||||
720 | if (NumArgs < 0) | ||||||||||||||
721 | // Set the argument number to the number of explicit arguments in the | ||||||||||||||
722 | // function. | ||||||||||||||
723 | NumArgs = F->arg_size(); | ||||||||||||||
724 | |||||||||||||||
725 | if (Intrinsic::ID IID = F->getIntrinsicID()) { | ||||||||||||||
726 | FunctionType *FTy = F->getFunctionType(); | ||||||||||||||
727 | SmallVector<Type *, 8> ParamTys(FTy->param_begin(), FTy->param_end()); | ||||||||||||||
728 | return static_cast<T *>(this) | ||||||||||||||
729 | ->getIntrinsicCost(IID, FTy->getReturnType(), ParamTys, U); | ||||||||||||||
730 | } | ||||||||||||||
731 | |||||||||||||||
732 | if (!static_cast<T *>(this)->isLoweredToCall(F)) | ||||||||||||||
733 | return TTI::TCC_Basic; // Give a basic cost if it will be lowered | ||||||||||||||
734 | // directly. | ||||||||||||||
735 | |||||||||||||||
736 | return static_cast<T *>(this)->getCallCost(F->getFunctionType(), NumArgs, U); | ||||||||||||||
737 | } | ||||||||||||||
738 | |||||||||||||||
739 | unsigned getCallCost(const Function *F, ArrayRef<const Value *> Arguments, | ||||||||||||||
740 | const User *U) { | ||||||||||||||
741 | // Simply delegate to generic handling of the call. | ||||||||||||||
742 | // FIXME: We should use instsimplify or something else to catch calls which | ||||||||||||||
743 | // will constant fold with these arguments. | ||||||||||||||
744 | return static_cast<T *>(this)->getCallCost(F, Arguments.size(), U); | ||||||||||||||
745 | } | ||||||||||||||
746 | |||||||||||||||
747 | using BaseT::getGEPCost; | ||||||||||||||
748 | |||||||||||||||
749 | int getGEPCost(Type *PointeeType, const Value *Ptr, | ||||||||||||||
750 | ArrayRef<const Value *> Operands) { | ||||||||||||||
751 | assert(PointeeType && Ptr && "can't get GEPCost of nullptr"); | ||||||||||||||
752 | // TODO: Remove this when pointers have an opaque type. | ||||||||||||||
753 | assert(Ptr->getType()->getScalarType()->getPointerElementType() == | ||||||||||||||
754 | PointeeType && | ||||||||||||||
755 | "explicit pointee type doesn't match operand's pointee type"); | ||||||||||||||
756 | auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts()); | ||||||||||||||
757 | bool HasBaseReg = (BaseGV == nullptr); | ||||||||||||||
758 | |||||||||||||||
759 | auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType()); | ||||||||||||||
760 | APInt BaseOffset(PtrSizeBits, 0); | ||||||||||||||
761 | int64_t Scale = 0; | ||||||||||||||
762 | |||||||||||||||
763 | auto GTI = gep_type_begin(PointeeType, Operands); | ||||||||||||||
764 | Type *TargetType = nullptr; | ||||||||||||||
765 | |||||||||||||||
766 | // Handle the case where the GEP has no index operands, only the base | ||||||||||||||
767 | // pointer; in that case TargetType remains a nullptr. | ||||||||||||||
768 | if (Operands.empty()) | ||||||||||||||
769 | return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic; | ||||||||||||||
770 | |||||||||||||||
771 | for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) { | ||||||||||||||
772 | TargetType = GTI.getIndexedType(); | ||||||||||||||
773 | // We assume that the cost of a scalar GEP with a constant index and the | ||||||||||||||
774 | // cost of a vector GEP with a splat constant index are the same. | ||||||||||||||
775 | const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I); | ||||||||||||||
776 | if (!ConstIdx) | ||||||||||||||
777 | if (auto Splat = getSplatValue(*I)) | ||||||||||||||
778 | ConstIdx = dyn_cast<ConstantInt>(Splat); | ||||||||||||||
779 | if (StructType *STy = GTI.getStructTypeOrNull()) { | ||||||||||||||
780 | // For structures the index is always a splat or scalar constant. | ||||||||||||||
781 | assert(ConstIdx && "Unexpected GEP index"); | ||||||||||||||
782 | uint64_t Field = ConstIdx->getZExtValue(); | ||||||||||||||
783 | BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field); | ||||||||||||||
784 | } else { | ||||||||||||||
785 | int64_t ElementSize = DL.getTypeAllocSize(GTI.getIndexedType()); | ||||||||||||||
786 | if (ConstIdx) { | ||||||||||||||
787 | BaseOffset += | ||||||||||||||
788 | ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize; | ||||||||||||||
789 | } else { | ||||||||||||||
790 | // Needs scale register. | ||||||||||||||
791 | if (Scale != 0) | ||||||||||||||
792 | // No addressing mode takes two scale registers. | ||||||||||||||
793 | return TTI::TCC_Basic; | ||||||||||||||
794 | Scale = ElementSize; | ||||||||||||||
795 | } | ||||||||||||||
796 | } | ||||||||||||||
797 | } | ||||||||||||||
798 | |||||||||||||||
799 | if (static_cast<T *>(this)->isLegalAddressingMode( | ||||||||||||||
800 | TargetType, const_cast<GlobalValue *>(BaseGV), | ||||||||||||||
801 | BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale, | ||||||||||||||
802 | Ptr->getType()->getPointerAddressSpace())) | ||||||||||||||
803 | return TTI::TCC_Free; | ||||||||||||||
804 | return TTI::TCC_Basic; | ||||||||||||||
805 | } | ||||||||||||||
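// For example: under the logic above, a GEP whose indices are all constants
// (or splat constants) folds entirely into BaseOffset and is typically
// TCC_Free if the target's addressing mode can absorb it, whereas a GEP with
// two variable indices would need two scale registers and is TCC_Basic.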
806 | |||||||||||||||
807 | unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, | ||||||||||||||
808 | ArrayRef<Type *> ParamTys, const User *U) { | ||||||||||||||
809 | switch (IID) { | ||||||||||||||
810 | default: | ||||||||||||||
811 | // Intrinsics rarely (if ever) have normal argument setup constraints. | ||||||||||||||
812 | // Model them as having a basic instruction cost. | ||||||||||||||
813 | return TTI::TCC_Basic; | ||||||||||||||
814 | |||||||||||||||
815 | // TODO: other libc intrinsics. | ||||||||||||||
816 | case Intrinsic::memcpy: | ||||||||||||||
817 | return static_cast<T *>(this)->getMemcpyCost(dyn_cast<Instruction>(U)); | ||||||||||||||
818 | |||||||||||||||
819 | case Intrinsic::annotation: | ||||||||||||||
820 | case Intrinsic::assume: | ||||||||||||||
821 | case Intrinsic::sideeffect: | ||||||||||||||
822 | case Intrinsic::dbg_declare: | ||||||||||||||
823 | case Intrinsic::dbg_value: | ||||||||||||||
824 | case Intrinsic::dbg_label: | ||||||||||||||
825 | case Intrinsic::invariant_start: | ||||||||||||||
826 | case Intrinsic::invariant_end: | ||||||||||||||
827 | case Intrinsic::launder_invariant_group: | ||||||||||||||
828 | case Intrinsic::strip_invariant_group: | ||||||||||||||
829 | case Intrinsic::is_constant: | ||||||||||||||
830 | case Intrinsic::lifetime_start: | ||||||||||||||
831 | case Intrinsic::lifetime_end: | ||||||||||||||
832 | case Intrinsic::objectsize: | ||||||||||||||
833 | case Intrinsic::ptr_annotation: | ||||||||||||||
834 | case Intrinsic::var_annotation: | ||||||||||||||
835 | case Intrinsic::experimental_gc_result: | ||||||||||||||
836 | case Intrinsic::experimental_gc_relocate: | ||||||||||||||
837 | case Intrinsic::coro_alloc: | ||||||||||||||
838 | case Intrinsic::coro_begin: | ||||||||||||||
839 | case Intrinsic::coro_free: | ||||||||||||||
840 | case Intrinsic::coro_end: | ||||||||||||||
841 | case Intrinsic::coro_frame: | ||||||||||||||
842 | case Intrinsic::coro_size: | ||||||||||||||
843 | case Intrinsic::coro_suspend: | ||||||||||||||
844 | case Intrinsic::coro_param: | ||||||||||||||
845 | case Intrinsic::coro_subfn_addr: | ||||||||||||||
846 | // These intrinsics don't actually represent code after lowering. | ||||||||||||||
847 | return TTI::TCC_Free; | ||||||||||||||
848 | } | ||||||||||||||
849 | } | ||||||||||||||
850 | |||||||||||||||
851 | unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, | ||||||||||||||
852 | ArrayRef<const Value *> Arguments, const User *U) { | ||||||||||||||
853 | // Delegate to the generic intrinsic handling code. This mostly provides an | ||||||||||||||
854 | // opportunity for targets to (for example) special case the cost of | ||||||||||||||
855 | // certain intrinsics based on constants used as arguments. | ||||||||||||||
856 | SmallVector<Type *, 8> ParamTys; | ||||||||||||||
857 | ParamTys.reserve(Arguments.size()); | ||||||||||||||
858 | for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx) | ||||||||||||||
859 | ParamTys.push_back(Arguments[Idx]->getType()); | ||||||||||||||
860 | return static_cast<T *>(this)->getIntrinsicCost(IID, RetTy, ParamTys, U); | ||||||||||||||
861 | } | ||||||||||||||
862 | |||||||||||||||
863 | unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands) { | ||||||||||||||
864 | if (isa<PHINode>(U)) | ||||||||||||||
865 | return TTI::TCC_Free; // Model all PHI nodes as free. | ||||||||||||||
866 | |||||||||||||||
867 | if (isa<ExtractValueInst>(U)) | ||||||||||||||
868 | return TTI::TCC_Free; // Model all ExtractValue nodes as free. | ||||||||||||||
869 | |||||||||||||||
870 | if (isa<FreezeInst>(U)) | ||||||||||||||
871 | return TTI::TCC_Free; // Model all Freeze nodes as free. | ||||||||||||||
872 | |||||||||||||||
873 | // Static alloca doesn't generate target instructions. | ||||||||||||||
874 | if (auto *A = dyn_cast<AllocaInst>(U)) | ||||||||||||||
875 | if (A->isStaticAlloca()) | ||||||||||||||
876 | return TTI::TCC_Free; | ||||||||||||||
877 | |||||||||||||||
878 | if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) { | ||||||||||||||
879 | return static_cast<T *>(this)->getGEPCost(GEP->getSourceElementType(), | ||||||||||||||
880 | GEP->getPointerOperand(), | ||||||||||||||
881 | Operands.drop_front()); | ||||||||||||||
882 | } | ||||||||||||||
883 | |||||||||||||||
884 | if (auto CS = ImmutableCallSite(U)) { | ||||||||||||||
885 | const Function *F = CS.getCalledFunction(); | ||||||||||||||
886 | if (!F) { | ||||||||||||||
887 | // Just use the called value type. | ||||||||||||||
888 | Type *FTy = CS.getCalledValue()->getType()->getPointerElementType(); | ||||||||||||||
889 | return static_cast<T *>(this) | ||||||||||||||
890 | ->getCallCost(cast<FunctionType>(FTy), CS.arg_size(), U); | ||||||||||||||
891 | } | ||||||||||||||
892 | |||||||||||||||
893 | SmallVector<const Value *, 8> Arguments(CS.arg_begin(), CS.arg_end()); | ||||||||||||||
894 | return static_cast<T *>(this)->getCallCost(F, Arguments, U); | ||||||||||||||
895 | } | ||||||||||||||
896 | |||||||||||||||
897 | if (isa<SExtInst>(U) || isa<ZExtInst>(U) || isa<FPExtInst>(U)) | ||||||||||||||
898 | // The old behaviour of generally treating extensions of icmp to be free | ||||||||||||||
899 | // has been removed. A target that needs it should override getUserCost(). | ||||||||||||||
900 | return static_cast<T *>(this)->getExtCost(cast<Instruction>(U), | ||||||||||||||
901 | Operands.back()); | ||||||||||||||
902 | |||||||||||||||
903 | return static_cast<T *>(this)->getOperationCost( | ||||||||||||||
904 | Operator::getOpcode(U), U->getType(), | ||||||||||||||
905 | U->getNumOperands() == 1 ? U->getOperand(0)->getType() : nullptr); | ||||||||||||||
906 | } | ||||||||||||||
907 | |||||||||||||||
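// A rough per-instruction latency model (an illustrative summary of the code
// below): users that are free cost 0, loads 4, real function calls 40,
// floating-point results 3, and everything else 1.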
908 | int getInstructionLatency(const Instruction *I) { | ||||||||||||||
909 | SmallVector<const Value *, 4> Operands(I->value_op_begin(), | ||||||||||||||
910 | I->value_op_end()); | ||||||||||||||
911 | if (getUserCost(I, Operands) == TTI::TCC_Free) | ||||||||||||||
912 | return 0; | ||||||||||||||
913 | |||||||||||||||
914 | if (isa<LoadInst>(I)) | ||||||||||||||
915 | return 4; | ||||||||||||||
916 | |||||||||||||||
917 | Type *DstTy = I->getType(); | ||||||||||||||
918 | |||||||||||||||
919 | // Usually an intrinsic is a simple instruction. | ||||||||||||||
920 | // A real function call is much slower. | ||||||||||||||
921 | if (auto *CI = dyn_cast<CallInst>(I)) { | ||||||||||||||
922 | const Function *F = CI->getCalledFunction(); | ||||||||||||||
923 | if (!F || static_cast<T *>(this)->isLoweredToCall(F)) | ||||||||||||||
924 | return 40; | ||||||||||||||
925 | // Some intrinsics return a value and a flag; we use the value type | ||||||||||||||
926 | // to decide the latency. | ||||||||||||||
927 | if (StructType* StructTy = dyn_cast<StructType>(DstTy)) | ||||||||||||||
928 | DstTy = StructTy->getElementType(0); | ||||||||||||||
929 | // Fall through to simple instructions. | ||||||||||||||
930 | } | ||||||||||||||
931 | |||||||||||||||
932 | if (VectorType *VectorTy = dyn_cast<VectorType>(DstTy)) | ||||||||||||||
933 | DstTy = VectorTy->getElementType(); | ||||||||||||||
934 | if (DstTy->isFloatingPointTy()) | ||||||||||||||
935 | return 3; | ||||||||||||||
936 | |||||||||||||||
937 | return 1; | ||||||||||||||
938 | } | ||||||||||||||
939 | }; | ||||||||||||||
940 | } | ||||||||||||||
941 | |||||||||||||||
942 | #endif |
1 | //===- CallSite.h - Abstract Call & Invoke instrs ---------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the CallSite class, which is a handy wrapper for code that |
10 | // wants to treat Call, Invoke and CallBr instructions in a generic way. When |
11 | // in non-mutation context (e.g. an analysis) ImmutableCallSite should be used. |
12 | // Finally, when some degree of customization is necessary between these two |
13 | // extremes, CallSiteBase<> can be supplied with fine-tuned parameters. |
14 | // |
15 | // NOTE: These classes are supposed to have "value semantics". So they should be |
16 | // passed by value, not by reference; they should not be "new"ed or "delete"d. |
17 | // They are efficiently copyable, assignable and constructable, with cost |
18 | // equivalent to copying a pointer (notice that they have only a single data |
19 | // member). The internal representation carries a flag which indicates which of |
20 | // the three variants is enclosed. This allows for cheaper checks when various |
21 | // accessors of CallSite are employed. |
22 | // |
23 | //===----------------------------------------------------------------------===// |
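// A minimal usage sketch (illustrative only; visitDirectCall is a
// hypothetical helper): analysis code can handle call, invoke and callbr
// instructions uniformly, e.g.
//
//   if (ImmutableCallSite CS = ImmutableCallSite(&I)) {
//     if (const Function *Callee = CS.getCalledFunction())
//       visitDirectCall(*Callee, CS.arg_size());
//   }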
24 | |
25 | #ifndef LLVM_IR_CALLSITE_H |
26 | #define LLVM_IR_CALLSITE_H |
27 | |
28 | #include "llvm/ADT/Optional.h" |
29 | #include "llvm/ADT/PointerIntPair.h" |
30 | #include "llvm/ADT/iterator_range.h" |
31 | #include "llvm/IR/Attributes.h" |
32 | #include "llvm/IR/CallingConv.h" |
33 | #include "llvm/IR/Function.h" |
34 | #include "llvm/IR/InstrTypes.h" |
35 | #include "llvm/IR/Instruction.h" |
36 | #include "llvm/IR/Instructions.h" |
37 | #include "llvm/IR/Use.h" |
38 | #include "llvm/IR/User.h" |
39 | #include "llvm/IR/Value.h" |
40 | #include "llvm/Support/Casting.h" |
41 | #include <cassert> |
42 | #include <cstdint> |
43 | #include <iterator> |
44 | |
45 | namespace llvm { |
46 | |
47 | namespace Intrinsic { |
48 | typedef unsigned ID; |
49 | } |
50 | |
51 | template <typename FunTy = const Function, typename BBTy = const BasicBlock, |
52 | typename ValTy = const Value, typename UserTy = const User, |
53 | typename UseTy = const Use, typename InstrTy = const Instruction, |
54 | typename CallTy = const CallInst, |
55 | typename InvokeTy = const InvokeInst, |
56 | typename CallBrTy = const CallBrInst, |
57 | typename IterTy = User::const_op_iterator> |
58 | class CallSiteBase { |
59 | protected: |
60 | PointerIntPair<InstrTy *, 2, int> I; |
61 | |
62 | CallSiteBase() = default; |
63 | CallSiteBase(CallTy *CI) : I(CI, 1) { assert(CI); }
64 | CallSiteBase(InvokeTy *II) : I(II, 0) { assert(II); }
65 | CallSiteBase(CallBrTy *CBI) : I(CBI, 2) { assert(CBI); }
66 | explicit CallSiteBase(ValTy *II) { *this = get(II); } |
67 | |
68 | private: |
69 | /// This static method is like a constructor. It will create an appropriate |
70 | /// call site for a Call, Invoke or CallBr instruction, but it can also create |
71 | /// a null-initialized CallSiteBase object for something which is NOT a call
72 | /// site. |
73 | static CallSiteBase get(ValTy *V) { |
74 | if (InstrTy *II = dyn_cast<InstrTy>(V)) { |
75 | if (II->getOpcode() == Instruction::Call) |
76 | return CallSiteBase(static_cast<CallTy*>(II)); |
77 | if (II->getOpcode() == Instruction::Invoke) |
78 | return CallSiteBase(static_cast<InvokeTy*>(II)); |
79 | if (II->getOpcode() == Instruction::CallBr) |
80 | return CallSiteBase(static_cast<CallBrTy *>(II)); |
81 | } |
82 | return CallSiteBase(); |
83 | } |
84 | |
85 | public: |
86 | /// Return true if a CallInst is enclosed. |
87 | bool isCall() const { return I.getInt() == 1; } |
88 | |
89 | /// Return true if an InvokeInst is enclosed. !I.getInt() may also signify a
90 | /// NULL instruction pointer, so check that. |
91 | bool isInvoke() const { return getInstruction() && I.getInt() == 0; } |
92 | |
93 | /// Return true if a CallBrInst is enclosed. |
94 | bool isCallBr() const { return I.getInt() == 2; } |
95 | |
96 | InstrTy *getInstruction() const { return I.getPointer(); } |
97 | InstrTy *operator->() const { return I.getPointer(); } |
98 | explicit operator bool() const { return I.getPointer(); } |
99 | |
100 | /// Get the basic block containing the call site. |
101 | BBTy* getParent() const { return getInstruction()->getParent(); } |
102 | |
103 | /// Return the pointer to function that is being called. |
104 | ValTy *getCalledValue() const { |
105 | assert(getInstruction() && "Not a call, invoke or callbr instruction!");
106 | return *getCallee(); |
107 | } |
108 | |
109 | /// Return the function being called if this is a direct call, otherwise |
110 | /// return null (if it's an indirect call). |
111 | FunTy *getCalledFunction() const { |
112 | return dyn_cast<FunTy>(getCalledValue()); |
113 | } |
114 | |
115 | /// Return true if the callsite is an indirect call. |
116 | bool isIndirectCall() const { |
117 | const Value *V = getCalledValue(); |
118 | if (!V) |
119 | return false; |
120 | if (isa<FunTy>(V) || isa<Constant>(V)) |
121 | return false; |
122 | if (const CallBase *CB = dyn_cast<CallBase>(getInstruction())) |
123 | if (CB->isInlineAsm()) |
124 | return false; |
125 | return true; |
126 | } |
127 | |
128 | /// Set the callee to the specified value. Unlike the function of the same |
129 | /// name on CallBase, does not modify the type! |
130 | void setCalledFunction(Value *V) { |
131 | assert(getInstruction() && "Not a call, callbr, or invoke instruction!");
132 | assert(cast<PointerType>(V->getType())->getElementType() ==
133 | cast<CallBase>(getInstruction())->getFunctionType() &&
134 | "New callee type does not match FunctionType on call");
135 | *getCallee() = V; |
136 | } |
137 | |
138 | /// Return the intrinsic ID of the intrinsic called by this CallSite, |
139 | /// or Intrinsic::not_intrinsic if the called function is not an |
140 | /// intrinsic, or if this CallSite is an indirect call. |
141 | Intrinsic::ID getIntrinsicID() const { |
142 | if (auto *F = getCalledFunction()) |
143 | return F->getIntrinsicID(); |
144 | // Don't use Intrinsic::not_intrinsic, as it will require pulling |
145 | // Intrinsics.h into every header that uses CallSite. |
146 | return static_cast<Intrinsic::ID>(0); |
147 | } |
148 | |
149 | /// Determine whether the passed iterator points to the callee operand's Use. |
150 | bool isCallee(Value::const_user_iterator UI) const { |
151 | return isCallee(&UI.getUse()); |
152 | } |
153 | |
154 | /// Determine whether this Use is the callee operand's Use. |
155 | bool isCallee(const Use *U) const { return getCallee() == U; } |
156 | |
157 | /// Determine whether the passed iterator points to an argument operand. |
158 | bool isArgOperand(Value::const_user_iterator UI) const { |
159 | return isArgOperand(&UI.getUse()); |
160 | } |
161 | |
162 | /// Determine whether the passed use points to an argument operand. |
163 | bool isArgOperand(const Use *U) const { |
164 | assert(getInstruction() == U->getUser());
165 | return arg_begin() <= U && U < arg_end(); |
166 | } |
167 | |
168 | /// Determine whether the passed iterator points to a bundle operand. |
169 | bool isBundleOperand(Value::const_user_iterator UI) const { |
170 | return isBundleOperand(&UI.getUse()); |
171 | } |
172 | |
173 | /// Determine whether the passed use points to a bundle operand. |
174 | bool isBundleOperand(const Use *U) const { |
175 | assert(getInstruction() == U->getUser());
176 | if (!hasOperandBundles()) |
177 | return false; |
178 | unsigned OperandNo = U - (*this)->op_begin(); |
179 | return getBundleOperandsStartIndex() <= OperandNo && |
180 | OperandNo < getBundleOperandsEndIndex(); |
181 | } |
182 | |
183 | /// Determine whether the passed iterator points to a data operand. |
184 | bool isDataOperand(Value::const_user_iterator UI) const { |
185 | return isDataOperand(&UI.getUse()); |
186 | } |
187 | |
188 | /// Determine whether the passed use points to a data operand. |
189 | bool isDataOperand(const Use *U) const { |
190 | return data_operands_begin() <= U && U < data_operands_end(); |
191 | } |
192 | |
193 | ValTy *getArgument(unsigned ArgNo) const { |
194 | assert(arg_begin() + ArgNo < arg_end() && "Argument # out of range!");
195 | return *(arg_begin() + ArgNo); |
196 | } |
197 | |
198 | void setArgument(unsigned ArgNo, Value* newVal) { |
199 | assert(getInstruction() && "Not a call, invoke or callbr instruction!");
200 | assert(arg_begin() + ArgNo < arg_end() && "Argument # out of range!");
201 | getInstruction()->setOperand(ArgNo, newVal); |
202 | } |
203 | |
204 | /// Given a value use iterator, returns the argument that corresponds to it. |
205 | /// Iterator must actually correspond to an argument. |
206 | unsigned getArgumentNo(Value::const_user_iterator I) const { |
207 | return getArgumentNo(&I.getUse()); |
208 | } |
209 | |
210 | /// Given a use for an argument, get the argument number that corresponds to |
211 | /// it. |
212 | unsigned getArgumentNo(const Use *U) const { |
213 | assert(getInstruction() && "Not a call, invoke or callbr instruction!");
214 | assert(isArgOperand(U) && "Argument # out of range!");
215 | return U - arg_begin(); |
216 | } |
217 | |
218 | /// The type of iterator to use when looping over actual arguments at this |
219 | /// call site. |
220 | using arg_iterator = IterTy; |
221 | |
222 | iterator_range<IterTy> args() const { |
223 | return make_range(arg_begin(), arg_end()); |
224 | } |
225 | bool arg_empty() const { return arg_end() == arg_begin(); } |
226 | unsigned arg_size() const { return unsigned(arg_end() - arg_begin()); } |
227 | |
228 | /// Given a value use iterator, return the data operand corresponding to it. |
229 | /// Iterator must actually correspond to a data operand. |
230 | unsigned getDataOperandNo(Value::const_user_iterator UI) const { |
231 | return getDataOperandNo(&UI.getUse()); |
232 | } |
233 | |
234 | /// Given a use for a data operand, get the data operand number that |
235 | /// corresponds to it. |
236 | unsigned getDataOperandNo(const Use *U) const { |
237 | assert(getInstruction() && "Not a call, invoke or callbr instruction!");
238 | assert(isDataOperand(U) && "Data operand # out of range!");
239 | return U - data_operands_begin(); |
240 | } |
241 | |
242 | /// Type of iterator to use when looping over data operands at this call site |
243 | /// (see below). |
244 | using data_operand_iterator = IterTy; |
245 | |
246 | /// data_operands_begin/data_operands_end - Return iterators iterating over |
247 | /// the call / invoke / callbr argument list and bundle operands. For invokes, |
248 | /// this is the set of instruction operands except the invoke target and the |
249 | /// two successor blocks; for calls this is the set of instruction operands |
250 | /// except the call target; for callbrs the number of labels to skip must be |
251 | /// determined first. |
252 | |
253 | IterTy data_operands_begin() const { |
254 | assert(getInstruction() && "Not a call or invoke instruction!");
255 | return cast<CallBase>(getInstruction())->data_operands_begin(); |
256 | } |
257 | IterTy data_operands_end() const { |
258 | assert(getInstruction() && "Not a call or invoke instruction!");
259 | return cast<CallBase>(getInstruction())->data_operands_end(); |
260 | } |
261 | iterator_range<IterTy> data_ops() const { |
262 | return make_range(data_operands_begin(), data_operands_end()); |
263 | } |
264 | bool data_operands_empty() const { |
265 | return data_operands_end() == data_operands_begin(); |
266 | } |
267 | unsigned data_operands_size() const { |
268 | return std::distance(data_operands_begin(), data_operands_end()); |
269 | } |
270 | |
271 | /// Return the type of the instruction that generated this call site. |
272 | Type *getType() const { return (*this)->getType(); } |
273 | |
274 | /// Return the caller function for this call site. |
275 | FunTy *getCaller() const { return (*this)->getParent()->getParent(); } |
276 | |
277 | /// Tests if this call site must be tail call optimized. Only a CallInst can |
278 | /// be tail call optimized. |
279 | bool isMustTailCall() const { |
280 | return isCall() && cast<CallInst>(getInstruction())->isMustTailCall(); |
281 | } |
282 | |
283 | /// Tests if this call site is marked as a tail call. |
284 | bool isTailCall() const { |
285 | return isCall() && cast<CallInst>(getInstruction())->isTailCall(); |
286 | } |
287 | |
288 | #define CALLSITE_DELEGATE_GETTER(METHOD) \ |
289 | InstrTy *II = getInstruction(); \ |
290 | return isCall() ? cast<CallInst>(II)->METHOD \ |
291 | : isCallBr() ? cast<CallBrInst>(II)->METHOD \ |
292 | : cast<InvokeInst>(II)->METHOD |
293 | |
294 | #define CALLSITE_DELEGATE_SETTER(METHOD) \ |
295 | InstrTy *II = getInstruction(); \ |
296 | if (isCall()) \ |
297 | cast<CallInst>(II)->METHOD; \ |
298 | else if (isCallBr()) \ |
299 | cast<CallBrInst>(II)->METHOD; \ |
300 | else \ |
301 | cast<InvokeInst>(II)->METHOD |
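// Illustrative expansion of the delegation pattern above: a getter such as
// getCallingConv() below becomes, roughly,
//
//   InstrTy *II = getInstruction();
//   return isCall()   ? cast<CallInst>(II)->getCallingConv()
//        : isCallBr() ? cast<CallBrInst>(II)->getCallingConv()
//                     : cast<InvokeInst>(II)->getCallingConv();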
302 | |
303 | unsigned getNumArgOperands() const { |
304 | CALLSITE_DELEGATE_GETTER(getNumArgOperands()); |
305 | } |
306 | |
307 | ValTy *getArgOperand(unsigned i) const { |
308 | CALLSITE_DELEGATE_GETTER(getArgOperand(i)); |
309 | } |
310 | |
311 | ValTy *getReturnedArgOperand() const { |
312 | CALLSITE_DELEGATE_GETTER(getReturnedArgOperand()); |
313 | } |
314 | |
315 | bool isInlineAsm() const { |
316 | return cast<CallBase>(getInstruction())->isInlineAsm(); |
317 | } |
318 | |
319 | /// Get the calling convention of the call. |
320 | CallingConv::ID getCallingConv() const { |
321 | CALLSITE_DELEGATE_GETTER(getCallingConv()); |
322 | } |
323 | /// Set the calling convention of the call. |
324 | void setCallingConv(CallingConv::ID CC) { |
325 | CALLSITE_DELEGATE_SETTER(setCallingConv(CC)); |
326 | } |
327 | |
328 | FunctionType *getFunctionType() const { |
329 | CALLSITE_DELEGATE_GETTER(getFunctionType()); |
330 | } |
331 | |
332 | void mutateFunctionType(FunctionType *Ty) const { |
333 | CALLSITE_DELEGATE_SETTER(mutateFunctionType(Ty)); |
334 | } |
335 | |
336 | /// Get the parameter attributes of the call. |
337 | AttributeList getAttributes() const { |
338 | CALLSITE_DELEGATE_GETTER(getAttributes()); |
339 | } |
340 | /// Set the parameter attributes of the call. |
341 | void setAttributes(AttributeList PAL) { |
342 | CALLSITE_DELEGATE_SETTER(setAttributes(PAL)); |
343 | } |
344 | |
345 | void addAttribute(unsigned i, Attribute::AttrKind Kind) { |
346 | CALLSITE_DELEGATE_SETTER(addAttribute(i, Kind)); |
347 | } |
348 | |
349 | void addAttribute(unsigned i, Attribute Attr) { |
350 | CALLSITE_DELEGATE_SETTER(addAttribute(i, Attr)); |
351 | } |
352 | |
353 | void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) { |
354 | CALLSITE_DELEGATE_SETTER(addParamAttr(ArgNo, Kind)); |
355 | } |
356 | |
357 | void removeAttribute(unsigned i, Attribute::AttrKind Kind) { |
358 | CALLSITE_DELEGATE_SETTER(removeAttribute(i, Kind)); |
359 | } |
360 | |
361 | void removeAttribute(unsigned i, StringRef Kind) { |
362 | CALLSITE_DELEGATE_SETTER(removeAttribute(i, Kind)); |
363 | } |
364 | |
365 | void removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) { |
366 | CALLSITE_DELEGATE_SETTER(removeParamAttr(ArgNo, Kind)); |
367 | } |
368 | |
369 | /// Return true if this function has the given attribute. |
370 | bool hasFnAttr(Attribute::AttrKind Kind) const { |
371 | CALLSITE_DELEGATE_GETTER(hasFnAttr(Kind)); |
372 | } |
373 | |
374 | /// Return true if this function has the given attribute. |
375 | bool hasFnAttr(StringRef Kind) const { |
376 | CALLSITE_DELEGATE_GETTER(hasFnAttr(Kind)); |
377 | } |
378 | |
379 | /// Return true if this return value has the given attribute. |
380 | bool hasRetAttr(Attribute::AttrKind Kind) const { |
381 | CALLSITE_DELEGATE_GETTER(hasRetAttr(Kind)); |
382 | } |
383 | |
384 | /// Return true if the call or the callee has the given attribute. |
385 | bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const { |
386 | CALLSITE_DELEGATE_GETTER(paramHasAttr(ArgNo, Kind)); |
387 | } |
388 | |
389 | Attribute getAttribute(unsigned i, Attribute::AttrKind Kind) const { |
390 | CALLSITE_DELEGATE_GETTER(getAttribute(i, Kind)); |
391 | } |
392 | |
393 | Attribute getAttribute(unsigned i, StringRef Kind) const { |
394 | CALLSITE_DELEGATE_GETTER(getAttribute(i, Kind)); |
395 | } |
396 | |
397 | /// Return true if the data operand at index \p i directly or indirectly has |
398 | /// the attribute \p A. |
399 | /// |
400 | /// Normal call, invoke or callbr arguments have per operand attributes, as |
401 | /// specified in the attribute set attached to this instruction, while operand |
402 | /// bundle operands may have some attributes implied by the type of its |
403 | /// containing operand bundle. |
404 | bool dataOperandHasImpliedAttr(unsigned i, Attribute::AttrKind Kind) const { |
405 | CALLSITE_DELEGATE_GETTER(dataOperandHasImpliedAttr(i, Kind)); |
406 | } |
407 | |
408 | /// Extract the alignment of the return value. |
409 | unsigned getRetAlignment() const { |
410 | CALLSITE_DELEGATE_GETTER(getRetAlignment()); |
411 | } |
412 | |
413 | /// Extract the alignment for a call or parameter (0=unknown). |
414 | unsigned getParamAlignment(unsigned ArgNo) const { |
415 | CALLSITE_DELEGATE_GETTER(getParamAlignment(ArgNo)); |
416 | } |
417 | |
418 | /// Extract the byval type for a call or parameter (nullptr=unknown). |
419 | Type *getParamByValType(unsigned ArgNo) const { |
420 | CALLSITE_DELEGATE_GETTER(getParamByValType(ArgNo)); |
421 | } |
422 | |
423 | /// Extract the number of dereferenceable bytes for a call or parameter |
424 | /// (0=unknown). |
425 | uint64_t getDereferenceableBytes(unsigned i) const { |
426 | CALLSITE_DELEGATE_GETTER(getDereferenceableBytes(i)); |
427 | } |
428 | |
429 | /// Extract the number of dereferenceable_or_null bytes for a call or |
430 | /// parameter (0=unknown). |
431 | uint64_t getDereferenceableOrNullBytes(unsigned i) const { |
432 | CALLSITE_DELEGATE_GETTER(getDereferenceableOrNullBytes(i)); |
433 | } |
434 | |
435 | /// Determine if the return value is marked with NoAlias attribute. |
436 | bool returnDoesNotAlias() const { |
437 | CALLSITE_DELEGATE_GETTER(returnDoesNotAlias()); |
438 | } |
439 | |
440 | /// Return true if the call should not be treated as a call to a builtin. |
441 | bool isNoBuiltin() const { |
442 | CALLSITE_DELEGATE_GETTER(isNoBuiltin()); |
443 | } |
444 | |
445 | /// Return true if the call requires strict floating point semantics. |
446 | bool isStrictFP() const { |
447 | CALLSITE_DELEGATE_GETTER(isStrictFP()); |
448 | } |
449 | |
450 | /// Return true if the call should not be inlined. |
451 | bool isNoInline() const { |
452 | CALLSITE_DELEGATE_GETTER(isNoInline()); |
453 | } |
454 | void setIsNoInline(bool Value = true) { |
455 | CALLSITE_DELEGATE_SETTER(setIsNoInline(Value)); |
456 | } |
457 | |
458 | /// Determine if the call does not access memory. |
459 | bool doesNotAccessMemory() const { |
460 | CALLSITE_DELEGATE_GETTER(doesNotAccessMemory()); |
461 | } |
462 | void setDoesNotAccessMemory() { |
463 | CALLSITE_DELEGATE_SETTER(setDoesNotAccessMemory()); |
464 | } |
465 | |
466 | /// Determine if the call does not access or only reads memory. |
467 | bool onlyReadsMemory() const { |
468 | CALLSITE_DELEGATE_GETTER(onlyReadsMemory()); |
469 | } |
470 | void setOnlyReadsMemory() { |
471 | CALLSITE_DELEGATE_SETTER(setOnlyReadsMemory()); |
472 | } |
473 | |
474 | /// Determine if the call does not access or only writes memory. |
475 | bool doesNotReadMemory() const { |
476 | CALLSITE_DELEGATE_GETTER(doesNotReadMemory()); |
477 | } |
478 | void setDoesNotReadMemory() { |
479 | CALLSITE_DELEGATE_SETTER(setDoesNotReadMemory()); |
480 | } |
481 | |
482 | /// Determine if the call can access memory only using pointers based
483 | /// on its arguments. |
484 | bool onlyAccessesArgMemory() const { |
485 | CALLSITE_DELEGATE_GETTER(onlyAccessesArgMemory()); |
486 | } |
487 | void setOnlyAccessesArgMemory() { |
488 | CALLSITE_DELEGATE_SETTER(setOnlyAccessesArgMemory()); |
489 | } |
490 | |
491 | /// Determine if the function may only access memory that is |
492 | /// inaccessible from the IR. |
493 | bool onlyAccessesInaccessibleMemory() const { |
494 | CALLSITE_DELEGATE_GETTER(onlyAccessesInaccessibleMemory()); |
495 | } |
496 | void setOnlyAccessesInaccessibleMemory() { |
497 | CALLSITE_DELEGATE_SETTER(setOnlyAccessesInaccessibleMemory()); |
498 | } |
499 | |
500 | /// Determine if the function may only access memory that is |
501 | /// either inaccessible from the IR or pointed to by its arguments. |
502 | bool onlyAccessesInaccessibleMemOrArgMem() const { |
503 | CALLSITE_DELEGATE_GETTER(onlyAccessesInaccessibleMemOrArgMem()); |
504 | } |
505 | void setOnlyAccessesInaccessibleMemOrArgMem() { |
506 | CALLSITE_DELEGATE_SETTER(setOnlyAccessesInaccessibleMemOrArgMem()); |
507 | } |
508 | |
509 | /// Determine if the call cannot return. |
510 | bool doesNotReturn() const { |
511 | CALLSITE_DELEGATE_GETTER(doesNotReturn()); |
512 | } |
513 | void setDoesNotReturn() { |
514 | CALLSITE_DELEGATE_SETTER(setDoesNotReturn()); |
515 | } |
516 | |
517 | /// Determine if the call cannot unwind. |
518 | bool doesNotThrow() const { |
519 | CALLSITE_DELEGATE_GETTER(doesNotThrow()); |
520 | } |
521 | void setDoesNotThrow() { |
522 | CALLSITE_DELEGATE_SETTER(setDoesNotThrow()); |
523 | } |
524 | |
525 | /// Determine if the call can be duplicated. |
526 | bool cannotDuplicate() const { |
527 | CALLSITE_DELEGATE_GETTER(cannotDuplicate()); |
528 | } |
529 | void setCannotDuplicate() { |
530 | CALLSITE_DELEGATE_SETTER(setCannotDuplicate()); |
531 | } |
532 | |
533 | /// Determine if the call is convergent. |
534 | bool isConvergent() const { |
535 | CALLSITE_DELEGATE_GETTER(isConvergent()); |
536 | } |
537 | void setConvergent() { |
538 | CALLSITE_DELEGATE_SETTER(setConvergent()); |
539 | } |
540 | void setNotConvergent() { |
541 | CALLSITE_DELEGATE_SETTER(setNotConvergent()); |
542 | } |
543 | |
544 | unsigned getNumOperandBundles() const { |
545 | CALLSITE_DELEGATE_GETTER(getNumOperandBundles()); |
546 | } |
547 | |
548 | bool hasOperandBundles() const { |
549 | CALLSITE_DELEGATE_GETTER(hasOperandBundles()); |
550 | } |
551 | |
552 | unsigned getBundleOperandsStartIndex() const { |
553 | CALLSITE_DELEGATE_GETTER(getBundleOperandsStartIndex()); |
554 | } |
555 | |
556 | unsigned getBundleOperandsEndIndex() const { |
557 | CALLSITE_DELEGATE_GETTER(getBundleOperandsEndIndex()); |
558 | } |
559 | |
560 | unsigned getNumTotalBundleOperands() const { |
561 | CALLSITE_DELEGATE_GETTER(getNumTotalBundleOperands()); |
562 | } |
563 | |
564 | OperandBundleUse getOperandBundleAt(unsigned Index) const { |
565 | CALLSITE_DELEGATE_GETTER(getOperandBundleAt(Index)); |
566 | } |
567 | |
568 | Optional<OperandBundleUse> getOperandBundle(StringRef Name) const { |
569 | CALLSITE_DELEGATE_GETTER(getOperandBundle(Name)); |
570 | } |
571 | |
572 | Optional<OperandBundleUse> getOperandBundle(uint32_t ID) const { |
573 | CALLSITE_DELEGATE_GETTER(getOperandBundle(ID)); |
574 | } |
575 | |
576 | unsigned countOperandBundlesOfType(uint32_t ID) const { |
577 | CALLSITE_DELEGATE_GETTER(countOperandBundlesOfType(ID)); |
578 | } |
579 | |
580 | bool isBundleOperand(unsigned Idx) const { |
581 | CALLSITE_DELEGATE_GETTER(isBundleOperand(Idx)); |
582 | } |
583 | |
584 | IterTy arg_begin() const { |
585 | CALLSITE_DELEGATE_GETTER(arg_begin()); |
586 | } |
587 | |
588 | IterTy arg_end() const { |
589 | CALLSITE_DELEGATE_GETTER(arg_end()); |
590 | } |
591 | |
592 | #undef CALLSITE_DELEGATE_GETTER |
593 | #undef CALLSITE_DELEGATE_SETTER |
594 | |
595 | void getOperandBundlesAsDefs(SmallVectorImpl<OperandBundleDef> &Defs) const { |
596 | // Since this is actually a getter that "looks like" a setter, don't use the |
597 | // above macros to avoid confusion. |
598 | cast<CallBase>(getInstruction())->getOperandBundlesAsDefs(Defs); |
599 | } |
600 | |
601 | /// Determine whether this data operand is not captured. |
602 | bool doesNotCapture(unsigned OpNo) const { |
603 | return dataOperandHasImpliedAttr(OpNo + 1, Attribute::NoCapture); |
604 | } |
605 | |
606 | /// Determine whether this argument is passed by value. |
607 | bool isByValArgument(unsigned ArgNo) const { |
608 | return paramHasAttr(ArgNo, Attribute::ByVal); |
609 | } |
610 | |
611 | /// Determine whether this argument is passed in an alloca. |
612 | bool isInAllocaArgument(unsigned ArgNo) const { |
613 | return paramHasAttr(ArgNo, Attribute::InAlloca); |
614 | } |
615 | |
616 | /// Determine whether this argument is passed by value or in an alloca. |
617 | bool isByValOrInAllocaArgument(unsigned ArgNo) const { |
618 | return paramHasAttr(ArgNo, Attribute::ByVal) || |
619 | paramHasAttr(ArgNo, Attribute::InAlloca); |
620 | } |
621 | |
622 | /// Determine if there is an inalloca argument. Only the last argument can
623 | /// have the inalloca attribute. |
624 | bool hasInAllocaArgument() const { |
625 | return !arg_empty() && paramHasAttr(arg_size() - 1, Attribute::InAlloca); |
626 | } |
627 | |
628 | bool doesNotAccessMemory(unsigned OpNo) const { |
629 | return dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadNone); |
630 | } |
631 | |
632 | bool onlyReadsMemory(unsigned OpNo) const { |
633 | return dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadOnly) || |
634 | dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadNone); |
635 | } |
636 | |
637 | bool doesNotReadMemory(unsigned OpNo) const { |
638 | return dataOperandHasImpliedAttr(OpNo + 1, Attribute::WriteOnly) || |
639 | dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadNone); |
640 | } |
641 | |
642 | /// Return true if the return value is known to be not null. |
643 | /// This may be because it has the nonnull attribute, or because at least |
644 | /// one byte is dereferenceable and the pointer is in addrspace(0). |
645 | bool isReturnNonNull() const { |
646 | if (hasRetAttr(Attribute::NonNull)) |
647 | return true; |
648 | else if (getDereferenceableBytes(AttributeList::ReturnIndex) > 0 && |
649 | !NullPointerIsDefined(getCaller(), |
650 | getType()->getPointerAddressSpace())) |
651 | return true; |
652 | |
653 | return false; |
654 | } |
655 | |
656 | /// Returns true if this CallSite passes the given Value* as an argument to |
657 | /// the called function. |
658 | bool hasArgument(const Value *Arg) const { |
659 | for (arg_iterator AI = this->arg_begin(), E = this->arg_end(); AI != E; |
660 | ++AI) |
661 | if (AI->get() == Arg) |
662 | return true; |
663 | return false; |
664 | } |
665 | |
666 | private: |
667 | IterTy getCallee() const { |
668 | return cast<CallBase>(getInstruction())->op_end() - 1; |
669 | } |
670 | }; |
671 | |
672 | class CallSite : public CallSiteBase<Function, BasicBlock, Value, User, Use, |
673 | Instruction, CallInst, InvokeInst, |
674 | CallBrInst, User::op_iterator> { |
675 | public: |
676 | CallSite() = default; |
677 | CallSite(CallSiteBase B) : CallSiteBase(B) {} |
678 | CallSite(CallInst *CI) : CallSiteBase(CI) {} |
679 | CallSite(InvokeInst *II) : CallSiteBase(II) {} |
680 | CallSite(CallBrInst *CBI) : CallSiteBase(CBI) {} |
681 | explicit CallSite(Instruction *II) : CallSiteBase(II) {} |
682 | explicit CallSite(Value *V) : CallSiteBase(V) {} |
683 | |
684 | bool operator==(const CallSite &CS) const { return I == CS.I; } |
685 | bool operator!=(const CallSite &CS) const { return I != CS.I; } |
686 | bool operator<(const CallSite &CS) const { |
687 | return getInstruction() < CS.getInstruction(); |
688 | } |
689 | |
690 | private: |
691 | friend struct DenseMapInfo<CallSite>; |
692 | |
693 | User::op_iterator getCallee() const; |
694 | }; |
695 | |
696 | /// Establish a view to a call site for examination. |
697 | class ImmutableCallSite : public CallSiteBase<> { |
698 | public: |
699 | ImmutableCallSite() = default; |
700 | ImmutableCallSite(const CallInst *CI) : CallSiteBase(CI) {} |
701 | ImmutableCallSite(const InvokeInst *II) : CallSiteBase(II) {} |
702 | ImmutableCallSite(const CallBrInst *CBI) : CallSiteBase(CBI) {} |
703 | explicit ImmutableCallSite(const Instruction *II) : CallSiteBase(II) {} |
704 | explicit ImmutableCallSite(const Value *V) : CallSiteBase(V) {} |
705 | ImmutableCallSite(CallSite CS) : CallSiteBase(CS.getInstruction()) {} |
706 | }; |
707 | |
708 | /// AbstractCallSite |
709 | /// |
710 | /// An abstract call site is a wrapper that allows one to treat direct,
711 | /// indirect, and callback calls the same. If an abstract call site |
712 | /// represents a direct or indirect call site it behaves like a stripped |
713 | /// down version of a normal call site object. The abstract call site can |
714 | /// also represent a callback call, i.e., the fact that the initially
715 | /// called function (=broker) may invoke a third one (=callback callee). |
716 | /// In this case, the abstract call site hides the middle man, hence the |
717 | /// broker function. The result is a representation of the callback call, |
718 | /// inside the broker, but in the context of the original call to the broker. |
719 | /// |
720 | /// There are up to three functions involved when we talk about callback call |
721 | /// sites. The caller (1), which invokes the broker function. The broker |
722 | /// function (2), that will invoke the callee zero or more times. And finally |
723 | /// the callee (3), which is the target of the callback call. |
724 | /// |
725 | /// The abstract call site will handle the mapping from parameters to arguments |
726 | /// depending on the semantic of the broker function. However, it is important |
727 | /// to note that the mapping is often partial. Thus, some arguments of the |
728 | /// call/invoke instruction are mapped to parameters of the callee while others |
729 | /// are not. |
730 | class AbstractCallSite { |
731 | public: |
732 | |
733 | /// The encoding of a callback with regard to the underlying instruction.
734 | struct CallbackInfo { |
735 | |
736 | /// For direct/indirect calls the parameter encoding is empty. If it is not, |
737 | /// the abstract call site represents a callback. In that case, the first |
738 | /// element of the encoding vector represents which argument of the call |
739 | /// site CS is the callback callee. The remaining elements map parameters |
740 | /// (identified by their position) to the arguments that will be passed |
741 | /// through (also identified by position but in the call site instruction). |
742 | /// |
743 | /// NOTE that we use LLVM argument numbers (starting at 0) and not |
744 | /// clang/source argument numbers (starting at 1). The -1 entries represent |
745 | /// unknown values that are passed to the callee. |
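/// For example (a sketch based on the pthread_create callback encoding
/// described in the LangRef): for a broker call
///   pthread_create(%t, %attr, @start_routine, %arg)
/// the encoding is {2, 3}: call-site operand 2 is the callback callee and
/// the callee's single parameter receives call-site argument 3.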
746 | using ParameterEncodingTy = SmallVector<int, 0>; |
747 | ParameterEncodingTy ParameterEncoding; |
748 | |
749 | }; |
750 | |
751 | private: |
752 | |
753 | /// The underlying call site: |
754 | /// caller -> callee, if this is a direct or indirect call site |
755 | /// caller -> broker function, if this is a callback call site |
756 | CallSite CS; |
757 | |
758 | /// The encoding of a callback with regard to the underlying instruction.
759 | CallbackInfo CI; |
760 | |
761 | public: |
762 | /// Sole constructor for abstract call sites (ACS). |
763 | /// |
764 | /// An abstract call site can only be constructed through a llvm::Use because |
765 | /// each operand (=use) of an instruction could potentially be a different |
766 | /// abstract call site. Furthermore, even if the value of the llvm::Use is the |
767 | /// same, and the user is as well, the abstract call sites might not be. |
768 | /// |
769 | /// If a use is not associated with an abstract call site the constructed ACS |
770 | /// will evaluate to false if converted to a boolean. |
771 | /// |
772 | /// If the use is the callee use of a call or invoke instruction, the |
773 | /// constructed abstract call site will behave as a llvm::CallSite would. |
774 | /// |
775 | /// If the use is not a callee use of a call or invoke instruction, the |
776 | /// callback metadata is used to determine the argument <-> parameter mapping |
777 | /// as well as the callee of the abstract call site. |
778 | AbstractCallSite(const Use *U); |
779 | |
780 | /// Add operand uses of \p ICS that represent callback uses into \p CBUses. |
781 | /// |
782 | /// All uses added to \p CBUses can be used to create abstract call sites for |
783 | /// which AbstractCallSite::isCallbackCall() will return true. |
784 | static void getCallbackUses(ImmutableCallSite ICS, |
785 | SmallVectorImpl<const Use *> &CBUses); |
786 | |
787 | /// Conversion operator to conveniently check for a valid/initialized ACS. |
788 | explicit operator bool() const { return (bool)CS; } |
789 | |
790 | /// Return the underlying instruction. |
791 | Instruction *getInstruction() const { return CS.getInstruction(); } |
792 | |
793 | /// Return the call site abstraction for the underlying instruction. |
794 | CallSite getCallSite() const { return CS; } |
795 | |
796 | /// Return true if this ACS represents a direct call. |
797 | bool isDirectCall() const { |
798 | return !isCallbackCall() && !CS.isIndirectCall(); |
799 | } |
800 | |
801 | /// Return true if this ACS represents an indirect call. |
802 | bool isIndirectCall() const { |
803 | return !isCallbackCall() && CS.isIndirectCall(); |
804 | } |
805 | |
806 | /// Return true if this ACS represents a callback call. |
807 | bool isCallbackCall() const { |
808 | // For a callback call site the callee is ALWAYS stored first in the |
809 | // transitive values vector. Thus, a non-empty vector indicates a callback. |
810 | return !CI.ParameterEncoding.empty(); |
811 | } |
812 | |
813 | /// Return true if @p UI is the use that defines the callee of this ACS. |
814 | bool isCallee(Value::const_user_iterator UI) const { |
815 | return isCallee(&UI.getUse()); |
816 | } |
817 | |
818 | /// Return true if @p U is the use that defines the callee of this ACS. |
819 | bool isCallee(const Use *U) const { |
820 | if (isDirectCall()) |
821 | return CS.isCallee(U); |
822 | |
823 | assert(!CI.ParameterEncoding.empty() && |
824 | "Callback without parameter encoding!"); |
825 | |
826 | return (int)CS.getArgumentNo(U) == CI.ParameterEncoding[0]; |
827 | } |
828 | |
829 | /// Return the number of parameters of the callee. |
830 | unsigned getNumArgOperands() const { |
831 | if (isDirectCall()) |
832 | return CS.getNumArgOperands(); |
833 | // Subtract 1 for the callee encoding. |
834 | return CI.ParameterEncoding.size() - 1; |
835 | } |
836 | |
837 | /// Return the operand index of the underlying instruction associated with @p |
838 | /// Arg. |
839 | int getCallArgOperandNo(Argument &Arg) const { |
840 | return getCallArgOperandNo(Arg.getArgNo()); |
841 | } |
842 | |
843 | /// Return the operand index of the underlying instruction associated with |
844 | /// the function parameter number @p ArgNo or -1 if there is none. |
845 | int getCallArgOperandNo(unsigned ArgNo) const { |
846 | if (isDirectCall()) |
847 | return ArgNo; |
848 | // Add 1 for the callee encoding. |
849 | return CI.ParameterEncoding[ArgNo + 1]; |
850 | } |
851 | |
852 | /// Return the operand of the underlying instruction associated with @p Arg. |
853 | Value *getCallArgOperand(Argument &Arg) const { |
854 | return getCallArgOperand(Arg.getArgNo()); |
855 | } |
856 | |
857 | /// Return the operand of the underlying instruction associated with the |
858 | /// function parameter number @p ArgNo or nullptr if there is none. |
859 | Value *getCallArgOperand(unsigned ArgNo) const { |
860 | if (isDirectCall()) |
861 | return CS.getArgOperand(ArgNo); |
862 | // Add 1 for the callee encoding. |
863 | return CI.ParameterEncoding[ArgNo + 1] >= 0 |
864 | ? CS.getArgOperand(CI.ParameterEncoding[ArgNo + 1]) |
865 | : nullptr; |
866 | } |
867 | |
868 | /// Return the operand index of the underlying instruction associated with the |
869 | /// callee of this ACS. Only valid for callback calls! |
870 | int getCallArgOperandNoForCallee() const { |
871 | assert(isCallbackCall()); |
872 | assert(CI.ParameterEncoding.size() && CI.ParameterEncoding[0] >= 0); |
873 | return CI.ParameterEncoding[0]; |
874 | } |
875 | |
876 | /// Return the use of the callee value in the underlying instruction. Only |
877 | /// valid for callback calls! |
878 | const Use &getCalleeUseForCallback() const { |
879 | int CalleeArgIdx = getCallArgOperandNoForCallee(); |
880 | assert(CalleeArgIdx >= 0 && |
881 | unsigned(CalleeArgIdx) < getInstruction()->getNumOperands()); |
882 | return getInstruction()->getOperandUse(CalleeArgIdx); |
883 | } |
884 | |
885 | /// Return the pointer to function that is being called. |
886 | Value *getCalledValue() const { |
887 | if (isDirectCall()) |
888 | return CS.getCalledValue(); |
889 | return CS.getArgOperand(getCallArgOperandNoForCallee()); |
890 | } |
891 | |
892 | /// Return the function being called if this is a direct call, otherwise |
893 | /// return null (if it's an indirect call). |
894 | Function *getCalledFunction() const { |
895 | Value *V = getCalledValue(); |
896 | return V ? dyn_cast<Function>(V->stripPointerCasts()) : nullptr; |
897 | } |
898 | }; |
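A minimal usage sketch for the interface above, assuming ICS is an
ImmutableCallSite for a call to a broker function (only members declared
in this header are used):

  SmallVector<const Use *, 4> CallbackUses;
  AbstractCallSite::getCallbackUses(ICS, CallbackUses);
  for (const Use *U : CallbackUses) {
    AbstractCallSite ACS(U);
    assert(ACS && ACS.isCallbackCall());
    if (Function *Callee = ACS.getCalledFunction())
      for (Argument &Arg : Callee->args())
        if (Value *V = ACS.getCallArgOperand(Arg))
          (void)V; // Call-site value passed to Arg; nullptr when unknown.
  }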
899 | |
900 | template <> struct DenseMapInfo<CallSite> { |
901 | using BaseInfo = DenseMapInfo<decltype(CallSite::I)>; |
902 | |
903 | static CallSite getEmptyKey() { |
904 | CallSite CS; |
905 | CS.I = BaseInfo::getEmptyKey(); |
906 | return CS; |
907 | } |
908 | |
909 | static CallSite getTombstoneKey() { |
910 | CallSite CS; |
911 | CS.I = BaseInfo::getTombstoneKey(); |
912 | return CS; |
913 | } |
914 | |
915 | static unsigned getHashValue(const CallSite &CS) { |
916 | return BaseInfo::getHashValue(CS.I); |
917 | } |
918 | |
919 | static bool isEqual(const CallSite &LHS, const CallSite &RHS) { |
920 | return LHS == RHS; |
921 | } |
922 | }; |
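This specialization lets CallSite itself key a DenseMap, hashing through
the underlying instruction pointer. For example (a sketch):

  DenseMap<CallSite, unsigned> VisitCount;
  ++VisitCount[CS]; // CS is some previously constructed CallSite.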
923 | |
924 | } // end namespace llvm |
925 | |
926 | #endif // LLVM_IR_CALLSITE_H |
1 | //===- llvm/ADT/PointerIntPair.h - Pair for pointer and int -----*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the PointerIntPair class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #ifndef LLVM_ADT_POINTERINTPAIR_H |
14 | #define LLVM_ADT_POINTERINTPAIR_H |
15 | |
16 | #include "llvm/Support/Compiler.h" |
17 | #include "llvm/Support/PointerLikeTypeTraits.h" |
18 | #include "llvm/Support/type_traits.h" |
19 | #include <cassert> |
20 | #include <cstdint> |
21 | #include <limits> |
22 | |
23 | namespace llvm { |
24 | |
25 | template <typename T> struct DenseMapInfo; |
26 | template <typename PointerT, unsigned IntBits, typename PtrTraits> |
27 | struct PointerIntPairInfo; |
28 | |
29 | /// PointerIntPair - This class implements a pair of a pointer and small |
30 | /// integer. It is designed to represent this in the space required by one |
31 | /// pointer by bitmangling the integer into the low part of the pointer. This |
32 | /// can only be done for small integers: typically up to 3 bits, but it depends |
33 | /// on the number of bits available according to PointerLikeTypeTraits for the |
34 | /// type. |
35 | /// |
36 | /// Note that PointerIntPair always puts the IntVal part in the highest bits |
37 | /// possible. For example, PointerIntPair<void*, 1, bool> will put the bit for |
38 | /// the bool into bit #2, not bit #0, which allows the low two bits to be used |
39 | /// for something else. For example, this allows: |
40 | /// PointerIntPair<PointerIntPair<void*, 1, bool>, 1, bool> |
41 | /// ... and the two bools will land in different bits. |
42 | template <typename PointerTy, unsigned IntBits, typename IntType = unsigned, |
43 | typename PtrTraits = PointerLikeTypeTraits<PointerTy>, |
44 | typename Info = PointerIntPairInfo<PointerTy, IntBits, PtrTraits>> |
45 | class PointerIntPair { |
46 | // Used by MSVC visualizer and generally helpful for debugging/visualizing. |
47 | using InfoTy = Info; |
48 | intptr_t Value = 0; |
49 | |
50 | public: |
51 | constexpr PointerIntPair() = default; |
52 | |
53 | PointerIntPair(PointerTy PtrVal, IntType IntVal) { |
54 | setPointerAndInt(PtrVal, IntVal); |
55 | } |
56 | |
57 | explicit PointerIntPair(PointerTy PtrVal) { initWithPointer(PtrVal); } |
58 | |
59 | PointerTy getPointer() const { return Info::getPointer(Value); } |
60 | |
61 | IntType getInt() const { return (IntType)Info::getInt(Value); } |
62 | |
63 | void setPointer(PointerTy PtrVal) LLVM_LVALUE_FUNCTION { |
64 | Value = Info::updatePointer(Value, PtrVal); |
65 | } |
66 | |
67 | void setInt(IntType IntVal) LLVM_LVALUE_FUNCTION { |
68 | Value = Info::updateInt(Value, static_cast<intptr_t>(IntVal)); |
69 | } |
70 | |
71 | void initWithPointer(PointerTy PtrVal) LLVM_LVALUE_FUNCTION { |
72 | Value = Info::updatePointer(0, PtrVal); |
73 | } |
74 | |
75 | void setPointerAndInt(PointerTy PtrVal, IntType IntVal) LLVM_LVALUE_FUNCTION { |
76 | Value = Info::updateInt(Info::updatePointer(0, PtrVal), |
77 | static_cast<intptr_t>(IntVal)); |
78 | } |
79 | |
80 | PointerTy const *getAddrOfPointer() const { |
81 | return const_cast<PointerIntPair *>(this)->getAddrOfPointer(); |
82 | } |
83 | |
84 | PointerTy *getAddrOfPointer() { |
85 | assert(Value == reinterpret_cast<intptr_t>(getPointer()) && |
86 | "Can only return the address if IntBits is cleared and " |
87 | "PtrTraits doesn't change the pointer"); |
88 | return reinterpret_cast<PointerTy *>(&Value); |
89 | } |
90 | |
91 | void *getOpaqueValue() const { return reinterpret_cast<void *>(Value); } |
92 | |
93 | void setFromOpaqueValue(void *Val) LLVM_LVALUE_FUNCTION { |
94 | Value = reinterpret_cast<intptr_t>(Val); |
95 | } |
96 | |
97 | static PointerIntPair getFromOpaqueValue(void *V) { |
98 | PointerIntPair P; |
99 | P.setFromOpaqueValue(V); |
100 | return P; |
101 | } |
102 | |
103 | // Allow PointerIntPairs to be created from const void * if and only if the |
104 | // pointer type could be created from a const void *. |
105 | static PointerIntPair getFromOpaqueValue(const void *V) { |
106 | (void)PtrTraits::getFromVoidPointer(V); |
107 | return getFromOpaqueValue(const_cast<void *>(V)); |
108 | } |
109 | |
110 | bool operator==(const PointerIntPair &RHS) const { |
111 | return Value == RHS.Value; |
112 | } |
113 | |
114 | bool operator!=(const PointerIntPair &RHS) const { |
115 | return Value != RHS.Value; |
116 | } |
117 | |
118 | bool operator<(const PointerIntPair &RHS) const { return Value < RHS.Value; } |
119 | bool operator>(const PointerIntPair &RHS) const { return Value > RHS.Value; } |
120 | |
121 | bool operator<=(const PointerIntPair &RHS) const { |
122 | return Value <= RHS.Value; |
123 | } |
124 | |
125 | bool operator>=(const PointerIntPair &RHS) const { |
126 | return Value >= RHS.Value; |
127 | } |
128 | }; |
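A minimal usage sketch (function and variable names here are hypothetical;
int is 4-byte aligned on common targets, leaving two low bits free):

  #include "llvm/ADT/PointerIntPair.h"

  void example(int *Ptr) {
    llvm::PointerIntPair<int *, 1, bool> P(Ptr, true);
    int *Raw = P.getPointer(); // Original pointer, tag bit masked off.
    bool Tag = P.getInt();     // The packed bit.
    P.setInt(false);           // Rewrites only the integer part.
    (void)Raw; (void)Tag;
  }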
129 | |
130 | // Specialize is_trivially_copyable to avoid a limitation of llvm::is_trivially_copyable |
131 | // when compiled with gcc 4.9. |
132 | template <typename PointerTy, unsigned IntBits, typename IntType, |
133 | typename PtrTraits, |
134 | typename Info> |
135 | struct is_trivially_copyable<PointerIntPair<PointerTy, IntBits, IntType, PtrTraits, Info>> : std::true_type { |
136 | #ifdef HAVE_STD_IS_TRIVIALLY_COPYABLE |
137 | static_assert(std::is_trivially_copyable<PointerIntPair<PointerTy, IntBits, IntType, PtrTraits, Info>>::value, |
138 | "inconsistent behavior between llvm:: and std:: implementation of is_trivially_copyable"); |
139 | #endif |
140 | }; |
141 | |
142 | |
143 | template <typename PointerT, unsigned IntBits, typename PtrTraits> |
144 | struct PointerIntPairInfo { |
145 | static_assert(PtrTraits::NumLowBitsAvailable < |
146 | std::numeric_limits<uintptr_t>::digits, |
147 | "cannot use a pointer type that has all bits free"); |
148 | static_assert(IntBits <= PtrTraits::NumLowBitsAvailable, |
149 | "PointerIntPair with integer size too large for pointer"); |
150 | enum MaskAndShiftConstants : uintptr_t { |
151 | /// PointerBitMask - The bits that come from the pointer. |
152 | PointerBitMask = |
153 | ~(uintptr_t)(((intptr_t)1 << PtrTraits::NumLowBitsAvailable) - 1), |
154 | |
155 | /// IntShift - The number of low bits that we reserve for other uses, and |
156 | /// keep zero. |
157 | IntShift = (uintptr_t)PtrTraits::NumLowBitsAvailable - IntBits, |
158 | |
159 | /// IntMask - This is the unshifted mask for valid bits of the int type. |
160 | IntMask = (uintptr_t)(((intptr_t)1 << IntBits) - 1), |
161 | |
162 | // ShiftedIntMask - This is the bits for the integer shifted in place. |
163 | ShiftedIntMask = (uintptr_t)(IntMask << IntShift) |
164 | }; |
165 | |
166 | static PointerT getPointer(intptr_t Value) { |
167 | return PtrTraits::getFromVoidPointer( |
168 | reinterpret_cast<void *>(Value & PointerBitMask)); |
169 | } |
170 | |
171 | static intptr_t getInt(intptr_t Value) { |
172 | return (Value >> IntShift) & IntMask; |
173 | } |
174 | |
175 | static intptr_t updatePointer(intptr_t OrigValue, PointerT Ptr) { |
176 | intptr_t PtrWord = |
177 | reinterpret_cast<intptr_t>(PtrTraits::getAsVoidPointer(Ptr)); |
178 | assert((PtrWord & ~PointerBitMask) == 0 && |
179 | "Pointer is not sufficiently aligned"); |
180 | // Preserve all low bits, just update the pointer. |
181 | return PtrWord | (OrigValue & ~PointerBitMask); |
182 | } |
183 | |
184 | static intptr_t updateInt(intptr_t OrigValue, intptr_t Int) { |
185 | intptr_t IntWord = static_cast<intptr_t>(Int); |
186 | assert((IntWord & ~IntMask) == 0 && "Integer too large for field"); |
187 | |
188 | // Preserve all bits other than the ones we are updating. |
189 | return (OrigValue & ~ShiftedIntMask) | IntWord << IntShift; |
190 | } |
191 | }; |
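A worked example of the constants above: with PtrTraits::NumLowBitsAvailable
== 3 (an 8-byte-aligned pointee) and IntBits == 1, PointerBitMask ==
~uintptr_t(0b111), IntShift == 3 - 1 == 2, IntMask == 0b1, and
ShiftedIntMask == 0b100. The single integer bit therefore lives in bit #2
while bits #0 and #1 stay zero, matching the "highest bits possible" rule
documented on PointerIntPair.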
192 | |
193 | // Provide specialization of DenseMapInfo for PointerIntPair. |
194 | template <typename PointerTy, unsigned IntBits, typename IntType> |
195 | struct DenseMapInfo<PointerIntPair<PointerTy, IntBits, IntType>> { |
196 | using Ty = PointerIntPair<PointerTy, IntBits, IntType>; |
197 | |
198 | static Ty getEmptyKey() { |
199 | uintptr_t Val = static_cast<uintptr_t>(-1); |
200 | Val <<= PointerLikeTypeTraits<Ty>::NumLowBitsAvailable; |
201 | return Ty::getFromOpaqueValue(reinterpret_cast<void *>(Val)); |
202 | } |
203 | |
204 | static Ty getTombstoneKey() { |
205 | uintptr_t Val = static_cast<uintptr_t>(-2); |
206 | Val <<= PointerLikeTypeTraits<PointerTy>::NumLowBitsAvailable; |
207 | return Ty::getFromOpaqueValue(reinterpret_cast<void *>(Val)); |
208 | } |
209 | |
210 | static unsigned getHashValue(Ty V) { |
211 | uintptr_t IV = reinterpret_cast<uintptr_t>(V.getOpaqueValue()); |
212 | return unsigned(IV) ^ unsigned(IV >> 9); |
213 | } |
214 | |
215 | static bool isEqual(const Ty &LHS, const Ty &RHS) { return LHS == RHS; } |
216 | }; |
217 | |
218 | // Teach SmallPtrSet that PointerIntPair is "basically a pointer". |
219 | template <typename PointerTy, unsigned IntBits, typename IntType, |
220 | typename PtrTraits> |
221 | struct PointerLikeTypeTraits< |
222 | PointerIntPair<PointerTy, IntBits, IntType, PtrTraits>> { |
223 | static inline void * |
224 | getAsVoidPointer(const PointerIntPair<PointerTy, IntBits, IntType> &P) { |
225 | return P.getOpaqueValue(); |
226 | } |
227 | |
228 | static inline PointerIntPair<PointerTy, IntBits, IntType> |
229 | getFromVoidPointer(void *P) { |
230 | return PointerIntPair<PointerTy, IntBits, IntType>::getFromOpaqueValue(P); |
231 | } |
232 | |
233 | static inline PointerIntPair<PointerTy, IntBits, IntType> |
234 | getFromVoidPointer(const void *P) { |
235 | return PointerIntPair<PointerTy, IntBits, IntType>::getFromOpaqueValue(P); |
236 | } |
237 | |
238 | static constexpr int NumLowBitsAvailable = |
239 | PtrTraits::NumLowBitsAvailable - IntBits; |
240 | }; |
241 | |
242 | } // end namespace llvm |
243 | |
244 | #endif // LLVM_ADT_POINTERINTPAIR_H |
1 | //===- llvm/Support/PointerLikeTypeTraits.h - Pointer Traits ----*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the PointerLikeTypeTraits class. This allows data |
10 | // structures to reason about pointers and other things that are pointer sized. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #ifndef LLVM_SUPPORT_POINTERLIKETYPETRAITS_H |
15 | #define LLVM_SUPPORT_POINTERLIKETYPETRAITS_H |
16 | |
17 | #include "llvm/Support/DataTypes.h" |
18 | #include <assert.h> |
19 | #include <type_traits> |
20 | |
21 | namespace llvm { |
22 | |
23 | /// A traits type that is used to handle pointer types and things that are just |
24 | /// wrappers for pointers as a uniform entity. |
25 | template <typename T> struct PointerLikeTypeTraits; |
26 | |
27 | namespace detail { |
28 | /// A tiny meta function to compute the log2 of a compile time constant. |
29 | template <size_t N> |
30 | struct ConstantLog2 |
31 | : std::integral_constant<size_t, ConstantLog2<N / 2>::value + 1> {}; |
32 | template <> struct ConstantLog2<1> : std::integral_constant<size_t, 0> {}; |
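For instance, ConstantLog2<8> unwinds as ConstantLog2<4>::value + 1, then
ConstantLog2<2>::value + 2, then ConstantLog2<1>::value + 3, giving 3:

  static_assert(llvm::detail::ConstantLog2<8>::value == 3, "log2(8) == 3");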
33 | |
34 | // Provide a trait to check if T is pointer-like. |
35 | template <typename T, typename U = void> struct HasPointerLikeTypeTraits { |
36 | static const bool value = false; |
37 | }; |
38 | |
39 | // sizeof(T) is valid only for a complete T. |
40 | template <typename T> struct HasPointerLikeTypeTraits< |
41 | T, decltype((sizeof(PointerLikeTypeTraits<T>) + sizeof(T)), void())> { |
42 | static const bool value = true; |
43 | }; |
44 | |
45 | template <typename T> struct IsPointerLike { |
46 | static const bool value = HasPointerLikeTypeTraits<T>::value; |
47 | }; |
48 | |
49 | template <typename T> struct IsPointerLike<T *> { |
50 | static const bool value = true; |
51 | }; |
52 | } // namespace detail |
53 | |
54 | // Provide PointerLikeTypeTraits for non-cvr pointers. |
55 | template <typename T> struct PointerLikeTypeTraits<T *> { |
56 | static inline void *getAsVoidPointer(T *P) { return P; } |
57 | static inline T *getFromVoidPointer(void *P) { return static_cast<T *>(P); } |
58 | |
59 | static constexpr int NumLowBitsAvailable = |
60 | detail::ConstantLog2<alignof(T)>::value; |
61 | }; |
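For example, on common targets where alignof(double) == 8 a double * leaves
log2(8) == 3 low bits free (alignment, and hence this value, is
target-dependent):

  static_assert(
      llvm::PointerLikeTypeTraits<double *>::NumLowBitsAvailable == 3,
      "8-byte alignment frees 3 low bits");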
62 | |
63 | template <> struct PointerLikeTypeTraits<void *> { |
64 | static inline void *getAsVoidPointer(void *P) { return P; } |
65 | static inline void *getFromVoidPointer(void *P) { return P; } |
66 | |
67 | /// Note, we assume here that void* is related to raw malloc'ed memory and |
68 | /// that malloc returns objects at least 4-byte aligned. However, this may be |
69 | /// wrong, or pointers may be from something other than malloc. In this case, |
70 | /// you should specify a real typed pointer or avoid this template. |
71 | /// |
72 | /// All clients should use assertions to do a run-time check to ensure that |
73 | /// this is actually true. |
74 | static constexpr int NumLowBitsAvailable = 2; |
75 | }; |
76 | |
77 | // Provide PointerLikeTypeTraits for const things. |
78 | template <typename T> struct PointerLikeTypeTraits<const T> { |
79 | typedef PointerLikeTypeTraits<T> NonConst; |
80 | |
81 | static inline const void *getAsVoidPointer(const T P) { |
82 | return NonConst::getAsVoidPointer(P); |
83 | } |
84 | static inline const T getFromVoidPointer(const void *P) { |
85 | return NonConst::getFromVoidPointer(const_cast<void *>(P)); |
86 | } |
87 | static constexpr int NumLowBitsAvailable = NonConst::NumLowBitsAvailable; |
88 | }; |
89 | |
90 | // Provide PointerLikeTypeTraits for const pointers. |
91 | template <typename T> struct PointerLikeTypeTraits<const T *> { |
92 | typedef PointerLikeTypeTraits<T *> NonConst; |
93 | |
94 | static inline const void *getAsVoidPointer(const T *P) { |
95 | return NonConst::getAsVoidPointer(const_cast<T *>(P)); |
96 | } |
97 | static inline const T *getFromVoidPointer(const void *P) { |
98 | return NonConst::getFromVoidPointer(const_cast<void *>(P)); |
99 | } |
100 | static constexpr int NumLowBitsAvailable = NonConst::NumLowBitsAvailable; |
101 | }; |
102 | |
103 | // Provide PointerLikeTypeTraits for uintptr_t. |
104 | template <> struct PointerLikeTypeTraits<uintptr_t> { |
105 | static inline void *getAsVoidPointer(uintptr_t P) { |
106 | return reinterpret_cast<void *>(P); |
107 | } |
108 | static inline uintptr_t getFromVoidPointer(void *P) { |
109 | return reinterpret_cast<uintptr_t>(P); |
110 | } |
111 | // No bits are available! |
112 | static constexpr int NumLowBitsAvailable = 0; |
113 | }; |
114 | |
115 | /// Provide suitable custom traits struct for function pointers. |
116 | /// |
117 | /// Function pointers can't be directly given these traits as functions can't |
118 | /// have their alignment computed with `alignof` and we need different casting. |
119 | /// |
120 | /// To rely on higher alignment for a specialized use, you can provide a |
121 | /// customized form of this template explicitly with higher alignment, and |
122 | /// potentially use alignment attributes on functions to satisfy that. |
123 | template <int Alignment, typename FunctionPointerT> |
124 | struct FunctionPointerLikeTypeTraits { |
125 | static constexpr int NumLowBitsAvailable = |
126 | detail::ConstantLog2<Alignment>::value; |
127 | static inline void *getAsVoidPointer(FunctionPointerT P) { |
128 | assert((reinterpret_cast<uintptr_t>(P) & |
129 | ~((uintptr_t)-1 << NumLowBitsAvailable)) == 0 && |
130 | "Alignment not satisfied for an actual function pointer!"); |
131 | return reinterpret_cast<void *>(P); |
132 | } |
133 | static inline FunctionPointerT getFromVoidPointer(void *P) { |
134 | return reinterpret_cast<FunctionPointerT>(P); |
135 | } |
136 | }; |
137 | |
138 | /// Provide a default specialization for function pointers that assumes 4-byte |
139 | /// alignment. |
140 | /// |
141 | /// We assume here that functions used with this are always at least 4-byte |
142 | /// aligned. This means that, for example, Thumb functions and systems with |
143 | /// unusual unaligned function pointers won't work. But all practical systems |
144 | /// we support satisfy this requirement. |
145 | template <typename ReturnT, typename... ParamTs> |
146 | struct PointerLikeTypeTraits<ReturnT (*)(ParamTs...)> |
147 | : FunctionPointerLikeTypeTraits<4, ReturnT (*)(ParamTs...)> {}; |
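Under this default 4-byte assumption a function pointer exposes two low
bits, enough to pack a flag next to a handler, for example (a sketch using
PointerIntPair from llvm/ADT/PointerIntPair.h):

  llvm::PointerIntPair<void (*)(int), 1, bool> HandlerAndEnabled;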
148 | |
149 | } // end namespace llvm |
150 | |
151 | #endif |
1 | //===-- llvm/Operator.h - Operator utility subclass -------------*- C++ -*-===// | ||||||||||||||
2 | // | ||||||||||||||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||||||||||||
4 | // See https://llvm.org/LICENSE.txt for license information. | ||||||||||||||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||||||||||||
6 | // | ||||||||||||||
7 | //===----------------------------------------------------------------------===// | ||||||||||||||
8 | // | ||||||||||||||
9 | // This file defines various classes for working with Instructions and | ||||||||||||||
10 | // ConstantExprs. | ||||||||||||||
11 | // | ||||||||||||||
12 | //===----------------------------------------------------------------------===// | ||||||||||||||
13 | |||||||||||||||
14 | #ifndef LLVM_IR_OPERATOR_H | ||||||||||||||
15 | #define LLVM_IR_OPERATOR_H | ||||||||||||||
16 | |||||||||||||||
17 | #include "llvm/ADT/None.h" | ||||||||||||||
18 | #include "llvm/ADT/Optional.h" | ||||||||||||||
19 | #include "llvm/IR/Constants.h" | ||||||||||||||
20 | #include "llvm/IR/Instruction.h" | ||||||||||||||
21 | #include "llvm/IR/Type.h" | ||||||||||||||
22 | #include "llvm/IR/Value.h" | ||||||||||||||
23 | #include "llvm/Support/Casting.h" | ||||||||||||||
24 | #include <cstddef> | ||||||||||||||
25 | |||||||||||||||
26 | namespace llvm { | ||||||||||||||
27 | |||||||||||||||
28 | /// This is a utility class that provides an abstraction for the common | ||||||||||||||
29 | /// functionality between Instructions and ConstantExprs. | ||||||||||||||
30 | class Operator : public User { | ||||||||||||||
31 | public: | ||||||||||||||
32 | // The Operator class is intended to be used as a utility, and is never itself | ||||||||||||||
33 | // instantiated. | ||||||||||||||
34 | Operator() = delete; | ||||||||||||||
35 | ~Operator() = delete; | ||||||||||||||
36 | |||||||||||||||
37 | void *operator new(size_t s) = delete; | ||||||||||||||
38 | |||||||||||||||
39 | /// Return the opcode for this Instruction or ConstantExpr. | ||||||||||||||
40 | unsigned getOpcode() const { | ||||||||||||||
41 | if (const Instruction *I = dyn_cast<Instruction>(this)) | ||||||||||||||
42 | return I->getOpcode(); | ||||||||||||||
43 | return cast<ConstantExpr>(this)->getOpcode(); | ||||||||||||||
44 | } | ||||||||||||||
45 | |||||||||||||||
46 | /// If V is an Instruction or ConstantExpr, return its opcode. | ||||||||||||||
47 | /// Otherwise return UserOp1. | ||||||||||||||
48 | static unsigned getOpcode(const Value *V) { | ||||||||||||||
49 | if (const Instruction *I = dyn_cast<Instruction>(V)) | ||||||||||||||
50 | return I->getOpcode(); | ||||||||||||||
51 | if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) | ||||||||||||||
52 | return CE->getOpcode(); | ||||||||||||||
53 | return Instruction::UserOp1; | ||||||||||||||
54 | } | ||||||||||||||
55 | |||||||||||||||
56 | static bool classof(const Instruction *) { return true; } | ||||||||||||||
57 | static bool classof(const ConstantExpr *) { return true; } | ||||||||||||||
58 | static bool classof(const Value *V) { | ||||||||||||||
59 | return isa<Instruction>(V) || isa<ConstantExpr>(V); | ||||||||||||||
60 | } | ||||||||||||||
61 | }; | ||||||||||||||
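A minimal sketch of the uniform opcode query, where V is assumed to be some
llvm::Value * and handleAdd a hypothetical helper:

  // Matches both 'add' instructions and 'add' constant expressions;
  // plain values such as arguments or globals yield UserOp1 instead.
  if (llvm::Operator::getOpcode(V) == llvm::Instruction::Add)
    handleAdd(V);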
62 | |||||||||||||||
63 | /// Utility class for integer operators which may exhibit overflow - Add, Sub, | ||||||||||||||
64 | /// Mul, and Shl. It does not include SDiv, despite that operator having the | ||||||||||||||
65 | /// potential for overflow. | ||||||||||||||
66 | class OverflowingBinaryOperator : public Operator { | ||||||||||||||
67 | public: | ||||||||||||||
68 | enum { | ||||||||||||||
69 | AnyWrap = 0, | ||||||||||||||
70 | NoUnsignedWrap = (1 << 0), | ||||||||||||||
71 | NoSignedWrap = (1 << 1) | ||||||||||||||
72 | }; | ||||||||||||||
73 | |||||||||||||||
74 | private: | ||||||||||||||
75 | friend class Instruction; | ||||||||||||||
76 | friend class ConstantExpr; | ||||||||||||||
77 | |||||||||||||||
78 | void setHasNoUnsignedWrap(bool B) { | ||||||||||||||
79 | SubclassOptionalData = | ||||||||||||||
80 | (SubclassOptionalData & ~NoUnsignedWrap) | (B * NoUnsignedWrap); | ||||||||||||||
81 | } | ||||||||||||||
82 | void setHasNoSignedWrap(bool B) { | ||||||||||||||
83 | SubclassOptionalData = | ||||||||||||||
84 | (SubclassOptionalData & ~NoSignedWrap) | (B * NoSignedWrap); | ||||||||||||||
85 | } | ||||||||||||||
86 | |||||||||||||||
87 | public: | ||||||||||||||
88 | /// Test whether this operation is known to never | ||||||||||||||
89 | /// undergo unsigned overflow, aka the nuw property. | ||||||||||||||
90 | bool hasNoUnsignedWrap() const { | ||||||||||||||
91 | return SubclassOptionalData & NoUnsignedWrap; | ||||||||||||||
92 | } | ||||||||||||||
93 | |||||||||||||||
94 | /// Test whether this operation is known to never | ||||||||||||||
95 | /// undergo signed overflow, aka the nsw property. | ||||||||||||||
96 | bool hasNoSignedWrap() const { | ||||||||||||||
97 | return (SubclassOptionalData & NoSignedWrap) != 0; | ||||||||||||||
98 | } | ||||||||||||||
99 | |||||||||||||||
100 | static bool classof(const Instruction *I) { | ||||||||||||||
101 | return I->getOpcode() == Instruction::Add || | ||||||||||||||
102 | I->getOpcode() == Instruction::Sub || | ||||||||||||||
103 | I->getOpcode() == Instruction::Mul || | ||||||||||||||
104 | I->getOpcode() == Instruction::Shl; | ||||||||||||||
105 | } | ||||||||||||||
106 | static bool classof(const ConstantExpr *CE) { | ||||||||||||||
107 | return CE->getOpcode() == Instruction::Add || | ||||||||||||||
108 | CE->getOpcode() == Instruction::Sub || | ||||||||||||||
109 | CE->getOpcode() == Instruction::Mul || | ||||||||||||||
110 | CE->getOpcode() == Instruction::Shl; | ||||||||||||||
111 | } | ||||||||||||||
112 | static bool classof(const Value *V) { | ||||||||||||||
113 | return (isa<Instruction>(V) && classof(cast<Instruction>(V))) || | ||||||||||||||
114 | (isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V))); | ||||||||||||||
115 | } | ||||||||||||||
116 | }; | ||||||||||||||
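Sketch: querying the wrap flags without caring whether V is an instruction
or a constant expression:

  if (auto *OBO = llvm::dyn_cast<llvm::OverflowingBinaryOperator>(V))
    if (OBO->hasNoSignedWrap() && OBO->hasNoUnsignedWrap())
      ; // The add/sub/mul/shl carries both nsw and nuw.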
117 | |||||||||||||||
118 | /// A udiv or sdiv instruction, which can be marked as "exact", | ||||||||||||||
119 | /// indicating that no bits are destroyed. | ||||||||||||||
120 | class PossiblyExactOperator : public Operator { | ||||||||||||||
121 | public: | ||||||||||||||
122 | enum { | ||||||||||||||
123 | IsExact = (1 << 0) | ||||||||||||||
124 | }; | ||||||||||||||
125 | |||||||||||||||
126 | private: | ||||||||||||||
127 | friend class Instruction; | ||||||||||||||
128 | friend class ConstantExpr; | ||||||||||||||
129 | |||||||||||||||
130 | void setIsExact(bool B) { | ||||||||||||||
131 | SubclassOptionalData = (SubclassOptionalData & ~IsExact) | (B * IsExact); | ||||||||||||||
132 | } | ||||||||||||||
133 | |||||||||||||||
134 | public: | ||||||||||||||
135 | /// Test whether this division is known to be exact, with zero remainder. | ||||||||||||||
136 | bool isExact() const { | ||||||||||||||
137 | return SubclassOptionalData & IsExact; | ||||||||||||||
138 | } | ||||||||||||||
139 | |||||||||||||||
140 | static bool isPossiblyExactOpcode(unsigned OpC) { | ||||||||||||||
141 | return OpC == Instruction::SDiv || | ||||||||||||||
142 | OpC == Instruction::UDiv || | ||||||||||||||
143 | OpC == Instruction::AShr || | ||||||||||||||
144 | OpC == Instruction::LShr; | ||||||||||||||
145 | } | ||||||||||||||
146 | |||||||||||||||
147 | static bool classof(const ConstantExpr *CE) { | ||||||||||||||
148 | return isPossiblyExactOpcode(CE->getOpcode()); | ||||||||||||||
149 | } | ||||||||||||||
150 | static bool classof(const Instruction *I) { | ||||||||||||||
151 | return isPossiblyExactOpcode(I->getOpcode()); | ||||||||||||||
152 | } | ||||||||||||||
153 | static bool classof(const Value *V) { | ||||||||||||||
154 | return (isa<Instruction>(V) && classof(cast<Instruction>(V))) || | ||||||||||||||
155 | (isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V))); | ||||||||||||||
156 | } | ||||||||||||||
157 | }; | ||||||||||||||
158 | |||||||||||||||
159 | /// Convenience struct for specifying and reasoning about fast-math flags. | ||||||||||||||
160 | class FastMathFlags { | ||||||||||||||
161 | private: | ||||||||||||||
162 | friend class FPMathOperator; | ||||||||||||||
163 | |||||||||||||||
164 | unsigned Flags = 0; | ||||||||||||||
165 | |||||||||||||||
166 | FastMathFlags(unsigned F) { | ||||||||||||||
167 | // If all 7 bits are set, turn this into -1. If the number of bits grows, | ||||||||||||||
168 | // this must be updated. This is intended to provide some forward binary | ||||||||||||||
169 | // compatibility insurance for the meaning of 'fast' in case bits are added. | ||||||||||||||
170 | if (F == 0x7F) Flags = ~0U; | ||||||||||||||
171 | else Flags = F; | ||||||||||||||
172 | } | ||||||||||||||
173 | |||||||||||||||
174 | public: | ||||||||||||||
175 | // This is how the bits are used in Value::SubclassOptionalData so they | ||||||||||||||
176 | // should fit there too. | ||||||||||||||
177 | // WARNING: We're out of space. SubclassOptionalData only has 7 bits. New | ||||||||||||||
178 | // functionality will require a change in how this information is stored. | ||||||||||||||
179 | enum { | ||||||||||||||
180 | AllowReassoc = (1 << 0), | ||||||||||||||
181 | NoNaNs = (1 << 1), | ||||||||||||||
182 | NoInfs = (1 << 2), | ||||||||||||||
183 | NoSignedZeros = (1 << 3), | ||||||||||||||
184 | AllowReciprocal = (1 << 4), | ||||||||||||||
185 | AllowContract = (1 << 5), | ||||||||||||||
186 | ApproxFunc = (1 << 6) | ||||||||||||||
187 | }; | ||||||||||||||
188 | |||||||||||||||
189 | FastMathFlags() = default; | ||||||||||||||
190 | |||||||||||||||
191 | static FastMathFlags getFast() { | ||||||||||||||
192 | FastMathFlags FMF; | ||||||||||||||
193 | FMF.setFast(); | ||||||||||||||
194 | return FMF; | ||||||||||||||
195 | } | ||||||||||||||
196 | |||||||||||||||
197 | bool any() const { return Flags != 0; } | ||||||||||||||
198 | bool none() const { return Flags == 0; } | ||||||||||||||
199 | bool all() const { return Flags == ~0U; } | ||||||||||||||
200 | |||||||||||||||
201 | void clear() { Flags = 0; } | ||||||||||||||
202 | void set() { Flags = ~0U; } | ||||||||||||||
203 | |||||||||||||||
204 | /// Flag queries | ||||||||||||||
205 | bool allowReassoc() const { return 0 != (Flags & AllowReassoc); } | ||||||||||||||
206 | bool noNaNs() const { return 0 != (Flags & NoNaNs); } | ||||||||||||||
207 | bool noInfs() const { return 0 != (Flags & NoInfs); } | ||||||||||||||
208 | bool noSignedZeros() const { return 0 != (Flags & NoSignedZeros); } | ||||||||||||||
209 | bool allowReciprocal() const { return 0 != (Flags & AllowReciprocal); } | ||||||||||||||
210 | bool allowContract() const { return 0 != (Flags & AllowContract); } | ||||||||||||||
211 | bool approxFunc() const { return 0 != (Flags & ApproxFunc); } | ||||||||||||||
212 | /// 'Fast' means all bits are set. | ||||||||||||||
213 | bool isFast() const { return all(); } | ||||||||||||||
214 | |||||||||||||||
215 | /// Flag setters | ||||||||||||||
216 | void setAllowReassoc(bool B = true) { | ||||||||||||||
217 | Flags = (Flags & ~AllowReassoc) | B * AllowReassoc; | ||||||||||||||
218 | } | ||||||||||||||
219 | void setNoNaNs(bool B = true) { | ||||||||||||||
220 | Flags = (Flags & ~NoNaNs) | B * NoNaNs; | ||||||||||||||
221 | } | ||||||||||||||
222 | void setNoInfs(bool B = true) { | ||||||||||||||
223 | Flags = (Flags & ~NoInfs) | B * NoInfs; | ||||||||||||||
224 | } | ||||||||||||||
225 | void setNoSignedZeros(bool B = true) { | ||||||||||||||
226 | Flags = (Flags & ~NoSignedZeros) | B * NoSignedZeros; | ||||||||||||||
227 | } | ||||||||||||||
228 | void setAllowReciprocal(bool B = true) { | ||||||||||||||
229 | Flags = (Flags & ~AllowReciprocal) | B * AllowReciprocal; | ||||||||||||||
230 | } | ||||||||||||||
231 | void setAllowContract(bool B = true) { | ||||||||||||||
232 | Flags = (Flags & ~AllowContract) | B * AllowContract; | ||||||||||||||
233 | } | ||||||||||||||
234 | void setApproxFunc(bool B = true) { | ||||||||||||||
235 | Flags = (Flags & ~ApproxFunc) | B * ApproxFunc; | ||||||||||||||
236 | } | ||||||||||||||
237 | void setFast(bool B = true) { B ? set() : clear(); } | ||||||||||||||
238 | |||||||||||||||
239 | void operator&=(const FastMathFlags &OtherFlags) { | ||||||||||||||
240 | Flags &= OtherFlags.Flags; | ||||||||||||||
241 | } | ||||||||||||||
242 | }; | ||||||||||||||
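Sketch: building a flag set that permits only reassociation and FMA-style
contraction; note that isFast() requires all seven bits:

  llvm::FastMathFlags FMF;
  FMF.setAllowReassoc();
  FMF.setAllowContract();
  assert(FMF.any() && !FMF.isFast());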
243 | |||||||||||||||
244 | /// Utility class for floating point operations which can have | ||||||||||||||
245 | /// information about relaxed accuracy requirements attached to them. | ||||||||||||||
246 | class FPMathOperator : public Operator { | ||||||||||||||
247 | private: | ||||||||||||||
248 | friend class Instruction; | ||||||||||||||
249 | |||||||||||||||
250 | /// 'Fast' means all bits are set. | ||||||||||||||
251 | void setFast(bool B) { | ||||||||||||||
252 | setHasAllowReassoc(B); | ||||||||||||||
253 | setHasNoNaNs(B); | ||||||||||||||
254 | setHasNoInfs(B); | ||||||||||||||
255 | setHasNoSignedZeros(B); | ||||||||||||||
256 | setHasAllowReciprocal(B); | ||||||||||||||
257 | setHasAllowContract(B); | ||||||||||||||
258 | setHasApproxFunc(B); | ||||||||||||||
259 | } | ||||||||||||||
260 | |||||||||||||||
261 | void setHasAllowReassoc(bool B) { | ||||||||||||||
262 | SubclassOptionalData = | ||||||||||||||
263 | (SubclassOptionalData & ~FastMathFlags::AllowReassoc) | | ||||||||||||||
264 | (B * FastMathFlags::AllowReassoc); | ||||||||||||||
265 | } | ||||||||||||||
266 | |||||||||||||||
267 | void setHasNoNaNs(bool B) { | ||||||||||||||
268 | SubclassOptionalData = | ||||||||||||||
269 | (SubclassOptionalData & ~FastMathFlags::NoNaNs) | | ||||||||||||||
270 | (B * FastMathFlags::NoNaNs); | ||||||||||||||
271 | } | ||||||||||||||
272 | |||||||||||||||
273 | void setHasNoInfs(bool B) { | ||||||||||||||
274 | SubclassOptionalData = | ||||||||||||||
275 | (SubclassOptionalData & ~FastMathFlags::NoInfs) | | ||||||||||||||
276 | (B * FastMathFlags::NoInfs); | ||||||||||||||
277 | } | ||||||||||||||
278 | |||||||||||||||
279 | void setHasNoSignedZeros(bool B) { | ||||||||||||||
280 | SubclassOptionalData = | ||||||||||||||
281 | (SubclassOptionalData & ~FastMathFlags::NoSignedZeros) | | ||||||||||||||
282 | (B * FastMathFlags::NoSignedZeros); | ||||||||||||||
283 | } | ||||||||||||||
284 | |||||||||||||||
285 | void setHasAllowReciprocal(bool B) { | ||||||||||||||
286 | SubclassOptionalData = | ||||||||||||||
287 | (SubclassOptionalData & ~FastMathFlags::AllowReciprocal) | | ||||||||||||||
288 | (B * FastMathFlags::AllowReciprocal); | ||||||||||||||
289 | } | ||||||||||||||
290 | |||||||||||||||
291 | void setHasAllowContract(bool B) { | ||||||||||||||
292 | SubclassOptionalData = | ||||||||||||||
293 | (SubclassOptionalData & ~FastMathFlags::AllowContract) | | ||||||||||||||
294 | (B * FastMathFlags::AllowContract); | ||||||||||||||
295 | } | ||||||||||||||
296 | |||||||||||||||
297 | void setHasApproxFunc(bool B) { | ||||||||||||||
298 | SubclassOptionalData = | ||||||||||||||
299 | (SubclassOptionalData & ~FastMathFlags::ApproxFunc) | | ||||||||||||||
300 | (B * FastMathFlags::ApproxFunc); | ||||||||||||||
301 | } | ||||||||||||||
302 | |||||||||||||||
303 | /// Convenience function for setting multiple fast-math flags. | ||||||||||||||
304 | /// FMF is a mask of the bits to set. | ||||||||||||||
305 | void setFastMathFlags(FastMathFlags FMF) { | ||||||||||||||
306 | SubclassOptionalData |= FMF.Flags; | ||||||||||||||
307 | } | ||||||||||||||
308 | |||||||||||||||
309 | /// Convenience function for copying all fast-math flags. | ||||||||||||||
310 | /// All values in FMF are transferred to this operator. | ||||||||||||||
311 | void copyFastMathFlags(FastMathFlags FMF) { | ||||||||||||||
312 | SubclassOptionalData = FMF.Flags; | ||||||||||||||
313 | } | ||||||||||||||
314 | |||||||||||||||
315 | public: | ||||||||||||||
316 | /// Test if this operation allows all non-strict floating-point transforms. | ||||||||||||||
317 | bool isFast() const { | ||||||||||||||
318 | return ((SubclassOptionalData & FastMathFlags::AllowReassoc) != 0 && | ||||||||||||||
319 | (SubclassOptionalData & FastMathFlags::NoNaNs) != 0 && | ||||||||||||||
320 | (SubclassOptionalData & FastMathFlags::NoInfs) != 0 && | ||||||||||||||
321 | (SubclassOptionalData & FastMathFlags::NoSignedZeros) != 0 && | ||||||||||||||
322 | (SubclassOptionalData & FastMathFlags::AllowReciprocal) != 0 && | ||||||||||||||
323 | (SubclassOptionalData & FastMathFlags::AllowContract) != 0 && | ||||||||||||||
324 | (SubclassOptionalData & FastMathFlags::ApproxFunc) != 0); | ||||||||||||||
325 | } | ||||||||||||||
326 | |||||||||||||||
327 | /// Test if this operation may be simplified with reassociative transforms. | ||||||||||||||
328 | bool hasAllowReassoc() const { | ||||||||||||||
329 | return (SubclassOptionalData & FastMathFlags::AllowReassoc) != 0; | ||||||||||||||
330 | } | ||||||||||||||
331 | |||||||||||||||
332 | /// Test if this operation's arguments and results are assumed not-NaN. | ||||||||||||||
333 | bool hasNoNaNs() const { | ||||||||||||||
334 | return (SubclassOptionalData & FastMathFlags::NoNaNs) != 0; | ||||||||||||||
335 | } | ||||||||||||||
336 | |||||||||||||||
337 | /// Test if this operation's arguments and results are assumed not-infinite. | ||||||||||||||
338 | bool hasNoInfs() const { | ||||||||||||||
339 | return (SubclassOptionalData & FastMathFlags::NoInfs) != 0; | ||||||||||||||
340 | } | ||||||||||||||
341 | |||||||||||||||
342 | /// Test if this operation can ignore the sign of zero. | ||||||||||||||
343 | bool hasNoSignedZeros() const { | ||||||||||||||
344 | return (SubclassOptionalData & FastMathFlags::NoSignedZeros) != 0; | ||||||||||||||
345 | } | ||||||||||||||
346 | |||||||||||||||
347 | /// Test if this operation can use reciprocal multiply instead of division. | ||||||||||||||
348 | bool hasAllowReciprocal() const { | ||||||||||||||
349 | return (SubclassOptionalData & FastMathFlags::AllowReciprocal) != 0; | ||||||||||||||
350 | } | ||||||||||||||
351 | |||||||||||||||
352 | /// Test if this operation can be floating-point contracted (FMA). | ||||||||||||||
353 | bool hasAllowContract() const { | ||||||||||||||
354 | return (SubclassOptionalData & FastMathFlags::AllowContract) != 0; | ||||||||||||||
355 | } | ||||||||||||||
356 | |||||||||||||||
357 | /// Test if this operation allows approximations of math library functions or | ||||||||||||||
358 | /// intrinsics. | ||||||||||||||
359 | bool hasApproxFunc() const { | ||||||||||||||
360 | return (SubclassOptionalData & FastMathFlags::ApproxFunc) != 0; | ||||||||||||||
361 | } | ||||||||||||||
362 | |||||||||||||||
363 | /// Convenience function for getting all the fast-math flags | ||||||||||||||
364 | FastMathFlags getFastMathFlags() const { | ||||||||||||||
365 | return FastMathFlags(SubclassOptionalData); | ||||||||||||||
366 | } | ||||||||||||||
367 | |||||||||||||||
368 | /// Get the maximum error permitted by this operation in ULPs. An accuracy of | ||||||||||||||
369 | /// 0.0 means that the operation should be performed with the default | ||||||||||||||
370 | /// precision. | ||||||||||||||
371 | float getFPAccuracy() const; | ||||||||||||||
372 | |||||||||||||||
373 | static bool classof(const Value *V) { | ||||||||||||||
374 | unsigned Opcode; | ||||||||||||||
375 | if (auto *I = dyn_cast<Instruction>(V)) | ||||||||||||||
376 | Opcode = I->getOpcode(); | ||||||||||||||
377 | else if (auto *CE = dyn_cast<ConstantExpr>(V)) | ||||||||||||||
378 | Opcode = CE->getOpcode(); | ||||||||||||||
379 | else | ||||||||||||||
380 | return false; | ||||||||||||||
381 | |||||||||||||||
382 | switch (Opcode) { | ||||||||||||||
383 | case Instruction::FNeg: | ||||||||||||||
384 | case Instruction::FAdd: | ||||||||||||||
385 | case Instruction::FSub: | ||||||||||||||
386 | case Instruction::FMul: | ||||||||||||||
387 | case Instruction::FDiv: | ||||||||||||||
388 | case Instruction::FRem: | ||||||||||||||
389 | // FIXME: To clean up and correct the semantics of fast-math-flags, FCmp | ||||||||||||||
390 | // should not be treated as a math op, but the other opcodes should. | ||||||||||||||
391 | // This would make things consistent with Select/PHI (FP value type | ||||||||||||||
392 | // determines whether they are math ops and, therefore, capable of | ||||||||||||||
393 | // having fast-math-flags). | ||||||||||||||
394 | case Instruction::FCmp: | ||||||||||||||
395 | return true; | ||||||||||||||
396 | case Instruction::PHI: | ||||||||||||||
397 | case Instruction::Select: | ||||||||||||||
398 | case Instruction::Call: { | ||||||||||||||
399 | Type *Ty = V->getType(); | ||||||||||||||
400 | while (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) | ||||||||||||||
401 | Ty = ArrTy->getElementType(); | ||||||||||||||
402 | return Ty->isFPOrFPVectorTy(); | ||||||||||||||
403 | } | ||||||||||||||
404 | default: | ||||||||||||||
405 | return false; | ||||||||||||||
406 | } | ||||||||||||||
407 | } | ||||||||||||||
408 | }; | ||||||||||||||
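Sketch: reading fast-math information through this operator view, for some
llvm::Value *V:

  if (auto *FPOp = llvm::dyn_cast<llvm::FPMathOperator>(V)) {
    llvm::FastMathFlags FMF = FPOp->getFastMathFlags();
    if (FMF.allowContract())
      ; // An fmul feeding an fadd here may legally be fused into an FMA.
  }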
409 | |||||||||||||||
410 | /// A helper template for defining operators for individual opcodes. | ||||||||||||||
411 | template<typename SuperClass, unsigned Opc> | ||||||||||||||
412 | class ConcreteOperator : public SuperClass { | ||||||||||||||
413 | public: | ||||||||||||||
414 | static bool classof(const Instruction *I) { | ||||||||||||||
415 | return I->getOpcode() == Opc; | ||||||||||||||
416 | } | ||||||||||||||
417 | static bool classof(const ConstantExpr *CE) { | ||||||||||||||
418 | return CE->getOpcode() == Opc; | ||||||||||||||
419 | } | ||||||||||||||
420 | static bool classof(const Value *V) { | ||||||||||||||
421 | return (isa<Instruction>(V) && classof(cast<Instruction>(V))) || | ||||||||||||||
422 | (isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V))); | ||||||||||||||
423 | } | ||||||||||||||
424 | }; | ||||||||||||||
425 | |||||||||||||||
426 | class AddOperator | ||||||||||||||
427 | : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Add> { | ||||||||||||||
428 | }; | ||||||||||||||
429 | class SubOperator | ||||||||||||||
430 | : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Sub> { | ||||||||||||||
431 | }; | ||||||||||||||
432 | class MulOperator | ||||||||||||||
433 | : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Mul> { | ||||||||||||||
434 | }; | ||||||||||||||
435 | class ShlOperator | ||||||||||||||
436 | : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Shl> { | ||||||||||||||
437 | }; | ||||||||||||||
438 | |||||||||||||||
439 | class SDivOperator | ||||||||||||||
440 | : public ConcreteOperator<PossiblyExactOperator, Instruction::SDiv> { | ||||||||||||||
441 | }; | ||||||||||||||
442 | class UDivOperator | ||||||||||||||
443 | : public ConcreteOperator<PossiblyExactOperator, Instruction::UDiv> { | ||||||||||||||
444 | }; | ||||||||||||||
445 | class AShrOperator | ||||||||||||||
446 | : public ConcreteOperator<PossiblyExactOperator, Instruction::AShr> { | ||||||||||||||
447 | }; | ||||||||||||||
448 | class LShrOperator | ||||||||||||||
449 | : public ConcreteOperator<PossiblyExactOperator, Instruction::LShr> { | ||||||||||||||
450 | }; | ||||||||||||||
451 | |||||||||||||||
452 | class ZExtOperator : public ConcreteOperator<Operator, Instruction::ZExt> {}; | ||||||||||||||
453 | |||||||||||||||
454 | class GEPOperator | ||||||||||||||
455 | : public ConcreteOperator<Operator, Instruction::GetElementPtr> { | ||||||||||||||
456 | friend class GetElementPtrInst; | ||||||||||||||
457 | friend class ConstantExpr; | ||||||||||||||
458 | |||||||||||||||
459 | enum { | ||||||||||||||
460 | IsInBounds = (1 << 0), | ||||||||||||||
461 | // InRangeIndex: bits 1-6 | ||||||||||||||
462 | }; | ||||||||||||||
463 | |||||||||||||||
464 | void setIsInBounds(bool B) { | ||||||||||||||
465 | SubclassOptionalData = | ||||||||||||||
466 | (SubclassOptionalData & ~IsInBounds) | (B * IsInBounds); | ||||||||||||||
467 | } | ||||||||||||||
468 | |||||||||||||||
469 | public: | ||||||||||||||
470 | /// Test whether this is an inbounds GEP, as defined by LangRef.html. | ||||||||||||||
471 | bool isInBounds() const { | ||||||||||||||
472 | return SubclassOptionalData & IsInBounds;
473 | }
474 |
475 | /// Returns the offset of the index with an inrange attachment, or None if
476 | /// none.
477 | Optional<unsigned> getInRangeIndex() const {
478 | if (SubclassOptionalData >> 1 == 0) return None;
479 | return (SubclassOptionalData >> 1) - 1;
480 | }
481 |
482 | inline op_iterator idx_begin() { return op_begin()+1; }
483 | inline const_op_iterator idx_begin() const { return op_begin()+1; }
484 | inline op_iterator idx_end() { return op_end(); }
485 | inline const_op_iterator idx_end() const { return op_end(); }
486 |
487 | Value *getPointerOperand() {
488 | return getOperand(0);
489 | }
490 | const Value *getPointerOperand() const {
491 | return getOperand(0);
492 | }
493 | static unsigned getPointerOperandIndex() {
494 | return 0U; // get index for modifying correct operand
495 | }
496 |
497 | /// Method to return the pointer operand as a PointerType.
498 | Type *getPointerOperandType() const {
499 | return getPointerOperand()->getType();
500 | }
501 |
502 | Type *getSourceElementType() const;
503 | Type *getResultElementType() const;
504 |
505 | /// Method to return the address space of the pointer operand.
506 | unsigned getPointerAddressSpace() const {
507 | return getPointerOperandType()->getPointerAddressSpace();
508 | }
509 |
510 | unsigned getNumIndices() const { // Note: always non-negative
511 | return getNumOperands() - 1;
512 | }
513 |
514 | bool hasIndices() const {
515 | return getNumOperands() > 1;
516 | }
517 |
518 | /// Return true if all of the indices of this GEP are zeros.
519 | /// If so, the result pointer and the first operand have the same
520 | /// value, just potentially different types.
521 | bool hasAllZeroIndices() const {
522 | for (const_op_iterator I = idx_begin(), E = idx_end(); I != E; ++I) {
523 | if (ConstantInt *C = dyn_cast<ConstantInt>(I))
524 | if (C->isZero())
525 | continue;
526 | return false;
527 | }
528 | return true;
529 | }
530 |
531 | /// Return true if all of the indices of this GEP are constant integers.
532 | /// If so, the result pointer and the first operand have
533 | /// a constant offset between them.
534 | bool hasAllConstantIndices() const {
535 | for (const_op_iterator I = idx_begin(), E = idx_end(); I != E; ++I) {
536 | if (!isa<ConstantInt>(I))
537 | return false;
538 | }
539 | return true;
540 | }
541 |
542 | unsigned countNonConstantIndices() const {
543 | return count_if(make_range(idx_begin(), idx_end()), [](const Use& use) {
544 | return !isa<ConstantInt>(*use);
545 | });
546 | }
547 |
548 | /// Accumulate the constant address offset of this GEP if possible.
549 | ///
550 | /// This routine accepts an APInt into which it will accumulate the constant
551 | /// offset of this GEP if the GEP is in fact constant. If the GEP is not
552 | /// all-constant, it returns false and the value of the offset APInt is
553 | /// undefined (it is *not* preserved!). The APInt passed into this routine
554 | /// must be exactly as wide as the IntPtr type for the address space of the
555 | /// base GEP pointer.
556 | bool accumulateConstantOffset(const DataLayout &DL, APInt &Offset) const;
557 | };
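// A usage sketch (illustrative, not part of the original header; DL is
// assumed to be the module's DataLayout): the width contract above means
// callers size the APInt from the index type of the GEP's address space:
//
//   APInt Offset(DL.getIndexSizeInBits(GEP->getPointerAddressSpace()), 0);
//   if (GEP->accumulateConstantOffset(DL, Offset)) {
//     // Offset now holds the constant byte offset from the base pointer.
//   }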
558 |
559 | class PtrToIntOperator
560 | : public ConcreteOperator<Operator, Instruction::PtrToInt> {
561 | friend class PtrToInt;
562 | friend class ConstantExpr;
563 |
564 | public:
565 | Value *getPointerOperand() {
566 | return getOperand(0);
567 | }
568 | const Value *getPointerOperand() const {
569 | return getOperand(0);
570 | }
571 |
572 | static unsigned getPointerOperandIndex() {
573 | return 0U; // get index for modifying correct operand
574 | }
575 |
576 | /// Method to return the pointer operand as a PointerType.
577 | Type *getPointerOperandType() const {
578 | return getPointerOperand()->getType();
579 | }
580 |
581 | /// Method to return the address space of the pointer operand.
582 | unsigned getPointerAddressSpace() const {
583 | return cast<PointerType>(getPointerOperandType())->getAddressSpace();
584 | }
585 | };
586 |
587 | class BitCastOperator
588 | : public ConcreteOperator<Operator, Instruction::BitCast> {
589 | friend class BitCastInst;
590 | friend class ConstantExpr;
591 |
592 | public:
593 | Type *getSrcTy() const {
594 | return getOperand(0)->getType();
595 | }
596 |
597 | Type *getDestTy() const {
598 | return getType();
599 | }
600 | };
601 |
602 | } // end namespace llvm
603 |
604 | #endif // LLVM_IR_OPERATOR_H
1 | //===- BasicTTIImpl.h -------------------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | /// \file |
10 | /// This file provides a helper that implements much of the TTI interface in |
11 | /// terms of the target-independent code generator and TargetLowering |
12 | /// interfaces. |
13 | // |
14 | //===----------------------------------------------------------------------===// |
15 | |
16 | #ifndef LLVM_CODEGEN_BASICTTIIMPL_H |
17 | #define LLVM_CODEGEN_BASICTTIIMPL_H |
18 | |
19 | #include "llvm/ADT/APInt.h" |
20 | #include "llvm/ADT/ArrayRef.h" |
21 | #include "llvm/ADT/BitVector.h" |
22 | #include "llvm/ADT/SmallPtrSet.h" |
23 | #include "llvm/ADT/SmallVector.h" |
24 | #include "llvm/Analysis/LoopInfo.h" |
25 | #include "llvm/Analysis/TargetTransformInfo.h" |
26 | #include "llvm/Analysis/TargetTransformInfoImpl.h" |
27 | #include "llvm/CodeGen/ISDOpcodes.h" |
28 | #include "llvm/CodeGen/TargetLowering.h" |
29 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
30 | #include "llvm/CodeGen/ValueTypes.h" |
31 | #include "llvm/IR/BasicBlock.h" |
32 | #include "llvm/IR/CallSite.h" |
33 | #include "llvm/IR/Constant.h" |
34 | #include "llvm/IR/Constants.h" |
35 | #include "llvm/IR/DataLayout.h" |
36 | #include "llvm/IR/DerivedTypes.h" |
37 | #include "llvm/IR/InstrTypes.h" |
38 | #include "llvm/IR/Instruction.h" |
39 | #include "llvm/IR/Instructions.h" |
40 | #include "llvm/IR/Intrinsics.h" |
41 | #include "llvm/IR/Operator.h" |
42 | #include "llvm/IR/Type.h" |
43 | #include "llvm/IR/Value.h" |
44 | #include "llvm/MC/MCSchedule.h" |
45 | #include "llvm/Support/Casting.h" |
46 | #include "llvm/Support/CommandLine.h" |
47 | #include "llvm/Support/ErrorHandling.h" |
48 | #include "llvm/Support/MachineValueType.h" |
49 | #include "llvm/Support/MathExtras.h" |
50 | #include <algorithm> |
51 | #include <cassert> |
52 | #include <cstdint> |
53 | #include <limits> |
54 | #include <utility> |
55 | |
56 | namespace llvm { |
57 | |
58 | class Function; |
59 | class GlobalValue; |
60 | class LLVMContext; |
61 | class ScalarEvolution; |
62 | class SCEV; |
63 | class TargetMachine; |
64 | |
65 | extern cl::opt<unsigned> PartialUnrollingThreshold; |
66 | |
67 | /// Base class which can be used to help build a TTI implementation. |
68 | /// |
69 | /// This class provides as much implementation of the TTI interface as is |
70 | /// possible using the target independent parts of the code generator. |
71 | /// |
72 | /// In order to subclass it, your class must implement a getST() method to |
73 | /// return the subtarget, and a getTLI() method to return the target lowering. |
74 | /// We need these methods implemented in the derived class so that this class |
75 | /// doesn't have to duplicate storage for them. |
76 | template <typename T> |
77 | class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> { |
78 | private: |
79 | using BaseT = TargetTransformInfoImplCRTPBase<T>; |
80 | using TTI = TargetTransformInfo; |
81 | |
82 | /// Estimate a cost of Broadcast as an extract and sequence of insert |
83 | /// operations. |
84 | unsigned getBroadcastShuffleOverhead(Type *Ty) { |
85 | assert(Ty->isVectorTy() && "Can only shuffle vectors");
86 | unsigned Cost = 0; |
87 | // Broadcast cost is equal to the cost of extracting the zero'th element |
88 | // plus the cost of inserting it into every element of the result vector. |
89 | Cost += static_cast<T *>(this)->getVectorInstrCost( |
90 | Instruction::ExtractElement, Ty, 0); |
91 | |
92 | for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) { |
93 | Cost += static_cast<T *>(this)->getVectorInstrCost( |
94 | Instruction::InsertElement, Ty, i); |
95 | } |
96 | return Cost; |
97 | } |
98 | |
99 | /// Estimate a cost of shuffle as a sequence of extract and insert |
100 | /// operations. |
101 | unsigned getPermuteShuffleOverhead(Type *Ty) { |
102 | assert(Ty->isVectorTy() && "Can only shuffle vectors");
103 | unsigned Cost = 0; |
104 | // Shuffle cost is equal to the cost of extracting elements from its
105 | // arguments plus the cost of inserting them into the result vector.
106 | |
107 | // e.g. <4 x float> has a mask of <0,5,2,7>, i.e. we need to extract from
108 | // index 0 of the first vector, index 1 of the second vector, index 2 of
109 | // the first vector, and finally index 3 of the second vector, and insert
110 | // them at indices <0,1,2,3> of the result vector.
111 | for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) { |
112 | Cost += static_cast<T *>(this) |
113 | ->getVectorInstrCost(Instruction::InsertElement, Ty, i); |
114 | Cost += static_cast<T *>(this) |
115 | ->getVectorInstrCost(Instruction::ExtractElement, Ty, i); |
116 | } |
117 | return Cost; |
118 | } |
119 | |
120 | /// Estimate a cost of subvector extraction as a sequence of extract and |
121 | /// insert operations. |
122 | unsigned getExtractSubvectorOverhead(Type *Ty, int Index, Type *SubTy) { |
123 | assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
124 | "Can only extract subvectors from vectors");
125 | int NumSubElts = SubTy->getVectorNumElements(); |
126 | assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&
127 | "SK_ExtractSubvector index out of range");
128 | |
129 | unsigned Cost = 0; |
130 | // Subvector extraction cost is equal to the cost of extracting elements
131 | // from the source vector type plus the cost of inserting them into the
132 | // result subvector type.
133 | for (int i = 0; i != NumSubElts; ++i) { |
134 | Cost += static_cast<T *>(this)->getVectorInstrCost( |
135 | Instruction::ExtractElement, Ty, i + Index); |
136 | Cost += static_cast<T *>(this)->getVectorInstrCost( |
137 | Instruction::InsertElement, SubTy, i); |
138 | } |
139 | return Cost; |
140 | } |
141 | |
142 | /// Estimate a cost of subvector insertion as a sequence of extract and |
143 | /// insert operations. |
144 | unsigned getInsertSubvectorOverhead(Type *Ty, int Index, Type *SubTy) { |
145 | assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
146 | "Can only insert subvectors into vectors");
147 | int NumSubElts = SubTy->getVectorNumElements(); |
148 | assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&
149 | "SK_InsertSubvector index out of range");
150 | |
151 | unsigned Cost = 0; |
152 | // Subvector insertion cost is equal to the cost of extracting elements
153 | // from the subvector type plus the cost of inserting them into the
154 | // result vector type.
155 | for (int i = 0; i != NumSubElts; ++i) { |
156 | Cost += static_cast<T *>(this)->getVectorInstrCost( |
157 | Instruction::ExtractElement, SubTy, i); |
158 | Cost += static_cast<T *>(this)->getVectorInstrCost( |
159 | Instruction::InsertElement, Ty, i + Index); |
160 | } |
161 | return Cost; |
162 | } |
163 | |
164 | /// Local query method delegates up to T which *must* implement this! |
165 | const TargetSubtargetInfo *getST() const { |
166 | return static_cast<const T *>(this)->getST(); |
167 | } |
168 | |
169 | /// Local query method delegates up to T which *must* implement this! |
170 | const TargetLoweringBase *getTLI() const { |
171 | return static_cast<const T *>(this)->getTLI(); |
172 | } |
173 | |
174 | static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) { |
175 | switch (M) { |
176 | case TTI::MIM_Unindexed: |
177 | return ISD::UNINDEXED; |
178 | case TTI::MIM_PreInc: |
179 | return ISD::PRE_INC; |
180 | case TTI::MIM_PreDec: |
181 | return ISD::PRE_DEC; |
182 | case TTI::MIM_PostInc: |
183 | return ISD::POST_INC; |
184 | case TTI::MIM_PostDec: |
185 | return ISD::POST_DEC; |
186 | } |
187 | llvm_unreachable("Unexpected MemIndexedMode")::llvm::llvm_unreachable_internal("Unexpected MemIndexedMode" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 187); |
188 | } |
189 | |
190 | protected: |
191 | explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL) |
192 | : BaseT(DL) {} |
193 | virtual ~BasicTTIImplBase() = default; |
194 | |
195 | using TargetTransformInfoImplBase::DL; |
196 | |
197 | public: |
198 | /// \name Scalar TTI Implementations |
199 | /// @{ |
200 | bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, |
201 | unsigned AddressSpace, unsigned Alignment, |
202 | bool *Fast) const { |
203 | EVT E = EVT::getIntegerVT(Context, BitWidth); |
204 | return getTLI()->allowsMisalignedMemoryAccesses( |
205 | E, AddressSpace, Alignment, MachineMemOperand::MONone, Fast); |
206 | } |
207 | |
208 | bool hasBranchDivergence() { return false; } |
209 | |
210 | bool useGPUDivergenceAnalysis() { return false; } |
211 | |
212 | bool isSourceOfDivergence(const Value *V) { return false; } |
213 | |
214 | bool isAlwaysUniform(const Value *V) { return false; } |
215 | |
216 | unsigned getFlatAddressSpace() { |
217 | // Return an invalid address space. |
218 | return -1; |
219 | } |
220 | |
221 | bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes, |
222 | Intrinsic::ID IID) const { |
223 | return false; |
224 | } |
225 | |
226 | bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, |
227 | Value *OldV, Value *NewV) const { |
228 | return false; |
229 | } |
230 | |
231 | bool isLegalAddImmediate(int64_t imm) { |
232 | return getTLI()->isLegalAddImmediate(imm); |
233 | } |
234 | |
235 | bool isLegalICmpImmediate(int64_t imm) { |
236 | return getTLI()->isLegalICmpImmediate(imm); |
237 | } |
238 | |
239 | bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, |
240 | bool HasBaseReg, int64_t Scale, |
241 | unsigned AddrSpace, Instruction *I = nullptr) { |
242 | TargetLoweringBase::AddrMode AM; |
243 | AM.BaseGV = BaseGV; |
244 | AM.BaseOffs = BaseOffset; |
245 | AM.HasBaseReg = HasBaseReg; |
246 | AM.Scale = Scale; |
247 | return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I); |
248 | } |
249 | |
250 | bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty, |
251 | const DataLayout &DL) const { |
252 | EVT VT = getTLI()->getValueType(DL, Ty); |
253 | return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT); |
254 | } |
255 | |
256 | bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty, |
257 | const DataLayout &DL) const { |
258 | EVT VT = getTLI()->getValueType(DL, Ty); |
259 | return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT); |
260 | } |
261 | |
262 | bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) { |
263 | return TargetTransformInfoImplBase::isLSRCostLess(C1, C2); |
264 | } |
265 | |
266 | int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, |
267 | bool HasBaseReg, int64_t Scale, unsigned AddrSpace) { |
268 | TargetLoweringBase::AddrMode AM; |
269 | AM.BaseGV = BaseGV; |
270 | AM.BaseOffs = BaseOffset; |
271 | AM.HasBaseReg = HasBaseReg; |
272 | AM.Scale = Scale; |
273 | return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace); |
274 | } |
275 | |
276 | bool isTruncateFree(Type *Ty1, Type *Ty2) { |
277 | return getTLI()->isTruncateFree(Ty1, Ty2); |
278 | } |
279 | |
280 | bool isProfitableToHoist(Instruction *I) { |
281 | return getTLI()->isProfitableToHoist(I); |
282 | } |
283 | |
284 | bool useAA() const { return getST()->useAA(); } |
285 | |
286 | bool isTypeLegal(Type *Ty) { |
287 | EVT VT = getTLI()->getValueType(DL, Ty); |
288 | return getTLI()->isTypeLegal(VT); |
289 | } |
290 | |
291 | int getGEPCost(Type *PointeeType, const Value *Ptr, |
292 | ArrayRef<const Value *> Operands) { |
293 | return BaseT::getGEPCost(PointeeType, Ptr, Operands); |
294 | } |
295 | |
296 | int getExtCost(const Instruction *I, const Value *Src) { |
297 | if (getTLI()->isExtFree(I)) |
298 | return TargetTransformInfo::TCC_Free; |
299 | |
300 | if (isa<ZExtInst>(I) || isa<SExtInst>(I)) |
301 | if (const LoadInst *LI = dyn_cast<LoadInst>(Src)) |
302 | if (getTLI()->isExtLoad(LI, I, DL)) |
303 | return TargetTransformInfo::TCC_Free; |
304 | |
305 | return TargetTransformInfo::TCC_Basic; |
306 | } |
307 | |
308 | unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, |
309 | ArrayRef<const Value *> Arguments, const User *U) { |
310 | return BaseT::getIntrinsicCost(IID, RetTy, Arguments, U); |
311 | } |
312 | |
313 | unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, |
314 | ArrayRef<Type *> ParamTys, const User *U) { |
315 | if (IID == Intrinsic::cttz) { |
316 | if (getTLI()->isCheapToSpeculateCttz()) |
317 | return TargetTransformInfo::TCC_Basic; |
318 | return TargetTransformInfo::TCC_Expensive; |
319 | } |
320 | |
321 | if (IID == Intrinsic::ctlz) { |
322 | if (getTLI()->isCheapToSpeculateCtlz()) |
323 | return TargetTransformInfo::TCC_Basic; |
324 | return TargetTransformInfo::TCC_Expensive; |
325 | } |
326 | |
327 | return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U); |
328 | } |
329 | |
330 | unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, |
331 | unsigned &JumpTableSize, |
332 | ProfileSummaryInfo *PSI, |
333 | BlockFrequencyInfo *BFI) { |
334 | /// Try to find the estimated number of clusters. Note that the number of |
335 | /// clusters identified in this function could be different from the actual |
336 | /// numbers found in lowering. This function ignores switches that are
337 | /// lowered with a mix of jump table / bit test / BTree. This function was
338 | /// initially intended to be used when estimating the cost of a switch in
339 | /// the inline cost heuristic, but it's a generic cost model to be used in other
340 | /// places (e.g., in loop unrolling). |
341 | unsigned N = SI.getNumCases(); |
342 | const TargetLoweringBase *TLI = getTLI(); |
343 | const DataLayout &DL = this->getDataLayout(); |
344 | |
345 | JumpTableSize = 0; |
346 | bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent()); |
347 | |
348 | // Early exit if both a jump table and bit test are not allowed. |
349 | if (N < 1 || (!IsJTAllowed && DL.getIndexSizeInBits(0u) < N)) |
350 | return N; |
351 | |
352 | APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue(); |
353 | APInt MinCaseVal = MaxCaseVal; |
354 | for (auto CI : SI.cases()) { |
355 | const APInt &CaseVal = CI.getCaseValue()->getValue(); |
356 | if (CaseVal.sgt(MaxCaseVal)) |
357 | MaxCaseVal = CaseVal; |
358 | if (CaseVal.slt(MinCaseVal)) |
359 | MinCaseVal = CaseVal; |
360 | } |
361 | |
362 | // Check if suitable for a bit test |
363 | if (N <= DL.getIndexSizeInBits(0u)) { |
364 | SmallPtrSet<const BasicBlock *, 4> Dests; |
365 | for (auto I : SI.cases()) |
366 | Dests.insert(I.getCaseSuccessor()); |
367 | |
368 | if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal, |
369 | DL)) |
370 | return 1; |
371 | } |
372 | |
373 | // Check if suitable for a jump table. |
374 | if (IsJTAllowed) { |
375 | if (N < 2 || N < TLI->getMinimumJumpTableEntries()) |
376 | return N; |
377 | uint64_t Range = |
378 | (MaxCaseVal - MinCaseVal) |
379 | .getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1; |
380 | // Check whether a range of clusters is dense enough for a jump table |
381 | if (TLI->isSuitableForJumpTable(&SI, N, Range, PSI, BFI)) { |
382 | JumpTableSize = Range; |
383 | return 1; |
384 | } |
385 | } |
386 | return N; |
387 | } |
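// Worked example (a sketch under assumed target defaults): a switch over
// cases {0, 1, ..., 9} has N = 10, MinCaseVal = 0, MaxCaseVal = 9, and thus
// Range = 10. If there are too many distinct successors for a bit test but
// jump tables are allowed and a fully dense range of 10 is deemed suitable,
// this returns 1 cluster with JumpTableSize = 10; otherwise it falls
// through and reports N = 10 separate clusters.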
388 | |
389 | bool shouldBuildLookupTables() { |
390 | const TargetLoweringBase *TLI = getTLI(); |
391 | return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || |
392 | TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other); |
393 | } |
394 | |
395 | bool haveFastSqrt(Type *Ty) { |
396 | const TargetLoweringBase *TLI = getTLI(); |
397 | EVT VT = TLI->getValueType(DL, Ty); |
398 | return TLI->isTypeLegal(VT) && |
399 | TLI->isOperationLegalOrCustom(ISD::FSQRT, VT); |
400 | } |
401 | |
402 | bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) { |
403 | return true; |
404 | } |
405 | |
406 | unsigned getFPOpCost(Type *Ty) { |
407 | // Check whether FADD is available, as a proxy for floating-point in |
408 | // general. |
409 | const TargetLoweringBase *TLI = getTLI(); |
410 | EVT VT = TLI->getValueType(DL, Ty); |
411 | if (TLI->isOperationLegalOrCustomOrPromote(ISD::FADD, VT)) |
412 | return TargetTransformInfo::TCC_Basic; |
413 | return TargetTransformInfo::TCC_Expensive; |
414 | } |
415 | |
416 | unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) { |
417 | const TargetLoweringBase *TLI = getTLI(); |
418 | switch (Opcode) { |
419 | default: break; |
420 | case Instruction::Trunc: |
421 | if (TLI->isTruncateFree(OpTy, Ty)) |
422 | return TargetTransformInfo::TCC_Free; |
423 | return TargetTransformInfo::TCC_Basic; |
424 | case Instruction::ZExt: |
425 | if (TLI->isZExtFree(OpTy, Ty)) |
426 | return TargetTransformInfo::TCC_Free; |
427 | return TargetTransformInfo::TCC_Basic; |
428 | |
429 | case Instruction::AddrSpaceCast: |
430 | if (TLI->isFreeAddrSpaceCast(OpTy->getPointerAddressSpace(), |
431 | Ty->getPointerAddressSpace())) |
432 | return TargetTransformInfo::TCC_Free; |
433 | return TargetTransformInfo::TCC_Basic; |
434 | } |
435 | |
436 | return BaseT::getOperationCost(Opcode, Ty, OpTy); |
437 | } |
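// For example (behavior depends on the target's lowering): on a typical
// 64-bit target where isTruncateFree(i64, i32) holds because the high half
// can simply be ignored, getOperationCost(Instruction::Trunc, /*Ty=*/i32,
// /*OpTy=*/i64) returns TCC_Free; a trunc the target cannot elide costs
// TCC_Basic.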
438 | |
439 | unsigned getInliningThresholdMultiplier() { return 1; } |
440 | |
441 | int getInlinerVectorBonusPercent() { return 150; } |
442 | |
443 | void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, |
444 | TTI::UnrollingPreferences &UP) { |
445 | // This unrolling functionality is target independent, but to provide some |
446 | // motivation for its intended use, for x86: |
447 | |
448 | // According to the Intel 64 and IA-32 Architectures Optimization Reference |
449 | // Manual, Intel Core models and later have a loop stream detector (and |
450 | // associated uop queue) that can benefit from partial unrolling. |
451 | // The relevant requirements are: |
452 | // - The loop must have no more than 4 (8 for Nehalem and later) branches |
453 | // taken, and none of them may be calls. |
454 | // - The loop can have no more than 18 (28 for Nehalem and later) uops. |
455 | |
456 | // According to the Software Optimization Guide for AMD Family 15h |
457 | // Processors, models 30h-4fh (Steamroller and later) have a loop predictor |
458 | // and loop buffer which can benefit from partial unrolling. |
459 | // The relevant requirements are: |
460 | // - The loop must have fewer than 16 branches |
461 | // - The loop must have less than 40 uops in all executed loop branches |
462 | |
463 | // The number of taken branches in a loop is hard to estimate here, and |
464 | // benchmarking has revealed that it is better not to be conservative when |
465 | // estimating the branch count. As a result, we'll ignore the branch limits |
466 | // until someone finds a case where it matters in practice. |
467 | |
468 | unsigned MaxOps; |
469 | const TargetSubtargetInfo *ST = getST(); |
470 | if (PartialUnrollingThreshold.getNumOccurrences() > 0) |
471 | MaxOps = PartialUnrollingThreshold; |
472 | else if (ST->getSchedModel().LoopMicroOpBufferSize > 0) |
473 | MaxOps = ST->getSchedModel().LoopMicroOpBufferSize; |
474 | else |
475 | return; |
476 | |
477 | // Scan the loop: don't unroll loops with calls. |
478 | for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; |
479 | ++I) { |
480 | BasicBlock *BB = *I; |
481 | |
482 | for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J) |
483 | if (isa<CallInst>(J) || isa<InvokeInst>(J)) { |
484 | ImmutableCallSite CS(&*J); |
485 | if (const Function *F = CS.getCalledFunction()) { |
486 | if (!static_cast<T *>(this)->isLoweredToCall(F)) |
487 | continue; |
488 | } |
489 | |
490 | return; |
491 | } |
492 | } |
493 | |
494 | // Enable runtime and partial unrolling up to the specified size. |
495 | // Enable using trip count upper bound to unroll loops. |
496 | UP.Partial = UP.Runtime = UP.UpperBound = true; |
497 | UP.PartialThreshold = MaxOps; |
498 | |
499 | // Avoid unrolling when optimizing for size. |
500 | UP.OptSizeThreshold = 0; |
501 | UP.PartialOptSizeThreshold = 0; |
502 | |
503 | // Set number of instructions optimized when "back edge" |
504 | // becomes "fall through" to default value of 2. |
505 | UP.BEInsns = 2; |
506 | } |
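// Worked example (a sketch, assuming a subtarget whose scheduling model
// reports LoopMicroOpBufferSize == 28 and no PartialUnrollingThreshold
// override): a call-free loop gets UP.Partial, UP.Runtime and UP.UpperBound
// enabled with UP.PartialThreshold = 28, sizing partial unrolling to the
// loop buffer rather than to a generic threshold.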
507 | |
508 | bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, |
509 | AssumptionCache &AC, |
510 | TargetLibraryInfo *LibInfo, |
511 | HardwareLoopInfo &HWLoopInfo) { |
512 | return BaseT::isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo); |
513 | } |
514 | |
515 | bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, |
516 | AssumptionCache &AC, TargetLibraryInfo *TLI, |
517 | DominatorTree *DT, |
518 | const LoopAccessInfo *LAI) { |
519 | return BaseT::preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI); |
520 | } |
521 | |
522 | int getInstructionLatency(const Instruction *I) { |
523 | if (isa<LoadInst>(I)) |
524 | return getST()->getSchedModel().DefaultLoadLatency; |
525 | |
526 | return BaseT::getInstructionLatency(I); |
527 | } |
528 | |
529 | virtual Optional<unsigned> |
530 | getCacheSize(TargetTransformInfo::CacheLevel Level) const { |
531 | return Optional<unsigned>( |
532 | getST()->getCacheSize(static_cast<unsigned>(Level))); |
533 | } |
534 | |
535 | virtual Optional<unsigned> |
536 | getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const { |
537 | Optional<unsigned> TargetResult = |
538 | getST()->getCacheAssociativity(static_cast<unsigned>(Level)); |
539 | |
540 | if (TargetResult) |
541 | return TargetResult; |
542 | |
543 | return BaseT::getCacheAssociativity(Level); |
544 | } |
545 | |
546 | virtual unsigned getCacheLineSize() const { |
547 | return getST()->getCacheLineSize(); |
548 | } |
549 | |
550 | virtual unsigned getPrefetchDistance() const { |
551 | return getST()->getPrefetchDistance(); |
552 | } |
553 | |
554 | virtual unsigned getMinPrefetchStride() const { |
555 | return getST()->getMinPrefetchStride(); |
556 | } |
557 | |
558 | virtual unsigned getMaxPrefetchIterationsAhead() const { |
559 | return getST()->getMaxPrefetchIterationsAhead(); |
560 | } |
561 | |
562 | /// @} |
563 | |
564 | /// \name Vector TTI Implementations |
565 | /// @{ |
566 | |
567 | unsigned getRegisterBitWidth(bool Vector) const { return 32; } |
568 | |
569 | /// Estimate the overhead of scalarizing an instruction. Insert and Extract |
570 | /// are set if the result needs to be inserted and/or extracted from vectors. |
571 | unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) { |
572 | assert(Ty->isVectorTy() && "Can only scalarize vectors");
573 | unsigned Cost = 0; |
574 | |
575 | for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) { |
576 | if (Insert) |
577 | Cost += static_cast<T *>(this) |
578 | ->getVectorInstrCost(Instruction::InsertElement, Ty, i); |
579 | if (Extract) |
580 | Cost += static_cast<T *>(this) |
581 | ->getVectorInstrCost(Instruction::ExtractElement, Ty, i); |
582 | } |
583 | |
584 | return Cost; |
585 | } |
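// Worked example (a sketch): for Ty == <4 x float> with Insert and Extract
// both true, this sums the cost of 4 InsertElement and 4 ExtractElement
// queries; with a unit cost per element access that totals 8.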
586 | |
587 | /// Estimate the overhead of scalarizing an instruction's unique
588 | /// non-constant operands. The types of the arguments are ordinarily
589 | /// scalar, in which case the costs are multiplied by VF.
590 | unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, |
591 | unsigned VF) { |
592 | unsigned Cost = 0; |
593 | SmallPtrSet<const Value*, 4> UniqueOperands; |
594 | for (const Value *A : Args) { |
595 | if (!isa<Constant>(A) && UniqueOperands.insert(A).second) { |
596 | Type *VecTy = nullptr; |
597 | if (A->getType()->isVectorTy()) { |
598 | VecTy = A->getType(); |
599 | // If A is a vector operand, VF should be 1 or correspond to A. |
600 | assert((VF == 1 || VF == VecTy->getVectorNumElements()) &&
601 | "Vector argument does not match VF");
602 | } |
603 | else |
604 | VecTy = VectorType::get(A->getType(), VF); |
605 | |
606 | Cost += getScalarizationOverhead(VecTy, false, true); |
607 | } |
608 | } |
609 | |
610 | return Cost; |
611 | } |
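// Worked example (a sketch with made-up operand names): for
// Args = {%a, %b, %a, 7} and VF == 4, the unique non-constant operands are
// {%a, %b}; each scalar operand is widened to a 4-element vector type and
// charged its extract-only scalarization overhead, i.e. two sets of 4
// ExtractElement costs. The repeated %a and the constant 7 add nothing.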
612 | |
613 | unsigned getScalarizationOverhead(Type *VecTy, ArrayRef<const Value *> Args) { |
614 | assert(VecTy->isVectorTy());
615 | |
616 | unsigned Cost = 0; |
617 | |
618 | Cost += getScalarizationOverhead(VecTy, true, false); |
619 | if (!Args.empty()) |
620 | Cost += getOperandsScalarizationOverhead(Args, |
621 | VecTy->getVectorNumElements()); |
622 | else |
623 | // When no information on arguments is provided, we add the cost |
624 | // associated with one argument as a heuristic. |
625 | Cost += getScalarizationOverhead(VecTy, false, true); |
626 | |
627 | return Cost; |
628 | } |
629 | |
630 | unsigned getMaxInterleaveFactor(unsigned VF) { return 1; } |
631 | |
632 | unsigned getArithmeticInstrCost( |
633 | unsigned Opcode, Type *Ty, |
634 | TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, |
635 | TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, |
636 | TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, |
637 | TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None, |
638 | ArrayRef<const Value *> Args = ArrayRef<const Value *>(), |
639 | const Instruction *CxtI = nullptr) { |
640 | // Check if any of the operands are vector operands. |
641 | const TargetLoweringBase *TLI = getTLI(); |
642 | int ISD = TLI->InstructionOpcodeToISD(Opcode); |
643 | assert(ISD && "Invalid opcode")((ISD && "Invalid opcode") ? static_cast<void> ( 0) : __assert_fail ("ISD && \"Invalid opcode\"", "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 643, __PRETTY_FUNCTION__)); |
644 | |
645 | std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); |
646 | |
647 | bool IsFloat = Ty->isFPOrFPVectorTy(); |
648 | // Assume that floating point arithmetic operations cost twice as much as |
649 | // integer operations. |
650 | unsigned OpCost = (IsFloat ? 2 : 1); |
651 | |
652 | if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { |
653 | // The operation is legal. Assume it costs 1. |
654 | // TODO: Once we have extract/insert subvector cost we need to use them. |
655 | return LT.first * OpCost; |
656 | } |
657 | |
658 | if (!TLI->isOperationExpand(ISD, LT.second)) { |
659 | // If the operation is custom lowered, then assume that the code is twice |
660 | // as expensive. |
661 | return LT.first * 2 * OpCost; |
662 | } |
663 | |
664 | // Else, assume that we need to scalarize this op. |
665 | // TODO: If one of the types get legalized by splitting, handle this |
666 | // similarly to what getCastInstrCost() does. |
667 | if (Ty->isVectorTy()) { |
668 | unsigned Num = Ty->getVectorNumElements(); |
669 | unsigned Cost = static_cast<T *>(this) |
670 | ->getArithmeticInstrCost(Opcode, Ty->getScalarType()); |
671 | // Return the cost of multiple scalar invocations plus the cost of
672 | // inserting and extracting the values. |
673 | return getScalarizationOverhead(Ty, Args) + Num * Cost; |
674 | } |
675 | |
676 | // We don't know anything about this scalar instruction. |
677 | return OpCost; |
678 | } |
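// Worked example (a sketch, assuming a target whose widest legal vector is
// v4i32/v4f32): an add on <8 x i32> legalizes by splitting, so
// getTypeLegalizationCost returns LT.first == 2 with LT.second == v4i32;
// ISD::ADD is legal there, giving LT.first * OpCost == 2 * 1 == 2. An fadd
// on <8 x float> takes the same path with OpCost == 2, giving 4.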
679 | |
680 | unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, |
681 | Type *SubTp) { |
682 | switch (Kind) { |
683 | case TTI::SK_Broadcast: |
684 | return getBroadcastShuffleOverhead(Tp); |
685 | case TTI::SK_Select: |
686 | case TTI::SK_Reverse: |
687 | case TTI::SK_Transpose: |
688 | case TTI::SK_PermuteSingleSrc: |
689 | case TTI::SK_PermuteTwoSrc: |
690 | return getPermuteShuffleOverhead(Tp); |
691 | case TTI::SK_ExtractSubvector: |
692 | return getExtractSubvectorOverhead(Tp, Index, SubTp); |
693 | case TTI::SK_InsertSubvector: |
694 | return getInsertSubvectorOverhead(Tp, Index, SubTp); |
695 | } |
696 | llvm_unreachable("Unknown TTI::ShuffleKind")::llvm::llvm_unreachable_internal("Unknown TTI::ShuffleKind", "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 696); |
697 | } |
698 | |
699 | unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, |
700 | const Instruction *I = nullptr) { |
701 | const TargetLoweringBase *TLI = getTLI(); |
702 | int ISD = TLI->InstructionOpcodeToISD(Opcode); |
703 | assert(ISD && "Invalid opcode")((ISD && "Invalid opcode") ? static_cast<void> ( 0) : __assert_fail ("ISD && \"Invalid opcode\"", "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 703, __PRETTY_FUNCTION__)); |
704 | std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, Src); |
705 | std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(DL, Dst); |
706 | |
707 | // Check for NOOP conversions. |
708 | if (SrcLT.first == DstLT.first && |
709 | SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) { |
710 | |
711 | // Bitcasts between types that are legalized to the same type are free.
712 | if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc) |
713 | return 0; |
714 | } |
715 | |
716 | if (Opcode == Instruction::Trunc && |
717 | TLI->isTruncateFree(SrcLT.second, DstLT.second)) |
718 | return 0; |
719 | |
720 | if (Opcode == Instruction::ZExt && |
721 | TLI->isZExtFree(SrcLT.second, DstLT.second)) |
722 | return 0; |
723 | |
724 | if (Opcode == Instruction::AddrSpaceCast && |
725 | TLI->isFreeAddrSpaceCast(Src->getPointerAddressSpace(), |
726 | Dst->getPointerAddressSpace())) |
727 | return 0; |
728 | |
729 | // If this is a zext/sext of a load, return 0 if the corresponding |
730 | // extending load exists on target. |
731 | if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) && |
732 | I && isa<LoadInst>(I->getOperand(0))) { |
733 | EVT ExtVT = EVT::getEVT(Dst); |
734 | EVT LoadVT = EVT::getEVT(Src); |
735 | unsigned LType = |
736 | ((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD : ISD::SEXTLOAD); |
737 | if (TLI->isLoadExtLegal(LType, ExtVT, LoadVT)) |
738 | return 0; |
739 | } |
740 | |
741 | // If the cast is marked as legal (or promote) then assume low cost. |
742 | if (SrcLT.first == DstLT.first && |
743 | TLI->isOperationLegalOrPromote(ISD, DstLT.second)) |
744 | return 1; |
745 | |
746 | // Handle scalar conversions. |
747 | if (!Src->isVectorTy() && !Dst->isVectorTy()) { |
748 | // Scalar bitcasts are usually free. |
749 | if (Opcode == Instruction::BitCast) |
750 | return 0; |
751 | |
752 | // Just check the op cost. If the operation is legal then assume it costs |
753 | // 1. |
754 | if (!TLI->isOperationExpand(ISD, DstLT.second)) |
755 | return 1; |
756 | |
757 | // Assume that illegal scalar instructions are expensive.
758 | return 4; |
759 | } |
760 | |
761 | // Check vector-to-vector casts. |
762 | if (Dst->isVectorTy() && Src->isVectorTy()) { |
763 | // If the cast is between same-sized registers, then the check is simple. |
764 | if (SrcLT.first == DstLT.first && |
765 | SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) { |
766 | |
767 | // Assume that Zext is done using AND. |
768 | if (Opcode == Instruction::ZExt) |
769 | return 1; |
770 | |
771 | // Assume that sext is done using SHL and SRA. |
772 | if (Opcode == Instruction::SExt) |
773 | return 2; |
774 | |
775 | // Just check the op cost. If the operation is legal then assume it
776 | // costs 1 and multiply by the type-legalization overhead.
777 |
778 | if (!TLI->isOperationExpand(ISD, DstLT.second)) |
779 | return SrcLT.first * 1; |
780 | } |
781 | |
782 | // If we are legalizing by splitting, query the concrete TTI for the cost |
783 | // of casting the original vector twice. We also need to factor in the |
784 | // cost of the split itself. Count that as 1, to be consistent with |
785 | // TLI->getTypeLegalizationCost(). |
786 | if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) == |
787 | TargetLowering::TypeSplitVector || |
788 | TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) == |
789 | TargetLowering::TypeSplitVector) && |
790 | Src->getVectorNumElements() > 1 && Dst->getVectorNumElements() > 1) { |
791 | Type *SplitDst = VectorType::get(Dst->getVectorElementType(), |
792 | Dst->getVectorNumElements() / 2); |
793 | Type *SplitSrc = VectorType::get(Src->getVectorElementType(), |
794 | Src->getVectorNumElements() / 2); |
795 | T *TTI = static_cast<T *>(this); |
796 | return TTI->getVectorSplitCost() + |
797 | (2 * TTI->getCastInstrCost(Opcode, SplitDst, SplitSrc, I)); |
798 | } |
799 | |
800 | // In other cases where the source or destination are illegal, assume |
801 | // the operation will get scalarized. |
802 | unsigned Num = Dst->getVectorNumElements(); |
803 | unsigned Cost = static_cast<T *>(this)->getCastInstrCost( |
804 | Opcode, Dst->getScalarType(), Src->getScalarType(), I); |
805 | |
806 | // Return the cost of multiple scalar invocation plus the cost of |
807 | // inserting and extracting the values. |
808 | return getScalarizationOverhead(Dst, true, true) + Num * Cost; |
809 | } |
810 | |
811 | // We already handled vector-to-vector and scalar-to-scalar conversions.
812 | // This is where we handle bitcasts between vectors and scalars. We need
813 | // to assume that the conversion is scalarized in one way or another.
814 |
815 | if (Opcode == Instruction::BitCast) |
816 | // Illegal bitcasts are done by storing and loading from a stack slot. |
817 | return (Src->isVectorTy() ? getScalarizationOverhead(Src, false, true) |
818 | : 0) + |
819 | (Dst->isVectorTy() ? getScalarizationOverhead(Dst, true, false) |
820 | : 0); |
821 | |
822 | llvm_unreachable("Unhandled cast")::llvm::llvm_unreachable_internal("Unhandled cast", "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 822); |
823 | } |
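// Worked example (a sketch): a bitcast from <4 x i32> to <2 x i64>, where
// both sides legalize to a single 128-bit register, hits the NOOP check
// (equal LT.first and equal legalized bit width) and costs 0. A cast whose
// source or destination must be scalarized instead pays Num scalar casts
// plus the insert/extract scalarization overhead of the destination vector.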
824 | |
825 | unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst, |
826 | VectorType *VecTy, unsigned Index) { |
827 | return static_cast<T *>(this)->getVectorInstrCost( |
828 | Instruction::ExtractElement, VecTy, Index) + |
829 | static_cast<T *>(this)->getCastInstrCost(Opcode, Dst, |
830 | VecTy->getElementType()); |
831 | } |
832 | |
833 | unsigned getCFInstrCost(unsigned Opcode) { |
834 | // Branches are assumed to be predicted. |
835 | return 0; |
836 | } |
837 | |
838 | unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, |
839 | const Instruction *I) { |
840 | const TargetLoweringBase *TLI = getTLI(); |
841 | int ISD = TLI->InstructionOpcodeToISD(Opcode); |
842 | assert(ISD && "Invalid opcode")((ISD && "Invalid opcode") ? static_cast<void> ( 0) : __assert_fail ("ISD && \"Invalid opcode\"", "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 842, __PRETTY_FUNCTION__)); |
843 | |
844 | // Selects on vectors are actually vector selects. |
845 | if (ISD == ISD::SELECT) { |
846 | assert(CondTy && "CondTy must exist")((CondTy && "CondTy must exist") ? static_cast<void > (0) : __assert_fail ("CondTy && \"CondTy must exist\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 846, __PRETTY_FUNCTION__)); |
847 | if (CondTy->isVectorTy()) |
848 | ISD = ISD::VSELECT; |
849 | } |
850 | std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy); |
851 | |
852 | if (!(ValTy->isVectorTy() && !LT.second.isVector()) && |
853 | !TLI->isOperationExpand(ISD, LT.second)) { |
854 | // The operation is legal. Assume it costs 1. Multiply |
855 | // by the type-legalization overhead. |
856 | return LT.first * 1; |
857 | } |
858 | |
859 | // Otherwise, assume that the cast is scalarized. |
860 | // TODO: If one of the types get legalized by splitting, handle this |
861 | // similarly to what getCastInstrCost() does. |
862 | if (ValTy->isVectorTy()) { |
863 | unsigned Num = ValTy->getVectorNumElements(); |
864 | if (CondTy) |
865 | CondTy = CondTy->getScalarType(); |
866 | unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost( |
867 | Opcode, ValTy->getScalarType(), CondTy, I); |
868 | |
869 | // Return the cost of multiple scalar invocations plus the cost of
870 | // inserting and extracting the values. |
871 | return getScalarizationOverhead(ValTy, true, false) + Num * Cost; |
872 | } |
873 | |
874 | // Unknown scalar opcode. |
875 | return 1; |
876 | } |
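// Worked example (a sketch): a select with CondTy == <8 x i1> and
// ValTy == <8 x i32> becomes ISD::VSELECT; if v8i32 is legal the cost is
// LT.first * 1. If the value type cannot be kept in vector registers, the
// select is scalarized into 8 scalar selects plus the insertion overhead of
// rebuilding the result vector.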
877 | |
878 | unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { |
879 | std::pair<unsigned, MVT> LT = |
880 | getTLI()->getTypeLegalizationCost(DL, Val->getScalarType()); |
881 | |
882 | return LT.first; |
883 | } |
884 | |
885 | unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, |
886 | unsigned AddressSpace, |
887 | const Instruction *I = nullptr) { |
888 | assert(!Src->isVoidTy() && "Invalid type");
889 | std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src); |
890 | |
891 | // Assume that all loads of legal types cost 1.
892 | unsigned Cost = LT.first; |
893 | |
894 | if (Src->isVectorTy() && |
895 | Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()) { |
896 | // This is a vector load that legalizes to a larger type than the vector |
897 | // itself. Unless the corresponding extending load or truncating store is |
898 | // legal, then this will scalarize. |
899 | TargetLowering::LegalizeAction LA = TargetLowering::Expand; |
900 | EVT MemVT = getTLI()->getValueType(DL, Src); |
901 | if (Opcode == Instruction::Store) |
902 | LA = getTLI()->getTruncStoreAction(LT.second, MemVT); |
903 | else |
904 | LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT); |
905 | |
906 | if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) { |
907 | // This is a vector load/store for some illegal type that is scalarized. |
908 | // We must account for the cost of building or decomposing the vector. |
909 | Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store, |
910 | Opcode == Instruction::Store); |
911 | } |
912 | } |
913 | |
914 | return Cost; |
915 | } |
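// Worked example (a sketch): a load of <2 x i8> on a target whose smallest
// legal vector type is wider legalizes to a larger type; unless the
// matching extending load is Legal or Custom, the code above adds the cost
// of inserting both elements (rebuilding the vector) on top of the base
// cost LT.first.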
916 | |
917 | unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, |
918 | unsigned Factor, |
919 | ArrayRef<unsigned> Indices, |
920 | unsigned Alignment, unsigned AddressSpace, |
921 | bool UseMaskForCond = false, |
922 | bool UseMaskForGaps = false) { |
923 | VectorType *VT = dyn_cast<VectorType>(VecTy); |
924 | assert(VT && "Expect a vector type for interleaved memory op")((VT && "Expect a vector type for interleaved memory op" ) ? static_cast<void> (0) : __assert_fail ("VT && \"Expect a vector type for interleaved memory op\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 924, __PRETTY_FUNCTION__)); |
925 | |
926 | unsigned NumElts = VT->getNumElements(); |
927 | assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");
928 | |
929 | unsigned NumSubElts = NumElts / Factor; |
930 | VectorType *SubVT = VectorType::get(VT->getElementType(), NumSubElts); |
931 | |
932 | // First, the cost of the load/store operation.
933 | unsigned Cost; |
934 | if (UseMaskForCond || UseMaskForGaps) |
935 | Cost = static_cast<T *>(this)->getMaskedMemoryOpCost( |
936 | Opcode, VecTy, Alignment, AddressSpace); |
937 | else |
938 | Cost = static_cast<T *>(this)->getMemoryOpCost( |
939 | Opcode, VecTy, MaybeAlign(Alignment), AddressSpace); |
940 | |
941 | // Legalize the vector type, and get the legalized and unlegalized type |
942 | // sizes. |
943 | MVT VecTyLT = getTLI()->getTypeLegalizationCost(DL, VecTy).second; |
944 | unsigned VecTySize = |
945 | static_cast<T *>(this)->getDataLayout().getTypeStoreSize(VecTy); |
946 | unsigned VecTyLTSize = VecTyLT.getStoreSize(); |
947 | |
948 | // Return the ceiling of dividing A by B. |
949 | auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; }; |
950 | |
951 | // Scale the cost of the memory operation by the fraction of legalized |
952 | // instructions that will actually be used. We shouldn't account for the |
953 | // cost of dead instructions since they will be removed. |
954 | // |
955 | // E.g., An interleaved load of factor 8: |
956 | // %vec = load <16 x i64>, <16 x i64>* %ptr |
957 | // %v0 = shufflevector %vec, undef, <0, 8> |
958 | // |
959 | // If <16 x i64> is legalized to 8 v2i64 loads, only 2 of the loads will be |
960 | // used (those corresponding to elements [0:1] and [8:9] of the unlegalized |
961 | // type). The other loads are unused. |
962 | // |
963 | // We only scale the cost of loads since interleaved store groups aren't |
964 | // allowed to have gaps. |
965 | if (Opcode == Instruction::Load && VecTySize > VecTyLTSize) { |
966 | // The number of loads of a legal type it will take to represent a load |
967 | // of the unlegalized vector type. |
968 | unsigned NumLegalInsts = ceil(VecTySize, VecTyLTSize); |
969 | |
970 | // The number of elements of the unlegalized type that correspond to a |
971 | // single legal instruction. |
972 | unsigned NumEltsPerLegalInst = ceil(NumElts, NumLegalInsts); |
973 | |
974 | // Determine which legal instructions will be used. |
975 | BitVector UsedInsts(NumLegalInsts, false); |
976 | for (unsigned Index : Indices) |
977 | for (unsigned Elt = 0; Elt < NumSubElts; ++Elt) |
978 | UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst); |
979 | |
980 | // Scale the cost of the load by the fraction of legal instructions that |
981 | // will be used. |
982 | Cost *= UsedInsts.count() / NumLegalInsts; |
983 | } |
984 | |
985 | // Then add the cost of the interleave operation.
986 | if (Opcode == Instruction::Load) { |
987 | // The interleave cost is similar to extracting the sub vectors' elements
988 | // from the wide vector and inserting them into the sub vectors.
989 | // |
990 | // E.g. An interleaved load of factor 2 (with one member of index 0): |
991 | // %vec = load <8 x i32>, <8 x i32>* %ptr |
992 | // %v0 = shuffle %vec, undef, <0, 2, 4, 6> ; Index 0 |
993 | // The cost is estimated as extracting elements at indices 0, 2, 4, 6
994 | // from the <8 x i32> vector and inserting them into a <4 x i32> vector.
995 | |
996 | assert(Indices.size() <= Factor &&
997 | "Interleaved memory op has too many members");
998 | |
999 | for (unsigned Index : Indices) { |
1000 | assert(Index < Factor && "Invalid index for interleaved memory op");
1001 | |
1002 | // Extract elements from loaded vector for each sub vector. |
1003 | for (unsigned i = 0; i < NumSubElts; i++) |
1004 | Cost += static_cast<T *>(this)->getVectorInstrCost( |
1005 | Instruction::ExtractElement, VT, Index + i * Factor); |
1006 | } |
1007 | |
1008 | unsigned InsSubCost = 0; |
1009 | for (unsigned i = 0; i < NumSubElts; i++) |
1010 | InsSubCost += static_cast<T *>(this)->getVectorInstrCost( |
1011 | Instruction::InsertElement, SubVT, i); |
1012 | |
1013 | Cost += Indices.size() * InsSubCost; |
1014 | } else { |
1015 | // The interleave cost is extracting all elements from the sub vectors
1016 | // and inserting them into the wide vector.
1017 | // |
1018 | // E.g. An interleaved store of factor 2: |
1019 | // %interleaved.vec = shuffle %v0, %v1, <0, 4, 1, 5, 2, 6, 3, 7>
1020 | // store <8 x i32> %interleaved.vec, <8 x i32>* %ptr |
1021 | // The cost is estimated as extracting all elements from both <4 x i32>
1022 | // vectors and inserting them into the <8 x i32> vector.
1023 | |
1024 | unsigned ExtSubCost = 0; |
1025 | for (unsigned i = 0; i < NumSubElts; i++) |
1026 | ExtSubCost += static_cast<T *>(this)->getVectorInstrCost( |
1027 | Instruction::ExtractElement, SubVT, i); |
1028 | Cost += ExtSubCost * Factor; |
1029 | |
1030 | for (unsigned i = 0; i < NumElts; i++) |
1031 | Cost += static_cast<T *>(this) |
1032 | ->getVectorInstrCost(Instruction::InsertElement, VT, i); |
1033 | } |
1034 | |
1035 | if (!UseMaskForCond) |
1036 | return Cost; |
1037 | |
1038 | Type *I8Type = Type::getInt8Ty(VT->getContext()); |
1039 | VectorType *MaskVT = VectorType::get(I8Type, NumElts); |
1040 | SubVT = VectorType::get(I8Type, NumSubElts); |
1041 | |
1042 | // The Mask shuffling cost is extracting all the elements of the Mask
1043 | // and inserting each of them Factor times into the wide vector:
1044 | // |
1045 | // E.g. an interleaved group with factor 3: |
1046 | // %mask = icmp ult <8 x i32> %vec1, %vec2 |
1047 | // %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef, |
1048 | // <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7> |
1049 | // The cost is estimated as extracting all mask elements from the <8 x i1>
1050 | // mask vector and inserting them Factor times into the <24 x i1> shuffled
1051 | // mask vector.
1052 | for (unsigned i = 0; i < NumSubElts; i++) |
1053 | Cost += static_cast<T *>(this)->getVectorInstrCost( |
1054 | Instruction::ExtractElement, SubVT, i); |
1055 | |
1056 | for (unsigned i = 0; i < NumElts; i++) |
1057 | Cost += static_cast<T *>(this)->getVectorInstrCost( |
1058 | Instruction::InsertElement, MaskVT, i); |
1059 | |
1060 | // The Gaps mask is invariant and created outside the loop, therefore the |
1061 | // cost of creating it is not accounted for here. However if we have both |
1062 | // a MaskForGaps and some other mask that guards the execution of the |
1063 | // memory access, we need to account for the cost of And-ing the two masks |
1064 | // inside the loop. |
1065 | if (UseMaskForGaps) |
1066 | Cost += static_cast<T *>(this)->getArithmeticInstrCost( |
1067 | BinaryOperator::And, MaskVT); |
1068 | |
1069 | return Cost; |
1070 | } |
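// Worked example (a sketch): an interleaved load of factor 2 from
// <8 x i32> with both members requested has NumSubElts == 4, so the
// interleave part charges 2 * 4 ExtractElement operations from the wide
// vector plus 2 * 4 InsertElement operations into the <4 x i32> subvectors,
// on top of the base cost of the <8 x i32> load itself.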
1071 | |
1072 | /// Get intrinsic cost based on arguments. |
1073 | unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, |
1074 | ArrayRef<Value *> Args, FastMathFlags FMF, |
1075 | unsigned VF = 1) { |
1076 | unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements() : 1); |
1077 | assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type");
1078 | auto *ConcreteTTI = static_cast<T *>(this); |
1079 | |
1080 | switch (IID) { |
1081 | default: { |
1082 | // Assume that we need to scalarize this intrinsic. |
1083 | SmallVector<Type *, 4> Types; |
1084 | for (Value *Op : Args) { |
1085 | Type *OpTy = Op->getType(); |
1086 | assert(VF == 1 || !OpTy->isVectorTy());
1087 | Types.push_back(VF == 1 ? OpTy : VectorType::get(OpTy, VF)); |
1088 | } |
1089 | |
1090 | if (VF > 1 && !RetTy->isVoidTy()) |
1091 | RetTy = VectorType::get(RetTy, VF); |
1092 | |
1093 | // Compute the scalarization overhead based on Args for a vector |
1094 | // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while |
1095 | // CostModel will pass a vector RetTy and VF is 1. |
1096 | unsigned ScalarizationCost = std::numeric_limits<unsigned>::max(); |
1097 | if (RetVF > 1 || VF > 1) { |
1098 | ScalarizationCost = 0; |
1099 | if (!RetTy->isVoidTy()) |
1100 | ScalarizationCost += getScalarizationOverhead(RetTy, true, false); |
1101 | ScalarizationCost += getOperandsScalarizationOverhead(Args, VF); |
1102 | } |
1103 | |
1104 | return ConcreteTTI->getIntrinsicInstrCost(IID, RetTy, Types, FMF, |
1105 | ScalarizationCost); |
1106 | } |
1107 | case Intrinsic::masked_scatter: { |
1108 | assert(VF == 1 && "Can't vectorize types here."); |
1109 | Value *Mask = Args[3]; |
1110 | bool VarMask = !isa<Constant>(Mask); |
1111 | unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue(); |
1112 | return ConcreteTTI->getGatherScatterOpCost( |
1113 | Instruction::Store, Args[0]->getType(), Args[1], VarMask, Alignment); |
1114 | } |
1115 | case Intrinsic::masked_gather: { |
1116 | assert(VF == 1 && "Can't vectorize types here."); |
1117 | Value *Mask = Args[2]; |
1118 | bool VarMask = !isa<Constant>(Mask); |
1119 | unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue(); |
1120 | return ConcreteTTI->getGatherScatterOpCost(Instruction::Load, RetTy, |
1121 | Args[0], VarMask, Alignment); |
1122 | } |
1123 | case Intrinsic::experimental_vector_reduce_add: |
1124 | case Intrinsic::experimental_vector_reduce_mul: |
1125 | case Intrinsic::experimental_vector_reduce_and: |
1126 | case Intrinsic::experimental_vector_reduce_or: |
1127 | case Intrinsic::experimental_vector_reduce_xor: |
1128 | case Intrinsic::experimental_vector_reduce_v2_fadd: |
1129 | case Intrinsic::experimental_vector_reduce_v2_fmul: |
1130 | case Intrinsic::experimental_vector_reduce_smax: |
1131 | case Intrinsic::experimental_vector_reduce_smin: |
1132 | case Intrinsic::experimental_vector_reduce_fmax: |
1133 | case Intrinsic::experimental_vector_reduce_fmin: |
1134 | case Intrinsic::experimental_vector_reduce_umax: |
1135 | case Intrinsic::experimental_vector_reduce_umin: |
1136 | return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF); |
1137 | case Intrinsic::fshl: |
1138 | case Intrinsic::fshr: { |
1139 | Value *X = Args[0]; |
1140 | Value *Y = Args[1]; |
1141 | Value *Z = Args[2]; |
1142 | TTI::OperandValueProperties OpPropsX, OpPropsY, OpPropsZ, OpPropsBW; |
1143 | TTI::OperandValueKind OpKindX = TTI::getOperandInfo(X, OpPropsX); |
1144 | TTI::OperandValueKind OpKindY = TTI::getOperandInfo(Y, OpPropsY); |
1145 | TTI::OperandValueKind OpKindZ = TTI::getOperandInfo(Z, OpPropsZ); |
1146 | TTI::OperandValueKind OpKindBW = TTI::OK_UniformConstantValue; |
1147 | OpPropsBW = isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2 |
1148 | : TTI::OP_None; |
1149 | // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW))) |
1150 | // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW)) |
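| // Illustrative expansion for fshl on i32 with a variable shift %z (value |
| // names are ours): |
| //   %zmod = urem i32 %z, 32 |
| //   %inv  = sub i32 32, %zmod |
| //   %hi   = shl i32 %x, %zmod |
| //   %lo   = lshr i32 %y, %inv |
| //   %res  = or i32 %hi, %lo |
| // For a constant %z the urem is folded away and not costed (see below). |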
1151 | unsigned Cost = 0; |
1152 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Or, RetTy); |
1153 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Sub, RetTy); |
1154 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Shl, RetTy, |
1155 | OpKindX, OpKindZ, OpPropsX); |
1156 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::LShr, RetTy, |
1157 | OpKindY, OpKindZ, OpPropsY); |
1158 | // Non-constant shift amounts require a modulo. |
1159 | if (OpKindZ != TTI::OK_UniformConstantValue && |
1160 | OpKindZ != TTI::OK_NonUniformConstantValue) |
1161 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::URem, RetTy, |
1162 | OpKindZ, OpKindBW, OpPropsZ, |
1163 | OpPropsBW); |
1164 | // For non-rotates (X != Y) we must add shift-by-zero handling costs. |
1165 | if (X != Y) { |
1166 | Type *CondTy = RetTy->getWithNewBitWidth(1); |
1167 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, |
1168 | CondTy, nullptr); |
1169 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy, |
1170 | CondTy, nullptr); |
1171 | } |
1172 | return Cost; |
1173 | } |
1174 | } |
1175 | } |
1176 | |
1177 | /// Get intrinsic cost based on argument types. |
1178 | /// If ScalarizationCostPassed is std::numeric_limits<unsigned>::max(), the |
1179 | /// cost of scalarizing the arguments and the return value will be computed |
1180 | /// based on types. |
1181 | unsigned getIntrinsicInstrCost( |
1182 | Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF, |
1183 | unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max()) { |
1184 | auto *ConcreteTTI = static_cast<T *>(this); |
1185 | |
1186 | SmallVector<unsigned, 2> ISDs; |
1187 | unsigned SingleCallCost = 10; // Library call cost. Make it expensive. |
1188 | switch (IID) { |
1189 | default: { |
1190 | // Assume that we need to scalarize this intrinsic. |
1191 | unsigned ScalarizationCost = ScalarizationCostPassed; |
1192 | unsigned ScalarCalls = 1; |
1193 | Type *ScalarRetTy = RetTy; |
1194 | if (RetTy->isVectorTy()) { |
1195 | if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max()) |
1196 | ScalarizationCost = getScalarizationOverhead(RetTy, true, false); |
1197 | ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements()); |
1198 | ScalarRetTy = RetTy->getScalarType(); |
1199 | } |
1200 | SmallVector<Type *, 4> ScalarTys; |
1201 | for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { |
1202 | Type *Ty = Tys[i]; |
1203 | if (Ty->isVectorTy()) { |
1204 | if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max()) |
1205 | ScalarizationCost += getScalarizationOverhead(Ty, false, true); |
1206 | ScalarCalls = std::max(ScalarCalls, Ty->getVectorNumElements()); |
1207 | Ty = Ty->getScalarType(); |
1208 | } |
1209 | ScalarTys.push_back(Ty); |
1210 | } |
1211 | if (ScalarCalls == 1) |
1212 | return 1; // Return cost of a scalar intrinsic. Assume it to be cheap. |
1213 | |
1214 | unsigned ScalarCost = |
1215 | ConcreteTTI->getIntrinsicInstrCost(IID, ScalarRetTy, ScalarTys, FMF); |
1216 | |
1217 | return ScalarCalls * ScalarCost + ScalarizationCost; |
1218 | } |
1219 | // Look for intrinsics that can be lowered directly or turned into a scalar |
1220 | // intrinsic call. |
1221 | case Intrinsic::sqrt: |
1222 | ISDs.push_back(ISD::FSQRT); |
1223 | break; |
1224 | case Intrinsic::sin: |
1225 | ISDs.push_back(ISD::FSIN); |
1226 | break; |
1227 | case Intrinsic::cos: |
1228 | ISDs.push_back(ISD::FCOS); |
1229 | break; |
1230 | case Intrinsic::exp: |
1231 | ISDs.push_back(ISD::FEXP); |
1232 | break; |
1233 | case Intrinsic::exp2: |
1234 | ISDs.push_back(ISD::FEXP2); |
1235 | break; |
1236 | case Intrinsic::log: |
1237 | ISDs.push_back(ISD::FLOG); |
1238 | break; |
1239 | case Intrinsic::log10: |
1240 | ISDs.push_back(ISD::FLOG10); |
1241 | break; |
1242 | case Intrinsic::log2: |
1243 | ISDs.push_back(ISD::FLOG2); |
1244 | break; |
1245 | case Intrinsic::fabs: |
1246 | ISDs.push_back(ISD::FABS); |
1247 | break; |
1248 | case Intrinsic::canonicalize: |
1249 | ISDs.push_back(ISD::FCANONICALIZE); |
1250 | break; |
1251 | case Intrinsic::minnum: |
1252 | ISDs.push_back(ISD::FMINNUM); |
1253 | if (FMF.noNaNs()) |
1254 | ISDs.push_back(ISD::FMINIMUM); |
1255 | break; |
1256 | case Intrinsic::maxnum: |
1257 | ISDs.push_back(ISD::FMAXNUM); |
1258 | if (FMF.noNaNs()) |
1259 | ISDs.push_back(ISD::FMAXIMUM); |
1260 | break; |
1261 | case Intrinsic::copysign: |
1262 | ISDs.push_back(ISD::FCOPYSIGN); |
1263 | break; |
1264 | case Intrinsic::floor: |
1265 | ISDs.push_back(ISD::FFLOOR); |
1266 | break; |
1267 | case Intrinsic::ceil: |
1268 | ISDs.push_back(ISD::FCEIL); |
1269 | break; |
1270 | case Intrinsic::trunc: |
1271 | ISDs.push_back(ISD::FTRUNC); |
1272 | break; |
1273 | case Intrinsic::nearbyint: |
1274 | ISDs.push_back(ISD::FNEARBYINT); |
1275 | break; |
1276 | case Intrinsic::rint: |
1277 | ISDs.push_back(ISD::FRINT); |
1278 | break; |
1279 | case Intrinsic::round: |
1280 | ISDs.push_back(ISD::FROUND); |
1281 | break; |
1282 | case Intrinsic::pow: |
1283 | ISDs.push_back(ISD::FPOW); |
1284 | break; |
1285 | case Intrinsic::fma: |
1286 | ISDs.push_back(ISD::FMA); |
1287 | break; |
1288 | case Intrinsic::fmuladd: |
1289 | ISDs.push_back(ISD::FMA); |
1290 | break; |
1291 | case Intrinsic::experimental_constrained_fmuladd: |
1292 | ISDs.push_back(ISD::STRICT_FMA); |
1293 | break; |
1294 | // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free. |
1295 | case Intrinsic::lifetime_start: |
1296 | case Intrinsic::lifetime_end: |
1297 | case Intrinsic::sideeffect: |
1298 | return 0; |
1299 | case Intrinsic::masked_store: |
1300 | return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0, |
1301 | 0); |
1302 | case Intrinsic::masked_load: |
1303 | return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0); |
1304 | case Intrinsic::experimental_vector_reduce_add: |
1305 | return ConcreteTTI->getArithmeticReductionCost(Instruction::Add, Tys[0], |
1306 | /*IsPairwiseForm=*/false); |
1307 | case Intrinsic::experimental_vector_reduce_mul: |
1308 | return ConcreteTTI->getArithmeticReductionCost(Instruction::Mul, Tys[0], |
1309 | /*IsPairwiseForm=*/false); |
1310 | case Intrinsic::experimental_vector_reduce_and: |
1311 | return ConcreteTTI->getArithmeticReductionCost(Instruction::And, Tys[0], |
1312 | /*IsPairwiseForm=*/false); |
1313 | case Intrinsic::experimental_vector_reduce_or: |
1314 | return ConcreteTTI->getArithmeticReductionCost(Instruction::Or, Tys[0], |
1315 | /*IsPairwiseForm=*/false); |
1316 | case Intrinsic::experimental_vector_reduce_xor: |
1317 | return ConcreteTTI->getArithmeticReductionCost(Instruction::Xor, Tys[0], |
1318 | /*IsPairwiseForm=*/false); |
1319 | case Intrinsic::experimental_vector_reduce_v2_fadd: |
1320 | return ConcreteTTI->getArithmeticReductionCost( |
1321 | Instruction::FAdd, Tys[0], |
1322 | /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict |
1323 | // reductions. |
1324 | case Intrinsic::experimental_vector_reduce_v2_fmul: |
1325 | return ConcreteTTI->getArithmeticReductionCost( |
1326 | Instruction::FMul, Tys[0], |
1327 | /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict |
1328 | // reductions. |
1329 | case Intrinsic::experimental_vector_reduce_smax: |
1330 | case Intrinsic::experimental_vector_reduce_smin: |
1331 | case Intrinsic::experimental_vector_reduce_fmax: |
1332 | case Intrinsic::experimental_vector_reduce_fmin: |
1333 | return ConcreteTTI->getMinMaxReductionCost( |
1334 | Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false, |
1335 | /*IsUnsigned=*/false); |
1336 | case Intrinsic::experimental_vector_reduce_umax: |
1337 | case Intrinsic::experimental_vector_reduce_umin: |
1338 | return ConcreteTTI->getMinMaxReductionCost( |
1339 | Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false, |
1340 | /*IsUnsigned=*/true); |
1341 | case Intrinsic::sadd_sat: |
1342 | case Intrinsic::ssub_sat: { |
1343 | Type *CondTy = RetTy->getWithNewBitWidth(1); |
1344 | |
1345 | Type *OpTy = StructType::create({RetTy, CondTy}); |
1346 | Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat |
1347 | ? Intrinsic::sadd_with_overflow |
1348 | : Intrinsic::ssub_with_overflow; |
1349 | |
1350 | // SatMax -> Overflow && SumDiff < 0 |
1351 | // SatMin -> Overflow && SumDiff >= 0 |
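| // Illustrative expansion this models for sadd.sat on i32 (value names |
| // are ours): |
| //   %agg = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) |
| //   %sum = extractvalue {i32, i1} %agg, 0 |
| //   %ovf = extractvalue {i32, i1} %agg, 1 |
| //   %neg = icmp slt i32 %sum, 0 |
| //   %sat = select i1 %neg, i32 2147483647, i32 -2147483648 |
| //   %res = select i1 %ovf, i32 %sat, i32 %sum |
| // i.e. one overflow intrinsic, one icmp, and two selects, as costed below. |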
1352 | unsigned Cost = 0; |
1353 | Cost += ConcreteTTI->getIntrinsicInstrCost( |
1354 | OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed); |
1355 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, |
1356 | CondTy, nullptr); |
1357 | Cost += 2 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy, |
1358 | CondTy, nullptr); |
1359 | return Cost; |
1360 | } |
1361 | case Intrinsic::uadd_sat: |
1362 | case Intrinsic::usub_sat: { |
1363 | Type *CondTy = RetTy->getWithNewBitWidth(1); |
1364 | |
1365 | Type *OpTy = StructType::create({RetTy, CondTy}); |
1366 | Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat |
1367 | ? Intrinsic::uadd_with_overflow |
1368 | : Intrinsic::usub_with_overflow; |
1369 | |
1370 | unsigned Cost = 0; |
1371 | Cost += ConcreteTTI->getIntrinsicInstrCost( |
1372 | OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed); |
1373 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy, |
1374 | CondTy, nullptr); |
1375 | return Cost; |
1376 | } |
1377 | case Intrinsic::smul_fix: |
1378 | case Intrinsic::umul_fix: { |
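| // A sketch of the generic lowering being costed here: extend both |
| // operands to twice the bit width, multiply, recombine the high and low |
| // halves of the product with lshr/shl/or, and truncate back. |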
1379 | unsigned ExtSize = RetTy->getScalarSizeInBits() * 2; |
1380 | Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize); |
1381 | |
1382 | unsigned ExtOp = |
1383 | IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt; |
1384 | |
1385 | unsigned Cost = 0; |
1386 | Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, RetTy); |
1387 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy); |
1388 | Cost += |
1389 | 2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy); |
1390 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, RetTy, |
1391 | TTI::OK_AnyValue, |
1392 | TTI::OK_UniformConstantValue); |
1393 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Shl, RetTy, |
1394 | TTI::OK_AnyValue, |
1395 | TTI::OK_UniformConstantValue); |
1396 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Or, RetTy); |
1397 | return Cost; |
1398 | } |
1399 | case Intrinsic::sadd_with_overflow: |
1400 | case Intrinsic::ssub_with_overflow: { |
1401 | Type *SumTy = RetTy->getContainedType(0); |
1402 | Type *OverflowTy = RetTy->getContainedType(1); |
1403 | unsigned Opcode = IID == Intrinsic::sadd_with_overflow |
1404 | ? BinaryOperator::Add |
1405 | : BinaryOperator::Sub; |
1406 | |
1407 | // LHSSign -> LHS >= 0 |
1408 | // RHSSign -> RHS >= 0 |
1409 | // SumSign -> Sum >= 0 |
1410 | // |
1411 | // Add: |
1412 | // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign) |
1413 | // Sub: |
1414 | // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) |
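| // E.g. (illustrative, i8 add): 100 + 100 wraps to -56; both operands are |
| // non-negative but the sum is negative, so signed overflow is detected. |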
1415 | unsigned Cost = 0; |
1416 | Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy); |
1417 | Cost += 3 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, |
1418 | OverflowTy, nullptr); |
1419 | Cost += 2 * ConcreteTTI->getCmpSelInstrCost( |
1420 | BinaryOperator::ICmp, OverflowTy, OverflowTy, nullptr); |
1421 | Cost += |
1422 | ConcreteTTI->getArithmeticInstrCost(BinaryOperator::And, OverflowTy); |
1423 | return Cost; |
1424 | } |
1425 | case Intrinsic::uadd_with_overflow: |
1426 | case Intrinsic::usub_with_overflow: { |
1427 | Type *SumTy = RetTy->getContainedType(0); |
1428 | Type *OverflowTy = RetTy->getContainedType(1); |
1429 | unsigned Opcode = IID == Intrinsic::uadd_with_overflow |
1430 | ? BinaryOperator::Add |
1431 | : BinaryOperator::Sub; |
1432 | |
1433 | unsigned Cost = 0; |
1434 | Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy); |
1435 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, |
1436 | OverflowTy, nullptr); |
1437 | return Cost; |
1438 | } |
1439 | case Intrinsic::smul_with_overflow: |
1440 | case Intrinsic::umul_with_overflow: { |
1441 | Type *MulTy = RetTy->getContainedType(0); |
1442 | Type *OverflowTy = RetTy->getContainedType(1); |
1443 | unsigned ExtSize = MulTy->getScalarSizeInBits() * 2; |
1444 | Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize); |
1445 | |
1446 | unsigned ExtOp = |
1447 | IID == Intrinsic::smul_with_overflow ? Instruction::SExt : Instruction::ZExt; |
1448 | |
1449 | unsigned Cost = 0; |
1450 | Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, MulTy); |
1451 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy); |
1452 | Cost += |
1453 | 2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy); |
1454 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, MulTy, |
1455 | TTI::OK_AnyValue, |
1456 | TTI::OK_UniformConstantValue); |
1457 | |
1458 | if (IID == Intrinsic::smul_with_overflow) |
1459 | Cost += ConcreteTTI->getArithmeticInstrCost( |
1460 | Instruction::AShr, MulTy, TTI::OK_AnyValue, |
1461 | TTI::OK_UniformConstantValue); |
1462 | |
1463 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy, |
1464 | OverflowTy, nullptr); |
1465 | return Cost; |
1466 | } |
1467 | case Intrinsic::ctpop: |
1468 | ISDs.push_back(ISD::CTPOP); |
1469 | // If this ends up being legalized, use TCC_Expensive. This is cheaper than |
1470 | // a library call but still not a cheap instruction. |
1471 | SingleCallCost = TargetTransformInfo::TCC_Expensive; |
1472 | break; |
1473 | // FIXME: ctlz, cttz, ... |
1474 | case Intrinsic::bswap: |
1475 | ISDs.push_back(ISD::BSWAP); |
1476 | break; |
1477 | case Intrinsic::bitreverse: |
1478 | ISDs.push_back(ISD::BITREVERSE); |
1479 | break; |
1480 | } |
1481 | |
1482 | const TargetLoweringBase *TLI = getTLI(); |
1483 | std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy); |
1484 | |
1485 | SmallVector<unsigned, 2> LegalCost; |
1486 | SmallVector<unsigned, 2> CustomCost; |
1487 | for (unsigned ISD : ISDs) { |
1488 | if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { |
1489 | if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() && |
1490 | TLI->isFAbsFree(LT.second)) { |
1491 | return 0; |
1492 | } |
1493 | |
1494 | // The operation is legal. Assume it costs 1. |
1495 | // If the type is split into multiple registers, assume that there is some |
1496 | // overhead to this. |
1497 | // TODO: Once we have extract/insert subvector cost we need to use them. |
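| // E.g. (illustrative): llvm.sqrt on <8 x float> for an SSE-only target |
| // legalizes to two v4f32 FSQRT ops, so LT.first == 2 and the assumed |
| // cost pushed here is 2 * 2 = 4. |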
1498 | if (LT.first > 1) |
1499 | LegalCost.push_back(LT.first * 2); |
1500 | else |
1501 | LegalCost.push_back(LT.first * 1); |
1502 | } else if (!TLI->isOperationExpand(ISD, LT.second)) { |
1503 | // If the operation is custom lowered then assume |
1504 | // that the code is twice as expensive. |
1505 | CustomCost.push_back(LT.first * 2); |
1506 | } |
1507 | } |
1508 | |
1509 | auto MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end()); |
1510 | if (MinLegalCostI != LegalCost.end()) |
1511 | return *MinLegalCostI; |
1512 | |
1513 | auto MinCustomCostI = |
1514 | std::min_element(CustomCost.begin(), CustomCost.end()); |
1515 | if (MinCustomCostI != CustomCost.end()) |
1516 | return *MinCustomCostI; |
1517 | |
1518 | // If we can't lower fmuladd into an FMA estimate the cost as a floating |
1519 | // point mul followed by an add. |
1520 | if (IID == Intrinsic::fmuladd) |
1521 | return ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) + |
1522 | ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy); |
1523 | if (IID == Intrinsic::experimental_constrained_fmuladd) |
1524 | return ConcreteTTI->getIntrinsicCost( |
1525 | Intrinsic::experimental_constrained_fmul, RetTy, Tys, |
1526 | nullptr) + |
1527 | ConcreteTTI->getIntrinsicCost( |
1528 | Intrinsic::experimental_constrained_fadd, RetTy, Tys, nullptr); |
1529 | |
1530 | // Else, assume that we need to scalarize this intrinsic. For math builtins |
1531 | // this will emit a costly libcall, adding call overhead and spills. Make it |
1532 | // very expensive. |
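| // E.g. (illustrative): llvm.sin.v4f32 with no vector lowering is costed |
| // as 4 scalar sinf-style libcalls plus the extract/insert overhead of |
| // scalarizing the operands and the result. |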
1533 | if (RetTy->isVectorTy()) { |
1534 | unsigned ScalarizationCost = |
1535 | ((ScalarizationCostPassed != std::numeric_limits<unsigned>::max()) |
1536 | ? ScalarizationCostPassed |
1537 | : getScalarizationOverhead(RetTy, true, false)); |
1538 | unsigned ScalarCalls = RetTy->getVectorNumElements(); |
1539 | SmallVector<Type *, 4> ScalarTys; |
1540 | for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { |
1541 | Type *Ty = Tys[i]; |
1542 | if (Ty->isVectorTy()) |
1543 | Ty = Ty->getScalarType(); |
1544 | ScalarTys.push_back(Ty); |
1545 | } |
1546 | unsigned ScalarCost = ConcreteTTI->getIntrinsicInstrCost( |
1547 | IID, RetTy->getScalarType(), ScalarTys, FMF); |
1548 | for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { |
1549 | if (Tys[i]->isVectorTy()) { |
1550 | if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max()) |
1551 | ScalarizationCost += getScalarizationOverhead(Tys[i], false, true); |
1552 | ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements()); |
1553 | } |
1554 | } |
1555 | |
1556 | return ScalarCalls * ScalarCost + ScalarizationCost; |
1557 | } |
1558 | |
1559 | // This is going to be turned into a library call, make it expensive. |
1560 | return SingleCallCost; |
1561 | } |
1562 | |
1563 | /// Compute the cost of the given call instruction. |
1564 | /// |
1565 | /// Compute the cost of calling function F with return type RetTy and |
1566 | /// argument types Tys. F might be nullptr, in which case the cost of an |
1567 | /// arbitrary call with the specified signature will be returned. |
1568 | /// This is used, for instance, when we estimate a call of a vector |
1569 | /// counterpart of the given function. |
1570 | /// \param F Called function, might be nullptr. |
1571 | /// \param RetTy Return value type. |
1572 | /// \param Tys Argument types. |
1573 | /// \returns The cost of the call instruction. |
1574 | unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) { |
1575 | return 10; |
1576 | } |
1577 | |
1578 | unsigned getNumberOfParts(Type *Tp) { |
1579 | std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp); |
1580 | return LT.first; |
1581 | } |
1582 | |
1583 | unsigned getAddressComputationCost(Type *Ty, ScalarEvolution *, |
1584 | const SCEV *) { |
1585 | return 0; |
1586 | } |
1587 | |
1588 | /// Try to calculate arithmetic and shuffle op costs for reduction operations. |
1589 | /// We're assuming that reduction operations are performed in the following way: |
1590 | /// 1. Non-pairwise reduction |
1591 | /// %val1 = shufflevector<n x t> %val, <n x t> %undef, |
1592 | /// <n x i32> <i32 n/2, i32 n/2 + 1, ..., i32 n-1, i32 undef, ..., i32 undef> |
1593 | /// \----------------v-------------/ \----------v------------/ |
1594 | /// n/2 elements n/2 elements |
1595 | /// %red1 = op <n x t> %val, <n x t> %val1 |
1596 | /// After this operation we have a vector %red1 where only the first n/2 |
1597 | /// elements are meaningful; the remaining n/2 elements are undefined and can be |
1598 | /// dropped. All other operations are actually working with the vector of |
1599 | /// length n/2, not n, though the real vector length is still n. |
1600 | /// %val2 = shufflevector<n x t> %red1, <n x t> %undef, |
1601 | /// <n x i32> <i32 n/4, i32 n/4 + 1, ..., i32 n/2-1, i32 undef, ..., i32 undef> |
1602 | /// \----------------v-------------/ \----------v------------/ |
1603 | /// n/4 elements 3*n/4 elements |
1604 | /// %red2 = op <n x t> %red1, <n x t> %val2 - working with the vector of |
1605 | /// length n/2, the resulting vector has length n/4 etc. |
1606 | /// 2. Pairwise reduction: |
1607 | /// Everything is the same except for an additional shuffle operation which |
1608 | /// is used to produce operands for pairwise kind of reductions. |
1609 | /// %val1 = shufflevector<n x t> %val, <n x t> %undef, |
1610 | /// <n x i32> <i32 0, i32 2, ..., i32 n-2, i32 undef, ..., i32 undef> |
1611 | /// \-------------v----------/ \----------v------------/ |
1612 | /// n/2 elements n/2 elements |
1613 | /// %val2 = shufflevector<n x t> %val, <n x t> %undef, |
1614 | /// <n x i32> <i32 1, i32 3, ..., i32 n-1, i32 undef, ..., i32 undef> |
1615 | /// \-------------v----------/ \----------v------------/ |
1616 | /// n/2 elements n/2 elements |
1617 | /// %red1 = op <n x t> %val1, <n x t> %val2 |
1618 | /// Again, the operation is performed on an <n x t> vector, but the |
1619 | /// resulting vector %red1 is an <n/2 x t> vector. |
1620 | /// |
1621 | /// The cost model should take into account that the actual length of the |
1622 | /// vector is reduced on each iteration. |
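| /// For example (illustrative), a non-pairwise reduction of <8 x i32> takes |
| /// log2(8) = 3 shuffle+op levels, halving the meaningful width 8 -> 4 -> 2 |
| /// -> 1, followed by a single extractelement of lane 0. |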
1623 | unsigned getArithmeticReductionCost(unsigned Opcode, Type *Ty, |
1624 | bool IsPairwise) { |
1625 | assert(Ty->isVectorTy() && "Expect a vector type"); |
1626 | Type *ScalarTy = Ty->getVectorElementType(); |
1627 | unsigned NumVecElts = Ty->getVectorNumElements(); |
1628 | unsigned NumReduxLevels = Log2_32(NumVecElts); |
1629 | unsigned ArithCost = 0; |
1630 | unsigned ShuffleCost = 0; |
1631 | auto *ConcreteTTI = static_cast<T *>(this); |
1632 | std::pair<unsigned, MVT> LT = |
1633 | ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty); |
1634 | unsigned LongVectorCount = 0; |
1635 | unsigned MVTLen = |
1636 | LT.second.isVector() ? LT.second.getVectorNumElements() : 1; |
1637 | while (NumVecElts > MVTLen) { |
1638 | NumVecElts /= 2; |
1639 | Type *SubTy = VectorType::get(ScalarTy, NumVecElts); |
1640 | // Assume the pairwise shuffles add a cost. |
1641 | ShuffleCost += (IsPairwise + 1) * |
1642 | ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty, |
1643 | NumVecElts, SubTy); |
1644 | ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, SubTy); |
1645 | Ty = SubTy; |
1646 | ++LongVectorCount; |
1647 | } |
1648 | |
1649 | NumReduxLevels -= LongVectorCount; |
1650 | |
1651 | // The minimal length of the vector is limited by the real length of vector |
1652 | // operations performed on the current platform. That's why several final |
1653 | // reduction operations are performed on vectors of the same |
1654 | // architecture-dependent length. |
1655 | |
1656 | // Non-pairwise reductions need one shuffle per reduction level. Pairwise |
1657 | // reductions need two shuffles on every level but the last one; on the |
1658 | // last level one of the shuffles is <0, u, u, ...>, which is an identity. |
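| // E.g. (illustrative): reducing <16 x i32> when the widest legal type is |
| // v4i32 first splits 16 -> 8 -> 4 in the loop above (LongVectorCount == 2), |
| // leaving log2(4) == 2 reduction levels to cost on v4i32 below. |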
1659 | unsigned NumShuffles = NumReduxLevels; |
1660 | if (IsPairwise && NumReduxLevels >= 1) |
1661 | NumShuffles += NumReduxLevels - 1; |
1662 | ShuffleCost += NumShuffles * |
1663 | ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, |
1664 | 0, Ty); |
1665 | ArithCost += NumReduxLevels * |
1666 | ConcreteTTI->getArithmeticInstrCost(Opcode, Ty); |
1667 | return ShuffleCost + ArithCost + |
1668 | ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0); |
1669 | } |
1670 | |
1671 | /// Try to calculate op costs for min/max reduction operations. |
1672 | /// \param CondTy Conditional type for the Select instruction. |
1673 | unsigned getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwise, |
1674 | bool) { |
1675 | assert(Ty->isVectorTy() && "Expect a vector type"); |
1676 | Type *ScalarTy = Ty->getVectorElementType(); |
1677 | Type *ScalarCondTy = CondTy->getVectorElementType(); |
1678 | unsigned NumVecElts = Ty->getVectorNumElements(); |
1679 | unsigned NumReduxLevels = Log2_32(NumVecElts); |
1680 | unsigned CmpOpcode; |
1681 | if (Ty->isFPOrFPVectorTy()) { |
1682 | CmpOpcode = Instruction::FCmp; |
1683 | } else { |
1684 | assert(Ty->isIntOrIntVectorTy() && |
1685 | "expecting floating point or integer type for min/max reduction"); |
1686 | CmpOpcode = Instruction::ICmp; |
1687 | } |
1688 | unsigned MinMaxCost = 0; |
1689 | unsigned ShuffleCost = 0; |
1690 | auto *ConcreteTTI = static_cast<T *>(this); |
1691 | std::pair<unsigned, MVT> LT = |
1692 | ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty); |
1693 | unsigned LongVectorCount = 0; |
1694 | unsigned MVTLen = |
1695 | LT.second.isVector() ? LT.second.getVectorNumElements() : 1; |
1696 | while (NumVecElts > MVTLen) { |
1697 | NumVecElts /= 2; |
1698 | Type *SubTy = VectorType::get(ScalarTy, NumVecElts); |
1699 | CondTy = VectorType::get(ScalarCondTy, NumVecElts); |
1700 | |
1701 | // Assume the pairwise shuffles add a cost. |
1702 | ShuffleCost += (IsPairwise + 1) * |
1703 | ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty, |
1704 | NumVecElts, SubTy); |
1705 | MinMaxCost += |
1706 | ConcreteTTI->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, nullptr) + |
1707 | ConcreteTTI->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy, |
1708 | nullptr); |
1709 | Ty = SubTy; |
1710 | ++LongVectorCount; |
1711 | } |
1712 | |
1713 | NumReduxLevels -= LongVectorCount; |
1714 | |
1715 | // The minimal length of the vector is limited by the real length of vector |
1716 | // operations performed on the current platform. That's why several final |
1717 | // reduction operations are performed on vectors of the same |
1718 | // architecture-dependent length. |
1719 | |
1720 | // Non-pairwise reductions need one shuffle per reduction level. Pairwise |
1721 | // reductions need two shuffles on every level but the last one; on the |
1722 | // last level one of the shuffles is <0, u, u, ...>, which is an identity. |
1723 | unsigned NumShuffles = NumReduxLevels; |
1724 | if (IsPairwise && NumReduxLevels >= 1) |
1725 | NumShuffles += NumReduxLevels - 1; |
1726 | ShuffleCost += NumShuffles * |
1727 | ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, |
1728 | 0, Ty); |
1729 | MinMaxCost += |
1730 | NumReduxLevels * |
1731 | (ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) + |
1732 | ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy, |
1733 | nullptr)); |
1734 | // The last min/max should be in vector registers and we counted it above. |
1735 | // So we just need a single extractelement. |
1736 | return ShuffleCost + MinMaxCost + |
1737 | ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0); |
1738 | } |
1739 | |
1740 | unsigned getVectorSplitCost() { return 1; } |
1741 | |
1742 | /// @} |
1743 | }; |
1744 | |
1745 | /// Concrete BasicTTIImpl that can be used if no further customization |
1746 | /// is needed. |
1747 | class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> { |
1748 | using BaseT = BasicTTIImplBase<BasicTTIImpl>; |
1749 | |
1750 | friend class BasicTTIImplBase<BasicTTIImpl>; |
1751 | |
1752 | const TargetSubtargetInfo *ST; |
1753 | const TargetLoweringBase *TLI; |
1754 | |
1755 | const TargetSubtargetInfo *getST() const { return ST; } |
1756 | const TargetLoweringBase *getTLI() const { return TLI; } |
1757 | |
1758 | public: |
1759 | explicit BasicTTIImpl(const TargetMachine *TM, const Function &F); |
1760 | }; |
1761 | |
1762 | } // end namespace llvm |
1763 | |
1764 | #endif // LLVM_CODEGEN_BASICTTIIMPL_H |