// Static-analyzer report context (from the code-browser view this file was
// captured from): warning in llvm/include/llvm/Analysis/TargetTransformInfoImpl.h,
// line 76, column 25 — "Called C++ object pointer is null".
1 | //===-- PPCTargetTransformInfo.cpp - PPC specific TTI ---------------------===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | ||||
9 | #include "PPCTargetTransformInfo.h" | |||
10 | #include "llvm/Analysis/CodeMetrics.h" | |||
11 | #include "llvm/Analysis/TargetTransformInfo.h" | |||
12 | #include "llvm/CodeGen/BasicTTIImpl.h" | |||
13 | #include "llvm/CodeGen/CostTable.h" | |||
14 | #include "llvm/CodeGen/TargetLowering.h" | |||
15 | #include "llvm/CodeGen/TargetSchedule.h" | |||
16 | #include "llvm/Support/CommandLine.h" | |||
17 | #include "llvm/Support/Debug.h" | |||
18 | using namespace llvm; | |||
19 | ||||
20 | #define DEBUG_TYPE"ppctti" "ppctti" | |||
21 | ||||
22 | static cl::opt<bool> DisablePPCConstHoist("disable-ppc-constant-hoisting", | |||
23 | cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden); | |||
24 | ||||
25 | // This is currently only used for the data prefetch pass which is only enabled | |||
26 | // for BG/Q by default. | |||
27 | static cl::opt<unsigned> | |||
28 | CacheLineSize("ppc-loop-prefetch-cache-line", cl::Hidden, cl::init(64), | |||
29 | cl::desc("The loop prefetch cache line size")); | |||
30 | ||||
31 | static cl::opt<bool> | |||
32 | EnablePPCColdCC("ppc-enable-coldcc", cl::Hidden, cl::init(false), | |||
33 | cl::desc("Enable using coldcc calling conv for cold " | |||
34 | "internal functions")); | |||
35 | ||||
36 | static cl::opt<bool> | |||
37 | LsrNoInsnsCost("ppc-lsr-no-insns-cost", cl::Hidden, cl::init(false), | |||
38 | cl::desc("Do not add instruction count to lsr cost model")); | |||
39 | ||||
40 | // The latency of mtctr is only justified if there are more than 4 | |||
41 | // comparisons that will be removed as a result. | |||
42 | static cl::opt<unsigned> | |||
43 | SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden, | |||
44 | cl::desc("Loops with a constant trip count smaller than " | |||
45 | "this value will not use the count register.")); | |||
46 | ||||
47 | //===----------------------------------------------------------------------===// | |||
48 | // | |||
49 | // PPC cost model. | |||
50 | // | |||
51 | //===----------------------------------------------------------------------===// | |||
52 | ||||
53 | TargetTransformInfo::PopcntSupportKind | |||
54 | PPCTTIImpl::getPopcntSupport(unsigned TyWidth) { | |||
55 | assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2")((isPowerOf2_32(TyWidth) && "Ty width must be power of 2" ) ? static_cast<void> (0) : __assert_fail ("isPowerOf2_32(TyWidth) && \"Ty width must be power of 2\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp" , 55, __PRETTY_FUNCTION__)); | |||
56 | if (ST->hasPOPCNTD() != PPCSubtarget::POPCNTD_Unavailable && TyWidth <= 64) | |||
57 | return ST->hasPOPCNTD() == PPCSubtarget::POPCNTD_Slow ? | |||
58 | TTI::PSK_SlowHardware : TTI::PSK_FastHardware; | |||
59 | return TTI::PSK_Software; | |||
60 | } | |||
61 | ||||
62 | int PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { | |||
63 | if (DisablePPCConstHoist) | |||
64 | return BaseT::getIntImmCost(Imm, Ty); | |||
65 | ||||
66 | assert(Ty->isIntegerTy())((Ty->isIntegerTy()) ? static_cast<void> (0) : __assert_fail ("Ty->isIntegerTy()", "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp" , 66, __PRETTY_FUNCTION__)); | |||
67 | ||||
68 | unsigned BitSize = Ty->getPrimitiveSizeInBits(); | |||
69 | if (BitSize == 0) | |||
70 | return ~0U; | |||
71 | ||||
72 | if (Imm == 0) | |||
73 | return TTI::TCC_Free; | |||
74 | ||||
75 | if (Imm.getBitWidth() <= 64) { | |||
76 | if (isInt<16>(Imm.getSExtValue())) | |||
77 | return TTI::TCC_Basic; | |||
78 | ||||
79 | if (isInt<32>(Imm.getSExtValue())) { | |||
80 | // A constant that can be materialized using lis. | |||
81 | if ((Imm.getZExtValue() & 0xFFFF) == 0) | |||
82 | return TTI::TCC_Basic; | |||
83 | ||||
84 | return 2 * TTI::TCC_Basic; | |||
85 | } | |||
86 | } | |||
87 | ||||
88 | return 4 * TTI::TCC_Basic; | |||
89 | } | |||
90 | ||||
91 | int PPCTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, | |||
92 | const APInt &Imm, Type *Ty) { | |||
93 | if (DisablePPCConstHoist) | |||
94 | return BaseT::getIntImmCostIntrin(IID, Idx, Imm, Ty); | |||
95 | ||||
96 | assert(Ty->isIntegerTy())((Ty->isIntegerTy()) ? static_cast<void> (0) : __assert_fail ("Ty->isIntegerTy()", "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp" , 96, __PRETTY_FUNCTION__)); | |||
97 | ||||
98 | unsigned BitSize = Ty->getPrimitiveSizeInBits(); | |||
99 | if (BitSize == 0) | |||
100 | return ~0U; | |||
101 | ||||
102 | switch (IID) { | |||
103 | default: | |||
104 | return TTI::TCC_Free; | |||
105 | case Intrinsic::sadd_with_overflow: | |||
106 | case Intrinsic::uadd_with_overflow: | |||
107 | case Intrinsic::ssub_with_overflow: | |||
108 | case Intrinsic::usub_with_overflow: | |||
109 | if ((Idx == 1) && Imm.getBitWidth() <= 64 && isInt<16>(Imm.getSExtValue())) | |||
110 | return TTI::TCC_Free; | |||
111 | break; | |||
112 | case Intrinsic::experimental_stackmap: | |||
113 | if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) | |||
114 | return TTI::TCC_Free; | |||
115 | break; | |||
116 | case Intrinsic::experimental_patchpoint_void: | |||
117 | case Intrinsic::experimental_patchpoint_i64: | |||
118 | if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) | |||
119 | return TTI::TCC_Free; | |||
120 | break; | |||
121 | } | |||
122 | return PPCTTIImpl::getIntImmCost(Imm, Ty); | |||
123 | } | |||
124 | ||||
125 | int PPCTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, | |||
126 | const APInt &Imm, Type *Ty) { | |||
127 | if (DisablePPCConstHoist) | |||
128 | return BaseT::getIntImmCostInst(Opcode, Idx, Imm, Ty); | |||
129 | ||||
130 | assert(Ty->isIntegerTy())((Ty->isIntegerTy()) ? static_cast<void> (0) : __assert_fail ("Ty->isIntegerTy()", "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp" , 130, __PRETTY_FUNCTION__)); | |||
131 | ||||
132 | unsigned BitSize = Ty->getPrimitiveSizeInBits(); | |||
133 | if (BitSize == 0) | |||
134 | return ~0U; | |||
135 | ||||
136 | unsigned ImmIdx = ~0U; | |||
137 | bool ShiftedFree = false, RunFree = false, UnsignedFree = false, | |||
138 | ZeroFree = false; | |||
139 | switch (Opcode) { | |||
140 | default: | |||
141 | return TTI::TCC_Free; | |||
142 | case Instruction::GetElementPtr: | |||
143 | // Always hoist the base address of a GetElementPtr. This prevents the | |||
144 | // creation of new constants for every base constant that gets constant | |||
145 | // folded with the offset. | |||
146 | if (Idx == 0) | |||
147 | return 2 * TTI::TCC_Basic; | |||
148 | return TTI::TCC_Free; | |||
149 | case Instruction::And: | |||
150 | RunFree = true; // (for the rotate-and-mask instructions) | |||
151 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | |||
152 | case Instruction::Add: | |||
153 | case Instruction::Or: | |||
154 | case Instruction::Xor: | |||
155 | ShiftedFree = true; | |||
156 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | |||
157 | case Instruction::Sub: | |||
158 | case Instruction::Mul: | |||
159 | case Instruction::Shl: | |||
160 | case Instruction::LShr: | |||
161 | case Instruction::AShr: | |||
162 | ImmIdx = 1; | |||
163 | break; | |||
164 | case Instruction::ICmp: | |||
165 | UnsignedFree = true; | |||
166 | ImmIdx = 1; | |||
167 | // Zero comparisons can use record-form instructions. | |||
168 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | |||
169 | case Instruction::Select: | |||
170 | ZeroFree = true; | |||
171 | break; | |||
172 | case Instruction::PHI: | |||
173 | case Instruction::Call: | |||
174 | case Instruction::Ret: | |||
175 | case Instruction::Load: | |||
176 | case Instruction::Store: | |||
177 | break; | |||
178 | } | |||
179 | ||||
180 | if (ZeroFree && Imm == 0) | |||
181 | return TTI::TCC_Free; | |||
182 | ||||
183 | if (Idx == ImmIdx && Imm.getBitWidth() <= 64) { | |||
184 | if (isInt<16>(Imm.getSExtValue())) | |||
185 | return TTI::TCC_Free; | |||
186 | ||||
187 | if (RunFree) { | |||
188 | if (Imm.getBitWidth() <= 32 && | |||
189 | (isShiftedMask_32(Imm.getZExtValue()) || | |||
190 | isShiftedMask_32(~Imm.getZExtValue()))) | |||
191 | return TTI::TCC_Free; | |||
192 | ||||
193 | if (ST->isPPC64() && | |||
194 | (isShiftedMask_64(Imm.getZExtValue()) || | |||
195 | isShiftedMask_64(~Imm.getZExtValue()))) | |||
196 | return TTI::TCC_Free; | |||
197 | } | |||
198 | ||||
199 | if (UnsignedFree && isUInt<16>(Imm.getZExtValue())) | |||
200 | return TTI::TCC_Free; | |||
201 | ||||
202 | if (ShiftedFree && (Imm.getZExtValue() & 0xFFFF) == 0) | |||
203 | return TTI::TCC_Free; | |||
204 | } | |||
205 | ||||
206 | return PPCTTIImpl::getIntImmCost(Imm, Ty); | |||
207 | } | |||
208 | ||||
209 | unsigned PPCTTIImpl::getUserCost(const User *U, | |||
210 | ArrayRef<const Value *> Operands) { | |||
211 | if (U->getType()->isVectorTy()) { | |||
| ||||
212 | // Instructions that need to be split should cost more. | |||
213 | std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, U->getType()); | |||
214 | return LT.first * BaseT::getUserCost(U, Operands); | |||
215 | } | |||
216 | ||||
217 | return BaseT::getUserCost(U, Operands); | |||
218 | } | |||
219 | ||||
220 | bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, | |||
221 | TargetLibraryInfo *LibInfo) { | |||
222 | const PPCTargetMachine &TM = ST->getTargetMachine(); | |||
223 | ||||
224 | // Loop through the inline asm constraints and look for something that | |||
225 | // clobbers ctr. | |||
226 | auto asmClobbersCTR = [](InlineAsm *IA) { | |||
227 | InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints(); | |||
228 | for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) { | |||
229 | InlineAsm::ConstraintInfo &C = CIV[i]; | |||
230 | if (C.Type != InlineAsm::isInput) | |||
231 | for (unsigned j = 0, je = C.Codes.size(); j < je; ++j) | |||
232 | if (StringRef(C.Codes[j]).equals_lower("{ctr}")) | |||
233 | return true; | |||
234 | } | |||
235 | return false; | |||
236 | }; | |||
237 | ||||
238 | // Determining the address of a TLS variable results in a function call in | |||
239 | // certain TLS models. | |||
240 | std::function<bool(const Value*)> memAddrUsesCTR = | |||
241 | [&memAddrUsesCTR, &TM](const Value *MemAddr) -> bool { | |||
242 | const auto *GV = dyn_cast<GlobalValue>(MemAddr); | |||
243 | if (!GV) { | |||
244 | // Recurse to check for constants that refer to TLS global variables. | |||
245 | if (const auto *CV = dyn_cast<Constant>(MemAddr)) | |||
246 | for (const auto &CO : CV->operands()) | |||
247 | if (memAddrUsesCTR(CO)) | |||
248 | return true; | |||
249 | ||||
250 | return false; | |||
251 | } | |||
252 | ||||
253 | if (!GV->isThreadLocal()) | |||
254 | return false; | |||
255 | TLSModel::Model Model = TM.getTLSModel(GV); | |||
256 | return Model == TLSModel::GeneralDynamic || | |||
257 | Model == TLSModel::LocalDynamic; | |||
258 | }; | |||
259 | ||||
260 | auto isLargeIntegerTy = [](bool Is32Bit, Type *Ty) { | |||
261 | if (IntegerType *ITy = dyn_cast<IntegerType>(Ty)) | |||
262 | return ITy->getBitWidth() > (Is32Bit ? 32U : 64U); | |||
263 | ||||
264 | return false; | |||
265 | }; | |||
266 | ||||
267 | for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); | |||
268 | J != JE; ++J) { | |||
269 | if (CallInst *CI = dyn_cast<CallInst>(J)) { | |||
270 | // Inline ASM is okay, unless it clobbers the ctr register. | |||
271 | if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) { | |||
272 | if (asmClobbersCTR(IA)) | |||
273 | return true; | |||
274 | continue; | |||
275 | } | |||
276 | ||||
277 | if (Function *F = CI->getCalledFunction()) { | |||
278 | // Most intrinsics don't become function calls, but some might. | |||
279 | // sin, cos, exp and log are always calls. | |||
280 | unsigned Opcode = 0; | |||
281 | if (F->getIntrinsicID() != Intrinsic::not_intrinsic) { | |||
282 | switch (F->getIntrinsicID()) { | |||
283 | default: continue; | |||
284 | // If we have a call to ppc_is_decremented_ctr_nonzero, or ppc_mtctr | |||
285 | // we're definitely using CTR. | |||
286 | case Intrinsic::set_loop_iterations: | |||
287 | case Intrinsic::loop_decrement: | |||
288 | return true; | |||
289 | ||||
290 | // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp | |||
291 | // because, although it does clobber the counter register, the | |||
292 | // control can't then return to inside the loop unless there is also | |||
293 | // an eh_sjlj_setjmp. | |||
294 | case Intrinsic::eh_sjlj_setjmp: | |||
295 | ||||
296 | case Intrinsic::memcpy: | |||
297 | case Intrinsic::memmove: | |||
298 | case Intrinsic::memset: | |||
299 | case Intrinsic::powi: | |||
300 | case Intrinsic::log: | |||
301 | case Intrinsic::log2: | |||
302 | case Intrinsic::log10: | |||
303 | case Intrinsic::exp: | |||
304 | case Intrinsic::exp2: | |||
305 | case Intrinsic::pow: | |||
306 | case Intrinsic::sin: | |||
307 | case Intrinsic::cos: | |||
308 | return true; | |||
309 | case Intrinsic::copysign: | |||
310 | if (CI->getArgOperand(0)->getType()->getScalarType()-> | |||
311 | isPPC_FP128Ty()) | |||
312 | return true; | |||
313 | else | |||
314 | continue; // ISD::FCOPYSIGN is never a library call. | |||
315 | case Intrinsic::sqrt: Opcode = ISD::FSQRT; break; | |||
316 | case Intrinsic::floor: Opcode = ISD::FFLOOR; break; | |||
317 | case Intrinsic::ceil: Opcode = ISD::FCEIL; break; | |||
318 | case Intrinsic::trunc: Opcode = ISD::FTRUNC; break; | |||
319 | case Intrinsic::rint: Opcode = ISD::FRINT; break; | |||
320 | case Intrinsic::lrint: Opcode = ISD::LRINT; break; | |||
321 | case Intrinsic::llrint: Opcode = ISD::LLRINT; break; | |||
322 | case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break; | |||
323 | case Intrinsic::round: Opcode = ISD::FROUND; break; | |||
324 | case Intrinsic::lround: Opcode = ISD::LROUND; break; | |||
325 | case Intrinsic::llround: Opcode = ISD::LLROUND; break; | |||
326 | case Intrinsic::minnum: Opcode = ISD::FMINNUM; break; | |||
327 | case Intrinsic::maxnum: Opcode = ISD::FMAXNUM; break; | |||
328 | case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO; break; | |||
329 | case Intrinsic::smul_with_overflow: Opcode = ISD::SMULO; break; | |||
330 | } | |||
331 | } | |||
332 | ||||
333 | // PowerPC does not use [US]DIVREM or other library calls for | |||
334 | // operations on regular types which are not otherwise library calls | |||
335 | // (i.e. soft float or atomics). If adapting for targets that do, | |||
336 | // additional care is required here. | |||
337 | ||||
338 | LibFunc Func; | |||
339 | if (!F->hasLocalLinkage() && F->hasName() && LibInfo && | |||
340 | LibInfo->getLibFunc(F->getName(), Func) && | |||
341 | LibInfo->hasOptimizedCodeGen(Func)) { | |||
342 | // Non-read-only functions are never treated as intrinsics. | |||
343 | if (!CI->onlyReadsMemory()) | |||
344 | return true; | |||
345 | ||||
346 | // Conversion happens only for FP calls. | |||
347 | if (!CI->getArgOperand(0)->getType()->isFloatingPointTy()) | |||
348 | return true; | |||
349 | ||||
350 | switch (Func) { | |||
351 | default: return true; | |||
352 | case LibFunc_copysign: | |||
353 | case LibFunc_copysignf: | |||
354 | continue; // ISD::FCOPYSIGN is never a library call. | |||
355 | case LibFunc_copysignl: | |||
356 | return true; | |||
357 | case LibFunc_fabs: | |||
358 | case LibFunc_fabsf: | |||
359 | case LibFunc_fabsl: | |||
360 | continue; // ISD::FABS is never a library call. | |||
361 | case LibFunc_sqrt: | |||
362 | case LibFunc_sqrtf: | |||
363 | case LibFunc_sqrtl: | |||
364 | Opcode = ISD::FSQRT; break; | |||
365 | case LibFunc_floor: | |||
366 | case LibFunc_floorf: | |||
367 | case LibFunc_floorl: | |||
368 | Opcode = ISD::FFLOOR; break; | |||
369 | case LibFunc_nearbyint: | |||
370 | case LibFunc_nearbyintf: | |||
371 | case LibFunc_nearbyintl: | |||
372 | Opcode = ISD::FNEARBYINT; break; | |||
373 | case LibFunc_ceil: | |||
374 | case LibFunc_ceilf: | |||
375 | case LibFunc_ceill: | |||
376 | Opcode = ISD::FCEIL; break; | |||
377 | case LibFunc_rint: | |||
378 | case LibFunc_rintf: | |||
379 | case LibFunc_rintl: | |||
380 | Opcode = ISD::FRINT; break; | |||
381 | case LibFunc_round: | |||
382 | case LibFunc_roundf: | |||
383 | case LibFunc_roundl: | |||
384 | Opcode = ISD::FROUND; break; | |||
385 | case LibFunc_trunc: | |||
386 | case LibFunc_truncf: | |||
387 | case LibFunc_truncl: | |||
388 | Opcode = ISD::FTRUNC; break; | |||
389 | case LibFunc_fmin: | |||
390 | case LibFunc_fminf: | |||
391 | case LibFunc_fminl: | |||
392 | Opcode = ISD::FMINNUM; break; | |||
393 | case LibFunc_fmax: | |||
394 | case LibFunc_fmaxf: | |||
395 | case LibFunc_fmaxl: | |||
396 | Opcode = ISD::FMAXNUM; break; | |||
397 | } | |||
398 | } | |||
399 | ||||
400 | if (Opcode) { | |||
401 | EVT EVTy = | |||
402 | TLI->getValueType(DL, CI->getArgOperand(0)->getType(), true); | |||
403 | ||||
404 | if (EVTy == MVT::Other) | |||
405 | return true; | |||
406 | ||||
407 | if (TLI->isOperationLegalOrCustom(Opcode, EVTy)) | |||
408 | continue; | |||
409 | else if (EVTy.isVector() && | |||
410 | TLI->isOperationLegalOrCustom(Opcode, EVTy.getScalarType())) | |||
411 | continue; | |||
412 | ||||
413 | return true; | |||
414 | } | |||
415 | } | |||
416 | ||||
417 | return true; | |||
418 | } else if (isa<BinaryOperator>(J) && | |||
419 | J->getType()->getScalarType()->isPPC_FP128Ty()) { | |||
420 | // Most operations on ppc_f128 values become calls. | |||
421 | return true; | |||
422 | } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) || | |||
423 | isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) { | |||
424 | CastInst *CI = cast<CastInst>(J); | |||
425 | if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() || | |||
426 | CI->getDestTy()->getScalarType()->isPPC_FP128Ty() || | |||
427 | isLargeIntegerTy(!TM.isPPC64(), CI->getSrcTy()->getScalarType()) || | |||
428 | isLargeIntegerTy(!TM.isPPC64(), CI->getDestTy()->getScalarType())) | |||
429 | return true; | |||
430 | } else if (isLargeIntegerTy(!TM.isPPC64(), | |||
431 | J->getType()->getScalarType()) && | |||
432 | (J->getOpcode() == Instruction::UDiv || | |||
433 | J->getOpcode() == Instruction::SDiv || | |||
434 | J->getOpcode() == Instruction::URem || | |||
435 | J->getOpcode() == Instruction::SRem)) { | |||
436 | return true; | |||
437 | } else if (!TM.isPPC64() && | |||
438 | isLargeIntegerTy(false, J->getType()->getScalarType()) && | |||
439 | (J->getOpcode() == Instruction::Shl || | |||
440 | J->getOpcode() == Instruction::AShr || | |||
441 | J->getOpcode() == Instruction::LShr)) { | |||
442 | // Only on PPC32, for 128-bit integers (specifically not 64-bit | |||
443 | // integers), these might be runtime calls. | |||
444 | return true; | |||
445 | } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) { | |||
446 | // On PowerPC, indirect jumps use the counter register. | |||
447 | return true; | |||
448 | } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) { | |||
449 | if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries()) | |||
450 | return true; | |||
451 | } | |||
452 | ||||
453 | // FREM is always a call. | |||
454 | if (J->getOpcode() == Instruction::FRem) | |||
455 | return true; | |||
456 | ||||
457 | if (ST->useSoftFloat()) { | |||
458 | switch(J->getOpcode()) { | |||
459 | case Instruction::FAdd: | |||
460 | case Instruction::FSub: | |||
461 | case Instruction::FMul: | |||
462 | case Instruction::FDiv: | |||
463 | case Instruction::FPTrunc: | |||
464 | case Instruction::FPExt: | |||
465 | case Instruction::FPToUI: | |||
466 | case Instruction::FPToSI: | |||
467 | case Instruction::UIToFP: | |||
468 | case Instruction::SIToFP: | |||
469 | case Instruction::FCmp: | |||
470 | return true; | |||
471 | } | |||
472 | } | |||
473 | ||||
474 | for (Value *Operand : J->operands()) | |||
475 | if (memAddrUsesCTR(Operand)) | |||
476 | return true; | |||
477 | } | |||
478 | ||||
479 | return false; | |||
480 | } | |||
481 | ||||
482 | bool PPCTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, | |||
483 | AssumptionCache &AC, | |||
484 | TargetLibraryInfo *LibInfo, | |||
485 | HardwareLoopInfo &HWLoopInfo) { | |||
486 | const PPCTargetMachine &TM = ST->getTargetMachine(); | |||
487 | TargetSchedModel SchedModel; | |||
488 | SchedModel.init(ST); | |||
489 | ||||
490 | // Do not convert small short loops to CTR loop. | |||
491 | unsigned ConstTripCount = SE.getSmallConstantTripCount(L); | |||
492 | if (ConstTripCount && ConstTripCount < SmallCTRLoopThreshold) { | |||
493 | SmallPtrSet<const Value *, 32> EphValues; | |||
494 | CodeMetrics::collectEphemeralValues(L, &AC, EphValues); | |||
495 | CodeMetrics Metrics; | |||
496 | for (BasicBlock *BB : L->blocks()) | |||
497 | Metrics.analyzeBasicBlock(BB, *this, EphValues); | |||
498 | // 6 is an approximate latency for the mtctr instruction. | |||
499 | if (Metrics.NumInsts <= (6 * SchedModel.getIssueWidth())) | |||
500 | return false; | |||
501 | } | |||
502 | ||||
503 | // We don't want to spill/restore the counter register, and so we don't | |||
504 | // want to use the counter register if the loop contains calls. | |||
505 | for (Loop::block_iterator I = L->block_begin(), IE = L->block_end(); | |||
506 | I != IE; ++I) | |||
507 | if (mightUseCTR(*I, LibInfo)) | |||
508 | return false; | |||
509 | ||||
510 | SmallVector<BasicBlock*, 4> ExitingBlocks; | |||
511 | L->getExitingBlocks(ExitingBlocks); | |||
512 | ||||
513 | // If there is an exit edge known to be frequently taken, | |||
514 | // we should not transform this loop. | |||
515 | for (auto &BB : ExitingBlocks) { | |||
516 | Instruction *TI = BB->getTerminator(); | |||
517 | if (!TI) continue; | |||
518 | ||||
519 | if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { | |||
520 | uint64_t TrueWeight = 0, FalseWeight = 0; | |||
521 | if (!BI->isConditional() || | |||
522 | !BI->extractProfMetadata(TrueWeight, FalseWeight)) | |||
523 | continue; | |||
524 | ||||
525 | // If the exit path is more frequent than the loop path, | |||
526 | // we return here without further analysis for this loop. | |||
527 | bool TrueIsExit = !L->contains(BI->getSuccessor(0)); | |||
528 | if (( TrueIsExit && FalseWeight < TrueWeight) || | |||
529 | (!TrueIsExit && FalseWeight > TrueWeight)) | |||
530 | return false; | |||
531 | } | |||
532 | } | |||
533 | ||||
534 | LLVMContext &C = L->getHeader()->getContext(); | |||
535 | HWLoopInfo.CountType = TM.isPPC64() ? | |||
536 | Type::getInt64Ty(C) : Type::getInt32Ty(C); | |||
537 | HWLoopInfo.LoopDecrement = ConstantInt::get(HWLoopInfo.CountType, 1); | |||
538 | return true; | |||
539 | } | |||
540 | ||||
541 | void PPCTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, | |||
542 | TTI::UnrollingPreferences &UP) { | |||
543 | if (ST->getCPUDirective() == PPC::DIR_A2) { | |||
544 | // The A2 is in-order with a deep pipeline, and concatenation unrolling | |||
545 | // helps expose latency-hiding opportunities to the instruction scheduler. | |||
546 | UP.Partial = UP.Runtime = true; | |||
547 | ||||
548 | // We unroll a lot on the A2 (hundreds of instructions), and the benefits | |||
549 | // often outweigh the cost of a division to compute the trip count. | |||
550 | UP.AllowExpensiveTripCount = true; | |||
551 | } | |||
552 | ||||
553 | BaseT::getUnrollingPreferences(L, SE, UP); | |||
554 | } | |||
555 | ||||
556 | // This function returns true to allow using coldcc calling convention. | |||
557 | // Returning true results in coldcc being used for functions which are cold at | |||
558 | // all call sites when the callers of the functions are not calling any other | |||
559 | // non coldcc functions. | |||
560 | bool PPCTTIImpl::useColdCCForColdCall(Function &F) { | |||
561 | return EnablePPCColdCC; | |||
562 | } | |||
563 | ||||
564 | bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) { | |||
565 | // On the A2, always unroll aggressively. For QPX unaligned loads, we depend | |||
566 | // on combining the loads generated for consecutive accesses, and failure to | |||
567 | // do so is particularly expensive. This makes it much more likely (compared | |||
568 | // to only using concatenation unrolling). | |||
569 | if (ST->getCPUDirective() == PPC::DIR_A2) | |||
570 | return true; | |||
571 | ||||
572 | return LoopHasReductions; | |||
573 | } | |||
574 | ||||
575 | PPCTTIImpl::TTI::MemCmpExpansionOptions | |||
576 | PPCTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const { | |||
577 | TTI::MemCmpExpansionOptions Options; | |||
578 | Options.LoadSizes = {8, 4, 2, 1}; | |||
579 | Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize); | |||
580 | return Options; | |||
581 | } | |||
582 | ||||
583 | bool PPCTTIImpl::enableInterleavedAccessVectorization() { | |||
584 | return true; | |||
585 | } | |||
586 | ||||
587 | unsigned PPCTTIImpl::getNumberOfRegisters(unsigned ClassID) const { | |||
588 | assert(ClassID == GPRRC || ClassID == FPRRC ||((ClassID == GPRRC || ClassID == FPRRC || ClassID == VRRC || ClassID == VSXRC) ? static_cast<void> (0) : __assert_fail ("ClassID == GPRRC || ClassID == FPRRC || ClassID == VRRC || ClassID == VSXRC" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp" , 589, __PRETTY_FUNCTION__)) | |||
589 | ClassID == VRRC || ClassID == VSXRC)((ClassID == GPRRC || ClassID == FPRRC || ClassID == VRRC || ClassID == VSXRC) ? static_cast<void> (0) : __assert_fail ("ClassID == GPRRC || ClassID == FPRRC || ClassID == VRRC || ClassID == VSXRC" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp" , 589, __PRETTY_FUNCTION__)); | |||
590 | if (ST->hasVSX()) { | |||
591 | assert(ClassID == GPRRC || ClassID == VSXRC || ClassID == VRRC)((ClassID == GPRRC || ClassID == VSXRC || ClassID == VRRC) ? static_cast <void> (0) : __assert_fail ("ClassID == GPRRC || ClassID == VSXRC || ClassID == VRRC" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp" , 591, __PRETTY_FUNCTION__)); | |||
592 | return ClassID == VSXRC ? 64 : 32; | |||
593 | } | |||
594 | assert(ClassID == GPRRC || ClassID == FPRRC || ClassID == VRRC)((ClassID == GPRRC || ClassID == FPRRC || ClassID == VRRC) ? static_cast <void> (0) : __assert_fail ("ClassID == GPRRC || ClassID == FPRRC || ClassID == VRRC" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp" , 594, __PRETTY_FUNCTION__)); | |||
595 | return 32; | |||
596 | } | |||
597 | ||||
598 | unsigned PPCTTIImpl::getRegisterClassForType(bool Vector, Type *Ty) const { | |||
599 | if (Vector) | |||
600 | return ST->hasVSX() ? VSXRC : VRRC; | |||
601 | else if (Ty && (Ty->getScalarType()->isFloatTy() || | |||
602 | Ty->getScalarType()->isDoubleTy())) | |||
603 | return ST->hasVSX() ? VSXRC : FPRRC; | |||
604 | else if (Ty && (Ty->getScalarType()->isFP128Ty() || | |||
605 | Ty->getScalarType()->isPPC_FP128Ty())) | |||
606 | return VRRC; | |||
607 | else if (Ty && Ty->getScalarType()->isHalfTy()) | |||
608 | return VSXRC; | |||
609 | else | |||
610 | return GPRRC; | |||
611 | } | |||
612 | ||||
613 | const char* PPCTTIImpl::getRegisterClassName(unsigned ClassID) const { | |||
614 | ||||
615 | switch (ClassID) { | |||
616 | default: | |||
617 | llvm_unreachable("unknown register class")::llvm::llvm_unreachable_internal("unknown register class", "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp" , 617); | |||
618 | return "PPC::unknown register class"; | |||
619 | case GPRRC: return "PPC::GPRRC"; | |||
620 | case FPRRC: return "PPC::FPRRC"; | |||
621 | case VRRC: return "PPC::VRRC"; | |||
622 | case VSXRC: return "PPC::VSXRC"; | |||
623 | } | |||
624 | } | |||
625 | ||||
626 | unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) const { | |||
627 | if (Vector) { | |||
628 | if (ST->hasQPX()) return 256; | |||
629 | if (ST->hasAltivec()) return 128; | |||
630 | return 0; | |||
631 | } | |||
632 | ||||
633 | if (ST->isPPC64()) | |||
634 | return 64; | |||
635 | return 32; | |||
636 | ||||
637 | } | |||
638 | ||||
639 | unsigned PPCTTIImpl::getCacheLineSize() const { | |||
640 | // Check first if the user specified a custom line size. | |||
641 | if (CacheLineSize.getNumOccurrences() > 0) | |||
642 | return CacheLineSize; | |||
643 | ||||
644 | // On P7, P8 or P9 we have a cache line size of 128. | |||
645 | unsigned Directive = ST->getCPUDirective(); | |||
646 | // Assume that Future CPU has the same cache line size as the others. | |||
647 | if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 || | |||
648 | Directive == PPC::DIR_PWR9 || Directive == PPC::DIR_PWR_FUTURE) | |||
649 | return 128; | |||
650 | ||||
651 | // On other processors return a default of 64 bytes. | |||
652 | return 64; | |||
653 | } | |||
654 | ||||
655 | unsigned PPCTTIImpl::getPrefetchDistance() const { | |||
656 | // This seems like a reasonable default for the BG/Q (this pass is enabled, by | |||
657 | // default, only on the BG/Q). | |||
658 | return 300; | |||
659 | } | |||
660 | ||||
661 | unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) { | |||
662 | unsigned Directive = ST->getCPUDirective(); | |||
663 | // The 440 has no SIMD support, but floating-point instructions | |||
664 | // have a 5-cycle latency, so unroll by 5x for latency hiding. | |||
665 | if (Directive == PPC::DIR_440) | |||
666 | return 5; | |||
667 | ||||
668 | // The A2 has no SIMD support, but floating-point instructions | |||
669 | // have a 6-cycle latency, so unroll by 6x for latency hiding. | |||
670 | if (Directive == PPC::DIR_A2) | |||
671 | return 6; | |||
672 | ||||
673 | // FIXME: For lack of any better information, do no harm... | |||
674 | if (Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500) | |||
675 | return 1; | |||
676 | ||||
677 | // For P7 and P8, floating-point instructions have a 6-cycle latency and | |||
678 | // there are two execution units, so unroll by 12x for latency hiding. | |||
679 | // FIXME: the same for P9 as previous gen until POWER9 scheduling is ready | |||
680 | // Assume that future is the same as the others. | |||
681 | if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 || | |||
682 | Directive == PPC::DIR_PWR9 || Directive == PPC::DIR_PWR_FUTURE) | |||
683 | return 12; | |||
684 | ||||
685 | // For most things, modern systems have two execution units (and | |||
686 | // out-of-order execution). | |||
687 | return 2; | |||
688 | } | |||
689 | ||||
690 | // Adjust the cost of vector instructions on targets which there is overlap | |||
691 | // between the vector and scalar units, thereby reducing the overall throughput | |||
692 | // of vector code wrt. scalar code. | |||
693 | int PPCTTIImpl::vectorCostAdjustment(int Cost, unsigned Opcode, Type *Ty1, | |||
694 | Type *Ty2) { | |||
695 | if (!ST->vectorsUseTwoUnits() || !Ty1->isVectorTy()) | |||
696 | return Cost; | |||
697 | ||||
698 | std::pair<int, MVT> LT1 = TLI->getTypeLegalizationCost(DL, Ty1); | |||
699 | // If type legalization involves splitting the vector, we don't want to | |||
700 | // double the cost at every step - only the last step. | |||
701 | if (LT1.first != 1 || !LT1.second.isVector()) | |||
702 | return Cost; | |||
703 | ||||
704 | int ISD = TLI->InstructionOpcodeToISD(Opcode); | |||
705 | if (TLI->isOperationExpand(ISD, LT1.second)) | |||
706 | return Cost; | |||
707 | ||||
708 | if (Ty2) { | |||
709 | std::pair<int, MVT> LT2 = TLI->getTypeLegalizationCost(DL, Ty2); | |||
710 | if (LT2.first != 1 || !LT2.second.isVector()) | |||
711 | return Cost; | |||
712 | } | |||
713 | ||||
714 | return Cost * 2; | |||
715 | } | |||
716 | ||||
717 | int PPCTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty, | |||
718 | TTI::OperandValueKind Op1Info, | |||
719 | TTI::OperandValueKind Op2Info, | |||
720 | TTI::OperandValueProperties Opd1PropInfo, | |||
721 | TTI::OperandValueProperties Opd2PropInfo, | |||
722 | ArrayRef<const Value *> Args, | |||
723 | const Instruction *CxtI) { | |||
724 | assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode")((TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode" ) ? static_cast<void> (0) : __assert_fail ("TLI->InstructionOpcodeToISD(Opcode) && \"Invalid opcode\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp" , 724, __PRETTY_FUNCTION__)); | |||
725 | ||||
726 | // Fallback to the default implementation. | |||
727 | int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info, | |||
728 | Opd1PropInfo, Opd2PropInfo); | |||
729 | return vectorCostAdjustment(Cost, Opcode, Ty, nullptr); | |||
730 | } | |||
731 | ||||
732 | int PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, | |||
733 | Type *SubTp) { | |||
734 | // Legalize the type. | |||
735 | std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp); | |||
736 | ||||
737 | // PPC, for both Altivec/VSX and QPX, support cheap arbitrary permutations | |||
738 | // (at least in the sense that there need only be one non-loop-invariant | |||
739 | // instruction). We need one such shuffle instruction for each actual | |||
740 | // register (this is not true for arbitrary shuffles, but is true for the | |||
741 | // structured types of shuffles covered by TTI::ShuffleKind). | |||
742 | return vectorCostAdjustment(LT.first, Instruction::ShuffleVector, Tp, | |||
743 | nullptr); | |||
744 | } | |||
745 | ||||
746 | int PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, | |||
747 | const Instruction *I) { | |||
748 | assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode")((TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode" ) ? static_cast<void> (0) : __assert_fail ("TLI->InstructionOpcodeToISD(Opcode) && \"Invalid opcode\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp" , 748, __PRETTY_FUNCTION__)); | |||
749 | ||||
750 | int Cost = BaseT::getCastInstrCost(Opcode, Dst, Src); | |||
751 | return vectorCostAdjustment(Cost, Opcode, Dst, Src); | |||
752 | } | |||
753 | ||||
754 | int PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, | |||
755 | const Instruction *I) { | |||
756 | int Cost = BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I); | |||
757 | return vectorCostAdjustment(Cost, Opcode, ValTy, nullptr); | |||
758 | } | |||
759 | ||||
// Cost of inserting an element into, or extracting an element from, a
// vector. Models the cheap VSX/QPX/P9 paths and the very expensive
// Altivec store-reload (load-hit-store) path.
int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
  assert(Val->isVectorTy() && "This must be a vector type");

  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");

  // Generic cost with the shared vector/scalar-unit adjustment applied.
  int Cost = BaseT::getVectorInstrCost(Opcode, Val, Index);
  Cost = vectorCostAdjustment(Cost, Opcode, Val, nullptr);

  if (ST->hasVSX() && Val->getScalarType()->isDoubleTy()) {
    // Double-precision scalars are already located in index #0 (or #1 if LE).
    if (ISD == ISD::EXTRACT_VECTOR_ELT &&
        Index == (ST->isLittleEndian() ? 1 : 0))
      return 0;

    return Cost;

  } else if (ST->hasQPX() && Val->getScalarType()->isFloatingPointTy()) {
    // Floating point scalars are already located in index #0.
    if (Index == 0)
      return 0;

    return Cost;

  } else if (Val->getScalarType()->isIntegerTy() && Index != -1U) {
    // Integer element with a known index.
    if (ST->hasP9Altivec()) {
      if (ISD == ISD::INSERT_VECTOR_ELT)
        // A move-to VSR and a permute/insert. Assume vector operation cost
        // for both (cost will be 2x on P9).
        return vectorCostAdjustment(2, Opcode, Val, nullptr);

      // It's an extract. Maybe we can do a cheap move-from VSR.
      unsigned EltSize = Val->getScalarSizeInBits();
      if (EltSize == 64) {
        // mfvsrd reads the doubleword at this fixed lane.
        unsigned MfvsrdIndex = ST->isLittleEndian() ? 1 : 0;
        if (Index == MfvsrdIndex)
          return 1;
      } else if (EltSize == 32) {
        // mfvsrwz reads the word at this fixed lane.
        unsigned MfvsrwzIndex = ST->isLittleEndian() ? 2 : 1;
        if (Index == MfvsrwzIndex)
          return 1;
      }

      // We need a vector extract (or mfvsrld). Assume vector operation cost.
      // The cost of the load constant for a vector extract is disregarded
      // (invariant, easily schedulable).
      return vectorCostAdjustment(1, Opcode, Val, nullptr);

    } else if (ST->hasDirectMove())
      // Assume permute has standard cost.
      // Assume move-to/move-from VSR have 2x standard cost.
      return 3;
  }

  // Estimated cost of a load-hit-store delay. This was obtained
  // experimentally as a minimum needed to prevent unprofitable
  // vectorization for the paq8p benchmark. It may need to be
  // raised further if other unprofitable cases remain.
  unsigned LHSPenalty = 2;
  if (ISD == ISD::INSERT_VECTOR_ELT)
    LHSPenalty += 7;

  // Vector element insert/extract with Altivec is very expensive,
  // because they require store and reload with the attendant
  // processor stall for load-hit-store. Until VSX is available,
  // these need to be estimated as very costly.
  if (ISD == ISD::EXTRACT_VECTOR_ELT ||
      ISD == ISD::INSERT_VECTOR_ELT)
    return LHSPenalty + Cost;

  return Cost;
}
832 | ||||
// Cost of a scalar or vector load/store, taking alignment and the
// available vector facilities (Altivec/VSX/QPX) into account.
int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
                                MaybeAlign Alignment, unsigned AddressSpace,
                                const Instruction *I) {
  // Legalize the type.
  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
  assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
         "Invalid Opcode");

  // Generic cost with the shared vector/scalar-unit adjustment applied.
  int Cost = BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
  Cost = vectorCostAdjustment(Cost, Opcode, Src, nullptr);

  // Classify the legalized type by which vector facility handles it.
  bool IsAltivecType = ST->hasAltivec() &&
                       (LT.second == MVT::v16i8 || LT.second == MVT::v8i16 ||
                        LT.second == MVT::v4i32 || LT.second == MVT::v4f32);
  bool IsVSXType = ST->hasVSX() &&
                   (LT.second == MVT::v2f64 || LT.second == MVT::v2i64);
  bool IsQPXType = ST->hasQPX() &&
                   (LT.second == MVT::v4f64 || LT.second == MVT::v4f32);

  // VSX has 32b/64b load instructions. Legalization can handle loading of
  // 32b/64b to VSR correctly and cheaply. But BaseT::getMemoryOpCost and
  // PPCTargetLowering can't compute the cost appropriately. So here we
  // explicitly check this case.
  // NOTE(review): despite the name, MemBytes holds a size in *bits* —
  // getPrimitiveSizeInBits() — which is what the 32/64 comparisons expect.
  unsigned MemBytes = Src->getPrimitiveSizeInBits();
  if (Opcode == Instruction::Load && ST->hasVSX() && IsAltivecType &&
      (MemBytes == 64 || (ST->hasP8Vector() && MemBytes == 32)))
    return 1;

  // Aligned loads and stores are easy.
  unsigned SrcBytes = LT.second.getStoreSize();
  if (!SrcBytes || !Alignment || Alignment >= SrcBytes)
    return Cost;

  // If we can use the permutation-based load sequence, then this is also
  // relatively cheap (not counting loop-invariant instructions): one load plus
  // one permute (the last load in a series has extra cost, but we're
  // neglecting that here). Note that on the P7, we could do unaligned loads
  // for Altivec types using the VSX instructions, but that's more expensive
  // than using the permutation-based load sequence. On the P8, that's no
  // longer true.
  if (Opcode == Instruction::Load &&
      ((!ST->hasP8Vector() && IsAltivecType) || IsQPXType) &&
      Alignment >= LT.second.getScalarType().getStoreSize())
    return Cost + LT.first; // Add the cost of the permutations.

  // For VSX, we can do unaligned loads and stores on Altivec/VSX types. On the
  // P7, unaligned vector loads are more expensive than the permutation-based
  // load sequence, so that might be used instead, but regardless, the net cost
  // is about the same (not counting loop-invariant instructions).
  if (IsVSXType || (ST->hasVSX() && IsAltivecType))
    return Cost;

  // Newer PPC supports unaligned memory access.
  if (TLI->allowsMisalignedMemoryAccesses(LT.second, 0))
    return Cost;

  // PPC in general does not support unaligned loads and stores. They'll need
  // to be decomposed based on the alignment factor.

  // Add the cost of each scalar load or store.
  // Alignment is known non-empty here: the "!Alignment" early-out above
  // already returned for the unknown-alignment case.
  assert(Alignment);
  Cost += LT.first * ((SrcBytes / Alignment->value()) - 1);

  // For a vector type, there is also scalarization overhead (only for
  // stores, loads are expanded using the vector-load + permutation sequence,
  // which is much less expensive).
  if (Src->isVectorTy() && Opcode == Instruction::Store)
    for (int i = 0, e = Src->getVectorNumElements(); i < e; ++i)
      Cost += getVectorInstrCost(Instruction::ExtractElement, Src, i);

  return Cost;
}
905 | ||||
906 | int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, | |||
907 | unsigned Factor, | |||
908 | ArrayRef<unsigned> Indices, | |||
909 | unsigned Alignment, | |||
910 | unsigned AddressSpace, | |||
911 | bool UseMaskForCond, | |||
912 | bool UseMaskForGaps) { | |||
913 | if (UseMaskForCond || UseMaskForGaps) | |||
914 | return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, | |||
915 | Alignment, AddressSpace, | |||
916 | UseMaskForCond, UseMaskForGaps); | |||
917 | ||||
918 | assert(isa<VectorType>(VecTy) &&((isa<VectorType>(VecTy) && "Expect a vector type for interleaved memory op" ) ? static_cast<void> (0) : __assert_fail ("isa<VectorType>(VecTy) && \"Expect a vector type for interleaved memory op\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp" , 919, __PRETTY_FUNCTION__)) | |||
919 | "Expect a vector type for interleaved memory op")((isa<VectorType>(VecTy) && "Expect a vector type for interleaved memory op" ) ? static_cast<void> (0) : __assert_fail ("isa<VectorType>(VecTy) && \"Expect a vector type for interleaved memory op\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp" , 919, __PRETTY_FUNCTION__)); | |||
920 | ||||
921 | // Legalize the type. | |||
922 | std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, VecTy); | |||
923 | ||||
924 | // Firstly, the cost of load/store operation. | |||
925 | int Cost = | |||
926 | getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace); | |||
927 | ||||
928 | // PPC, for both Altivec/VSX and QPX, support cheap arbitrary permutations | |||
929 | // (at least in the sense that there need only be one non-loop-invariant | |||
930 | // instruction). For each result vector, we need one shuffle per incoming | |||
931 | // vector (except that the first shuffle can take two incoming vectors | |||
932 | // because it does not need to take itself). | |||
933 | Cost += Factor*(LT.first-1); | |||
934 | ||||
935 | return Cost; | |||
936 | } | |||
937 | ||||
// Value-argument form of the intrinsic cost query: no PPC-specific
// handling, so defer entirely to the generic implementation.
unsigned PPCTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
    ArrayRef<Value*> Args, FastMathFlags FMF, unsigned VF) {
  return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
}
942 | ||||
943 | unsigned PPCTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, | |||
944 | ArrayRef<Type*> Tys, FastMathFlags FMF, | |||
945 | unsigned ScalarizationCostPassed) { | |||
946 | if (ID == Intrinsic::bswap && ST->hasP9Vector()) | |||
947 | return TLI->getTypeLegalizationCost(DL, RetTy).first; | |||
948 | return BaseT::getIntrinsicInstrCost(ID, RetTy, Tys, FMF, | |||
949 | ScalarizationCostPassed); | |||
950 | } | |||
951 | ||||
// Returns true — and reports the exit branch through *BI — when this loop
// can be converted to a hardware loop, letting LSR avoid a separate
// compare. Returns false if any nested loop is itself convertible
// (presumably so the inner loop gets the count register — TODO confirm).
bool PPCTTIImpl::canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
                            LoopInfo *LI, DominatorTree *DT,
                            AssumptionCache *AC, TargetLibraryInfo *LibInfo) {
  // Process nested loops first.
  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
    if (canSaveCmp(*I, BI, SE, LI, DT, AC, LibInfo))
      return false; // Stop search.

  HardwareLoopInfo HWLoopInfo(L);

  // The loop must be analyzable as a hardware loop...
  if (!HWLoopInfo.canAnalyze(*LI))
    return false;

  // ...profitable to convert on this subtarget...
  if (!isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo))
    return false;

  // ...and structurally a valid candidate.
  if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT))
    return false;

  // Report the saved exit branch to the caller.
  *BI = HWLoopInfo.ExitBranch;
  return true;
}
974 | ||||
975 | bool PPCTTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1, | |||
976 | TargetTransformInfo::LSRCost &C2) { | |||
977 | // PowerPC default behaviour here is "instruction number 1st priority". | |||
978 | // If LsrNoInsnsCost is set, call default implementation. | |||
979 | if (!LsrNoInsnsCost) | |||
980 | return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, | |||
981 | C1.NumBaseAdds, C1.ScaleCost, C1.ImmCost, C1.SetupCost) < | |||
982 | std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, | |||
983 | C2.NumBaseAdds, C2.ScaleCost, C2.ImmCost, C2.SetupCost); | |||
984 | else | |||
985 | return TargetTransformInfoImplBase::isLSRCostLess(C1, C2); | |||
986 | } |
1 | //===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===// | ||||||||||||||
2 | // | ||||||||||||||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||||||||||||
4 | // See https://llvm.org/LICENSE.txt for license information. | ||||||||||||||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||||||||||||
6 | // | ||||||||||||||
7 | //===----------------------------------------------------------------------===// | ||||||||||||||
8 | /// \file | ||||||||||||||
9 | /// This file provides helpers for the implementation of | ||||||||||||||
10 | /// a TargetTransformInfo-conforming class. | ||||||||||||||
11 | /// | ||||||||||||||
12 | //===----------------------------------------------------------------------===// | ||||||||||||||
13 | |||||||||||||||
14 | #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H | ||||||||||||||
15 | #define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H | ||||||||||||||
16 | |||||||||||||||
17 | #include "llvm/Analysis/ScalarEvolutionExpressions.h" | ||||||||||||||
18 | #include "llvm/Analysis/TargetTransformInfo.h" | ||||||||||||||
19 | #include "llvm/Analysis/VectorUtils.h" | ||||||||||||||
20 | #include "llvm/IR/CallSite.h" | ||||||||||||||
21 | #include "llvm/IR/DataLayout.h" | ||||||||||||||
22 | #include "llvm/IR/Function.h" | ||||||||||||||
23 | #include "llvm/IR/GetElementPtrTypeIterator.h" | ||||||||||||||
24 | #include "llvm/IR/Operator.h" | ||||||||||||||
25 | #include "llvm/IR/Type.h" | ||||||||||||||
26 | |||||||||||||||
27 | namespace llvm { | ||||||||||||||
28 | |||||||||||||||
29 | /// Base class for use as a mix-in that aids implementing | ||||||||||||||
30 | /// a TargetTransformInfo-compatible class. | ||||||||||||||
31 | class TargetTransformInfoImplBase { | ||||||||||||||
protected:
  /// Shorthand used throughout this mix-in.
  typedef TargetTransformInfo TTI;

  // Data layout of the module under analysis; all cost queries below are
  // answered relative to it.
  const DataLayout &DL;

  explicit TargetTransformInfoImplBase(const DataLayout &DL) : DL(DL) {}

public:
  // Provide value semantics. MSVC requires that we spell all of these out.
  TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg)
      : DL(Arg.DL) {}
  TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {}

  const DataLayout &getDataLayout() const { return DL; }
46 | |||||||||||||||
47 | unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) { | ||||||||||||||
48 | switch (Opcode) { | ||||||||||||||
49 | default: | ||||||||||||||
50 | // By default, just classify everything as 'basic'. | ||||||||||||||
51 | return TTI::TCC_Basic; | ||||||||||||||
52 | |||||||||||||||
53 | case Instruction::GetElementPtr: | ||||||||||||||
54 | llvm_unreachable("Use getGEPCost for GEP operations!")::llvm::llvm_unreachable_internal("Use getGEPCost for GEP operations!" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h" , 54); | ||||||||||||||
55 | |||||||||||||||
56 | case Instruction::BitCast: | ||||||||||||||
57 | assert(OpTy && "Cast instructions must provide the operand type")((OpTy && "Cast instructions must provide the operand type" ) ? static_cast<void> (0) : __assert_fail ("OpTy && \"Cast instructions must provide the operand type\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h" , 57, __PRETTY_FUNCTION__)); | ||||||||||||||
58 | if (Ty == OpTy || (Ty->isPointerTy() && OpTy->isPointerTy())) | ||||||||||||||
59 | // Identity and pointer-to-pointer casts are free. | ||||||||||||||
60 | return TTI::TCC_Free; | ||||||||||||||
61 | |||||||||||||||
62 | // Otherwise, the default basic cost is used. | ||||||||||||||
63 | return TTI::TCC_Basic; | ||||||||||||||
64 | |||||||||||||||
65 | case Instruction::FDiv: | ||||||||||||||
66 | case Instruction::FRem: | ||||||||||||||
67 | case Instruction::SDiv: | ||||||||||||||
68 | case Instruction::SRem: | ||||||||||||||
69 | case Instruction::UDiv: | ||||||||||||||
70 | case Instruction::URem: | ||||||||||||||
71 | return TTI::TCC_Expensive; | ||||||||||||||
72 | |||||||||||||||
73 | case Instruction::IntToPtr: { | ||||||||||||||
74 | // An inttoptr cast is free so long as the input is a legal integer type | ||||||||||||||
75 | // which doesn't contain values outside the range of a pointer. | ||||||||||||||
76 | unsigned OpSize = OpTy->getScalarSizeInBits(); | ||||||||||||||
| |||||||||||||||
77 | if (DL.isLegalInteger(OpSize) && | ||||||||||||||
78 | OpSize <= DL.getPointerTypeSizeInBits(Ty)) | ||||||||||||||
79 | return TTI::TCC_Free; | ||||||||||||||
80 | |||||||||||||||
81 | // Otherwise it's not a no-op. | ||||||||||||||
82 | return TTI::TCC_Basic; | ||||||||||||||
83 | } | ||||||||||||||
84 | case Instruction::PtrToInt: { | ||||||||||||||
85 | // A ptrtoint cast is free so long as the result is large enough to store | ||||||||||||||
86 | // the pointer, and a legal integer type. | ||||||||||||||
87 | unsigned DestSize = Ty->getScalarSizeInBits(); | ||||||||||||||
88 | if (DL.isLegalInteger(DestSize) && | ||||||||||||||
89 | DestSize >= DL.getPointerTypeSizeInBits(OpTy)) | ||||||||||||||
90 | return TTI::TCC_Free; | ||||||||||||||
91 | |||||||||||||||
92 | // Otherwise it's not a no-op. | ||||||||||||||
93 | return TTI::TCC_Basic; | ||||||||||||||
94 | } | ||||||||||||||
95 | case Instruction::Trunc: | ||||||||||||||
96 | // trunc to a native type is free (assuming the target has compare and | ||||||||||||||
97 | // shift-right of the same width). | ||||||||||||||
98 | if (DL.isLegalInteger(DL.getTypeSizeInBits(Ty))) | ||||||||||||||
99 | return TTI::TCC_Free; | ||||||||||||||
100 | |||||||||||||||
101 | return TTI::TCC_Basic; | ||||||||||||||
102 | } | ||||||||||||||
103 | } | ||||||||||||||
104 | |||||||||||||||
105 | int getGEPCost(Type *PointeeType, const Value *Ptr, | ||||||||||||||
106 | ArrayRef<const Value *> Operands) { | ||||||||||||||
107 | // In the basic model, we just assume that all-constant GEPs will be folded | ||||||||||||||
108 | // into their uses via addressing modes. | ||||||||||||||
109 | for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx) | ||||||||||||||
110 | if (!isa<Constant>(Operands[Idx])) | ||||||||||||||
111 | return TTI::TCC_Basic; | ||||||||||||||
112 | |||||||||||||||
113 | return TTI::TCC_Free; | ||||||||||||||
114 | } | ||||||||||||||
115 | |||||||||||||||
  /// Without target knowledge, assume every switch case forms its own
  /// cluster and that no jump table is emitted (JTSize = 0).
  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize,
                                            ProfileSummaryInfo *PSI,
                                            BlockFrequencyInfo *BFI) {
    (void)PSI; // Profile data is not consulted by the base implementation.
    (void)BFI;
    JTSize = 0;
    return SI.getNumCases();
  }

  /// Extensions cost one basic unit by default; targets with free
  /// sign/zero extensions override this.
  int getExtCost(const Instruction *I, const Value *Src) {
    return TTI::TCC_Basic;
  }

  /// Approximate call cost: one basic unit per argument plus one for the
  /// call itself.
  unsigned getCallCost(FunctionType *FTy, int NumArgs, const User *U) {
    assert(FTy && "FunctionType must be provided to this routine.");

    // The target-independent implementation just measures the size of the
    // function by approximating that each argument will take on average one
    // instruction to prepare.

    if (NumArgs < 0)
      // Set the argument number to the number of explicit arguments in the
      // function.
      NumArgs = FTy->getNumParams();

    return TTI::TCC_Basic * (NumArgs + 1);
  }

  /// No scaling of the inlining threshold by default.
  unsigned getInliningThresholdMultiplier() { return 1; }

  /// Default percent bonus applied to the inline threshold for vector code.
  int getInlinerVectorBonusPercent() { return 150; }

  /// memcpy is modeled as expensive unless a target knows better.
  unsigned getMemcpyCost(const Instruction *I) {
    return TTI::TCC_Expensive;
  }

  // Divergence-analysis defaults: no branch divergence support.
  bool hasBranchDivergence() { return false; }

  bool useGPUDivergenceAnalysis() { return false; }

  bool isSourceOfDivergence(const Value *V) { return false; }

  bool isAlwaysUniform(const Value *V) { return false; }

  /// -1 signals that the target has no flat address space.
  unsigned getFlatAddressSpace () {
    return -1;
  }

  // Flat-address-space rewriting is unsupported by default.
  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const {
    return false;
  }

  bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
                                        Value *OldV, Value *NewV) const {
    return false;
  }
174 | |||||||||||||||
  /// Return true if a call to \p F is expected to lower to an actual machine
  /// call, false if it will likely be expanded inline (intrinsics and a
  /// hard-coded list of well-known libm/libc functions).
  bool isLoweredToCall(const Function *F) {
    assert(F && "A concrete function must be provided to this routine.")((F && "A concrete function must be provided to this routine." ) ? static_cast<void> (0) : __assert_fail ("F && \"A concrete function must be provided to this routine.\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h" , 176, __PRETTY_FUNCTION__));

    // FIXME: These should almost certainly not be handled here, and instead
    // handled with the help of TLI or the target itself. This was largely
    // ported from existing analysis heuristics here so that such refactorings
    // can take place in the future.

    // Intrinsics are expanded by the backend, not called.
    if (F->isIntrinsic())
      return false;

    // Local or unnamed functions cannot be matched against the libcall
    // lists below; assume a real call.
    if (F->hasLocalLinkage() || !F->hasName())
      return true;

    StringRef Name = F->getName();

    // These will all likely lower to a single selection DAG node.
    if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
        Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" ||
        Name == "fmin" || Name == "fminf" || Name == "fminl" ||
        Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
        Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" ||
        Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
      return false;

    // These are all likely to be optimized into something smaller.
    if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
        Name == "exp2l" || Name == "exp2f" || Name == "floor" ||
        Name == "floorf" || Name == "ceil" || Name == "round" ||
        Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" ||
        Name == "llabs")
      return false;

    // Anything else is assumed to be a genuine call.
    return true;
  }
210 | |||||||||||||||
  /// Hardware loops are assumed unprofitable unless a target overrides.
  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC,
                                TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) {
    return false;
  }

  /// By default do not prefer predicated (tail-folded) vector bodies over
  /// a scalar epilogue.
  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
                                   AssumptionCache &AC, TargetLibraryInfo *TLI,
                                   DominatorTree *DT,
                                   const LoopAccessInfo *LAI) const {
    return false;
  }

  /// No target-specific unrolling preferences by default.
  void getUnrollingPreferences(Loop *, ScalarEvolution &,
                               TTI::UnrollingPreferences &) {}

  // Conservatively report that no immediates fold into add/icmp.
  bool isLegalAddImmediate(int64_t Imm) { return false; }

  bool isLegalICmpImmediate(int64_t Imm) { return false; }

  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale,
                             unsigned AddrSpace, Instruction *I = nullptr) {
    // Guess that only reg and reg+reg addressing is allowed. This heuristic is
    // taken from the implementation of LSR.
    return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
  }

  /// Lexicographic comparison of LSR cost components. Note the base
  /// implementation does not rank by instruction count (Insns); targets
  /// (e.g. PPC) override this to add that key.
  bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) {
    return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
                    C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
           std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
                    C2.ScaleCost, C2.ImmCost, C2.SetupCost);
  }

  bool canMacroFuseCmp() { return false; }

  /// Targets that can fold the loop-exit compare into a special counted-loop
  /// mechanism override this; the default cannot.
  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
                  DominatorTree *DT, AssumptionCache *AC,
                  TargetLibraryInfo *LibInfo) {
    return false;
  }

  bool shouldFavorPostInc() const { return false; }

  bool shouldFavorBackedgeIndex(const Loop *L) const { return false; }

  // Masked loads/stores are unsupported by default.
  bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) { return false; }

  bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) { return false; }
262 | |||||||||||||||
  bool isLegalNTStore(Type *DataType, Align Alignment) {
    // By default, assume nontemporal memory stores are available for stores
    // that are aligned and have a size that is a power of 2.
    unsigned DataSize = DL.getTypeStoreSize(DataType);
    return Alignment >= DataSize && isPowerOf2_32(DataSize);
  }

  bool isLegalNTLoad(Type *DataType, Align Alignment) {
    // By default, assume nontemporal memory loads are available for loads that
    // are aligned and have a size that is a power of 2.
    unsigned DataSize = DL.getTypeStoreSize(DataType);
    return Alignment >= DataSize && isPowerOf2_32(DataSize);
  }

  // Masked gather/scatter are unsupported unless a target overrides.
  bool isLegalMaskedScatter(Type *DataType, MaybeAlign Alignment) {
    return false;
  }

  bool isLegalMaskedGather(Type *DataType, MaybeAlign Alignment) {
    return false;
  }
284 | |||||||||||||||
285 | bool isLegalMaskedCompressStore(Type *DataType) { return false; } | ||||||||||||||
286 | |||||||||||||||
287 | bool isLegalMaskedExpandLoad(Type *DataType) { return false; } | ||||||||||||||
288 | |||||||||||||||
289 | bool hasDivRemOp(Type *DataType, bool IsSigned) { return false; } | ||||||||||||||
290 | |||||||||||||||
291 | bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) { return false; } | ||||||||||||||
292 | |||||||||||||||
293 | bool prefersVectorizedAddressing() { return true; } | ||||||||||||||
294 | |||||||||||||||
295 | int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, | ||||||||||||||
296 | bool HasBaseReg, int64_t Scale, unsigned AddrSpace) { | ||||||||||||||
297 | // Guess that all legal addressing mode are free. | ||||||||||||||
298 | if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, | ||||||||||||||
299 | Scale, AddrSpace)) | ||||||||||||||
300 | return 0; | ||||||||||||||
301 | return -1; | ||||||||||||||
302 | } | ||||||||||||||
303 | |||||||||||||||
// LSR does not need to query costs with the surrounding instruction here.
bool LSRWithInstrQueries() { return false; }

// Truncates are not assumed free by default.
bool isTruncateFree(Type *Ty1, Type *Ty2) { return false; }

bool isProfitableToHoist(Instruction *I) { return true; }

// Do not rely on target-provided alias analysis by default.
bool useAA() { return false; }

// Without target lowering information no type can be claimed legal.
bool isTypeLegal(Type *Ty) { return false; }

bool shouldBuildLookupTables() { return true; }
bool shouldBuildLookupTablesForConstant(Constant *C) { return true; }

bool useColdCCForColdCall(Function &F) { return false; }

// Scalarization overhead is modeled as free at this base layer; derived
// implementations refine it.
unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
  return 0;
}

unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                          unsigned VF) { return 0; }

bool supportsEfficientVectorElementLoadStore() { return false; }

bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; }

// Default-constructed options leave memcmp expansion disabled.
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                  bool IsZeroCmp) const {
  return {};
}

bool enableInterleavedAccessVectorization() { return false; }

bool enableMaskedInterleavedAccessVectorization() { return false; }

bool isFPVectorizationPotentiallyUnsafe() { return false; }
340 | |||||||||||||||
// Misaligned accesses are assumed not allowed unless a target overrides.
bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
                                    unsigned BitWidth,
                                    unsigned AddressSpace,
                                    unsigned Alignment,
                                    bool *Fast) { return false; }

// Assume popcount is lowered in software (libcall or bit tricks).
TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) {
  return TTI::PSK_Software;
}

bool haveFastSqrt(Type *Ty) { return false; }

bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) { return true; }

// One basic cost unit for any floating-point operation.
unsigned getFPOpCost(Type *Ty) { return TargetTransformInfo::TCC_Basic; }

// Immediates add no code size by default.
int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
                          Type *Ty) {
  return 0;
}

unsigned getIntImmCost(const APInt &Imm, Type *Ty) { return TTI::TCC_Basic; }

// Immediates used directly by an instruction are modeled as free.
unsigned getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
                           Type *Ty) {
  return TTI::TCC_Free;
}

unsigned getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                             const APInt &Imm, Type *Ty) {
  return TTI::TCC_Free;
}
373 | |||||||||||||||
// Minimal generic guess for the size of a register class.
unsigned getNumberOfRegisters(unsigned ClassID) const { return 8; }
375 | |||||||||||||||
376 | unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const { | ||||||||||||||
377 | return Vector ? 1 : 0; | ||||||||||||||
378 | }; | ||||||||||||||
379 | |||||||||||||||
380 | const char* getRegisterClassName(unsigned ClassID) const { | ||||||||||||||
381 | switch (ClassID) { | ||||||||||||||
382 | default: | ||||||||||||||
383 | return "Generic::Unknown Register Class"; | ||||||||||||||
384 | case 0: return "Generic::ScalarRC"; | ||||||||||||||
385 | case 1: return "Generic::VectorRC"; | ||||||||||||||
386 | } | ||||||||||||||
387 | } | ||||||||||||||
388 | |||||||||||||||
// Minimal generic register width; targets report their real widths.
unsigned getRegisterBitWidth(bool Vector) const { return 32; }

unsigned getMinVectorRegisterBitWidth() { return 128; }

bool shouldMaximizeVectorBandwidth(bool OptSize) const { return false; }

// 0 means "no target-imposed minimum vectorization factor".
unsigned getMinimumVF(unsigned ElemWidth) const { return 0; }

bool
shouldConsiderAddressTypePromotion(const Instruction &I,
                                   bool &AllowPromotionWithoutCommonHeader) {
  // No address-type promotion by default; the out-parameter must still be
  // initialized because callers may read it unconditionally.
  AllowPromotionWithoutCommonHeader = false;
  return false;
}
403 | |||||||||||||||
// 0 = unknown cache line size at this layer.
unsigned getCacheLineSize() const { return 0; }

// Cache size is unknown here: every valid level yields None; an invalid
// enumerator is a programming error.
llvm::Optional<unsigned> getCacheSize(TargetTransformInfo::CacheLevel Level) const {
  switch (Level) {
  case TargetTransformInfo::CacheLevel::L1D:
    LLVM_FALLTHROUGH;
  case TargetTransformInfo::CacheLevel::L2D:
    return llvm::Optional<unsigned>();
  }
  llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
}

// Cache associativity is likewise unknown; mirrors getCacheSize() above.
llvm::Optional<unsigned> getCacheAssociativity(
    TargetTransformInfo::CacheLevel Level) const {
  switch (Level) {
  case TargetTransformInfo::CacheLevel::L1D:
    LLVM_FALLTHROUGH;
  case TargetTransformInfo::CacheLevel::L2D:
    return llvm::Optional<unsigned>();
  }

  llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
}
427 | |||||||||||||||
// Software prefetching defaults: distance 0 disables it; stride 1 and an
// unbounded iterations-ahead limit impose no further restriction.
unsigned getPrefetchDistance() const { return 0; }
unsigned getMinPrefetchStride() const { return 1; }
unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; }

unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }

// Every arithmetic instruction costs one basic unit regardless of operand
// kinds/properties; targets refine this.
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
                                TTI::OperandValueKind Opd1Info,
                                TTI::OperandValueKind Opd2Info,
                                TTI::OperandValueProperties Opd1PropInfo,
                                TTI::OperandValueProperties Opd2PropInfo,
                                ArrayRef<const Value *> Args,
                                const Instruction *CxtI = nullptr) {
  return 1;
}
443 | |||||||||||||||
// The cost queries below default to one basic unit (or free, where 0 is
// returned) and are refined by BasicTTIImpl / target implementations.
unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Ty, int Index,
                        Type *SubTp) {
  return 1;
}

unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                          const Instruction *I) { return 1; }

unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                  VectorType *VecTy, unsigned Index) {
  return 1;
}

unsigned getCFInstrCost(unsigned Opcode) { return 1; }

unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                            const Instruction *I) {
  return 1;
}

unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
  return 1;
}

unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                         unsigned AddressSpace, const Instruction *I) {
  return 1;
}

unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                               unsigned AddressSpace) {
  return 1;
}

unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
                                bool VariableMask,
                                unsigned Alignment) {
  return 1;
}

unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                    unsigned Factor,
                                    ArrayRef<unsigned> Indices,
                                    unsigned Alignment, unsigned AddressSpace,
                                    bool UseMaskForCond = false,
                                    bool UseMaskForGaps = false) {
  return 1;
}

unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                               ArrayRef<Type *> Tys, FastMathFlags FMF,
                               unsigned ScalarizationCostPassed) {
  return 1;
}
unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                               ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) {
  return 1;
}

unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) {
  return 1;
}

// 0 parts signals "unknown" to callers of this query.
unsigned getNumberOfParts(Type *Tp) { return 0; }

// Address computation is assumed to fold into the memory operation.
unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *,
                                   const SCEV *) {
  return 0;
}

unsigned getArithmeticReductionCost(unsigned, Type *, bool) { return 1; }

unsigned getMinMaxReductionCost(Type *, Type *, bool, bool) { return 1; }

// Keeping values live across a call is modeled as free by default.
unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) { return 0; }
519 | |||||||||||||||
// This base layer recognizes no target-specific memory intrinsics.
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) {
  return false;
}

// 0 = no element unordered-atomic memory intrinsics supported.
unsigned getAtomicMemIntrinsicMaxElementSize() const {
  // Note for overrides: You must ensure for all element unordered-atomic
  // memory intrinsics that all power-of-2 element sizes up to, and
  // including, the return value of this method have a corresponding
  // runtime lib call. These runtime lib call definitions can be found
  // in RuntimeLibcalls.h
  return 0;
}

// No target-specific way to extract a usable result from a mem intrinsic.
Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                         Type *ExpectedType) {
  return nullptr;
}

// Lower memcpy loops one byte (i8) at a time unless a target widens this.
Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                                unsigned SrcAlign, unsigned DestAlign) const {
  return Type::getInt8Ty(Context);
}

// Residual (tail) bytes of a lowered memcpy are likewise copied bytewise:
// emit one i8 per remaining byte.
void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
                                       LLVMContext &Context,
                                       unsigned RemainingBytes,
                                       unsigned SrcAlign,
                                       unsigned DestAlign) const {
  for (unsigned i = 0; i != RemainingBytes; ++i)
    OpsOut.push_back(Type::getInt8Ty(Context));
}
551 | |||||||||||||||
// Inlining compatibility: conservatively require identical "target-cpu" and
// "target-features" attribute strings on caller and callee.
bool areInlineCompatible(const Function *Caller,
                         const Function *Callee) const {
  return (Caller->getFnAttribute("target-cpu") ==
          Callee->getFnAttribute("target-cpu")) &&
         (Caller->getFnAttribute("target-features") ==
          Callee->getFnAttribute("target-features"));
}

// Argument ABI compatibility uses the same subtarget-equality rule.
bool areFunctionArgsABICompatible(const Function *Caller, const Function *Callee,
                                  SmallPtrSetImpl<Argument *> &Args) const {
  return (Caller->getFnAttribute("target-cpu") ==
          Callee->getFnAttribute("target-cpu")) &&
         (Caller->getFnAttribute("target-features") ==
          Callee->getFnAttribute("target-features"));
}
567 | |||||||||||||||
// Pre/post-indexed addressing modes are assumed unavailable by default.
bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty,
                        const DataLayout &DL) const {
  return false;
}

bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty,
                         const DataLayout &DL) const {
  return false;
}

unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; }

// The load/store vectorizer may combine any loads/stores unless a target
// restricts it through these hooks.
bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; }

bool isLegalToVectorizeStore(StoreInst *SI) const { return true; }

bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                 unsigned Alignment,
                                 unsigned AddrSpace) const {
  return true;
}

bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                  unsigned Alignment,
                                  unsigned AddrSpace) const {
  return true;
}

// Keep whatever vectorization factor the caller proposed.
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                             unsigned ChainSizeInBytes,
                             VectorType *VecTy) const {
  return VF;
}

unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                              unsigned ChainSizeInBytes,
                              VectorType *VecTy) const {
  return VF;
}

// Prefer shuffle-based reduction expansion over reduction intrinsics.
bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                           TTI::ReductionFlags Flags) const {
  return false;
}

bool shouldExpandReduction(const IntrinsicInst *II) const {
  return true;
}

// GlobalISel: rematerializing a global's address costs one unit.
unsigned getGISelRematGlobalCost() const {
  return 1;
}
620 | |||||||||||||||
621 | protected: | ||||||||||||||
622 | // Obtain the minimum required size to hold the value (without the sign) | ||||||||||||||
623 | // In case of a vector it returns the min required size for one element. | ||||||||||||||
624 | unsigned minRequiredElementSize(const Value* Val, bool &isSigned) { | ||||||||||||||
625 | if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) { | ||||||||||||||
626 | const auto* VectorValue = cast<Constant>(Val); | ||||||||||||||
627 | |||||||||||||||
628 | // In case of a vector need to pick the max between the min | ||||||||||||||
629 | // required size for each element | ||||||||||||||
630 | auto *VT = cast<VectorType>(Val->getType()); | ||||||||||||||
631 | |||||||||||||||
632 | // Assume unsigned elements | ||||||||||||||
633 | isSigned = false; | ||||||||||||||
634 | |||||||||||||||
635 | // The max required size is the total vector width divided by num | ||||||||||||||
636 | // of elements in the vector | ||||||||||||||
637 | unsigned MaxRequiredSize = VT->getBitWidth() / VT->getNumElements(); | ||||||||||||||
638 | |||||||||||||||
639 | unsigned MinRequiredSize = 0; | ||||||||||||||
640 | for(unsigned i = 0, e = VT->getNumElements(); i < e; ++i) { | ||||||||||||||
641 | if (auto* IntElement = | ||||||||||||||
642 | dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) { | ||||||||||||||
643 | bool signedElement = IntElement->getValue().isNegative(); | ||||||||||||||
644 | // Get the element min required size. | ||||||||||||||
645 | unsigned ElementMinRequiredSize = | ||||||||||||||
646 | IntElement->getValue().getMinSignedBits() - 1; | ||||||||||||||
647 | // In case one element is signed then all the vector is signed. | ||||||||||||||
648 | isSigned |= signedElement; | ||||||||||||||
649 | // Save the max required bit size between all the elements. | ||||||||||||||
650 | MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize); | ||||||||||||||
651 | } | ||||||||||||||
652 | else { | ||||||||||||||
653 | // not an int constant element | ||||||||||||||
654 | return MaxRequiredSize; | ||||||||||||||
655 | } | ||||||||||||||
656 | } | ||||||||||||||
657 | return MinRequiredSize; | ||||||||||||||
658 | } | ||||||||||||||
659 | |||||||||||||||
660 | if (const auto* CI = dyn_cast<ConstantInt>(Val)) { | ||||||||||||||
661 | isSigned = CI->getValue().isNegative(); | ||||||||||||||
662 | return CI->getValue().getMinSignedBits() - 1; | ||||||||||||||
663 | } | ||||||||||||||
664 | |||||||||||||||
665 | if (const auto* Cast = dyn_cast<SExtInst>(Val)) { | ||||||||||||||
666 | isSigned = true; | ||||||||||||||
667 | return Cast->getSrcTy()->getScalarSizeInBits() - 1; | ||||||||||||||
668 | } | ||||||||||||||
669 | |||||||||||||||
670 | if (const auto* Cast = dyn_cast<ZExtInst>(Val)) { | ||||||||||||||
671 | isSigned = false; | ||||||||||||||
672 | return Cast->getSrcTy()->getScalarSizeInBits(); | ||||||||||||||
673 | } | ||||||||||||||
674 | |||||||||||||||
675 | isSigned = false; | ||||||||||||||
676 | return Val->getType()->getScalarSizeInBits(); | ||||||||||||||
677 | } | ||||||||||||||
678 | |||||||||||||||
// True when \p Ptr is a SCEV add-recurrence, i.e. the address advances by a
// per-iteration stride.
bool isStridedAccess(const SCEV *Ptr) {
  return Ptr && isa<SCEVAddRecExpr>(Ptr);
}

// Returns the stride of \p Ptr as a compile-time constant, or null when the
// access is not strided or the stride is not a constant.
const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE,
                                          const SCEV *Ptr) {
  if (!isStridedAccess(Ptr))
    return nullptr;
  const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr);
  return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE));
}

// True when the access has a known constant stride (fitting in 64 bits)
// smaller than \p MergeDistance.
bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr,
                                     int64_t MergeDistance) {
  const SCEVConstant *Step = getConstantStrideStep(SE, Ptr);
  if (!Step)
    return false;
  APInt StrideVal = Step->getAPInt();
  if (StrideVal.getBitWidth() > 64)
    return false;
  // FIXME: Need to take absolute value for negative stride case.
  return StrideVal.getSExtValue() < MergeDistance;
}
702 | }; | ||||||||||||||
703 | |||||||||||||||
/// CRTP base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
template <typename T>
class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
private:
  typedef TargetTransformInfoImplBase BaseT;

protected:
  // Forwards the DataLayout to the non-CRTP base.
  explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {}

public:
  // Re-export the base-class overloads not redefined below.
  using BaseT::getCallCost;
716 | |||||||||||||||
// Cost of a direct call to \p F with \p NumArgs arguments (a negative count
// means "use the function's own arity"). Intrinsic calls are priced through
// getIntrinsicCost; calls that lower directly (no real call emitted) get a
// basic cost; everything else goes through the FunctionType overload.
unsigned getCallCost(const Function *F, int NumArgs, const User *U) {
  assert(F && "A concrete function must be provided to this routine.");

  if (NumArgs < 0)
    // Set the argument number to the number of explicit arguments in the
    // function.
    NumArgs = F->arg_size();

  if (Intrinsic::ID IID = F->getIntrinsicID()) {
    FunctionType *FTy = F->getFunctionType();
    SmallVector<Type *, 8> ParamTys(FTy->param_begin(), FTy->param_end());
    // Dispatch through the CRTP-derived class so targets can override.
    return static_cast<T *>(this)
        ->getIntrinsicCost(IID, FTy->getReturnType(), ParamTys, U);
  }

  if (!static_cast<T *>(this)->isLoweredToCall(F))
    return TTI::TCC_Basic; // Give a basic cost if it will be lowered
                           // directly.

  return static_cast<T *>(this)->getCallCost(F->getFunctionType(), NumArgs, U);
}
738 | |||||||||||||||
739 | unsigned getCallCost(const Function *F, ArrayRef<const Value *> Arguments, | ||||||||||||||
740 | const User *U) { | ||||||||||||||
741 | // Simply delegate to generic handling of the call. | ||||||||||||||
742 | // FIXME: We should use instsimplify or something else to catch calls which | ||||||||||||||
743 | // will constant fold with these arguments. | ||||||||||||||
744 | return static_cast<T *>(this)->getCallCost(F, Arguments.size(), U); | ||||||||||||||
745 | } | ||||||||||||||
746 | |||||||||||||||
747 | using BaseT::getGEPCost; | ||||||||||||||
748 | |||||||||||||||
// Cost of a GEP over \p Ptr with indices \p Operands: TCC_Free when the
// whole address computation (global base, constant offset, at most one
// scaled index) folds into a legal addressing mode, TCC_Basic otherwise.
int getGEPCost(Type *PointeeType, const Value *Ptr,
               ArrayRef<const Value *> Operands) {
  assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
  // TODO: will remove this when pointers have an opaque type.
  assert(Ptr->getType()->getScalarType()->getPointerElementType() ==
             PointeeType &&
         "explicit pointee type doesn't match operand's pointee type");
  auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
  bool HasBaseReg = (BaseGV == nullptr);

  auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
  APInt BaseOffset(PtrSizeBits, 0);
  int64_t Scale = 0;

  auto GTI = gep_type_begin(PointeeType, Operands);
  Type *TargetType = nullptr;

  // Handle the case where the GEP instruction has a single operand,
  // the basis, therefore TargetType is a nullptr.
  if (Operands.empty())
    return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic;

  // Accumulate constant indices into BaseOffset; a single non-constant
  // index may become the Scale; a second one makes the GEP non-foldable.
  for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
    TargetType = GTI.getIndexedType();
    // We assume that the cost of Scalar GEP with constant index and the
    // cost of Vector GEP with splat constant index are the same.
    const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
    if (!ConstIdx)
      if (auto Splat = getSplatValue(*I))
        ConstIdx = dyn_cast<ConstantInt>(Splat);
    if (StructType *STy = GTI.getStructTypeOrNull()) {
      // For structures the index is always splat or scalar constant
      assert(ConstIdx && "Unexpected GEP index");
      uint64_t Field = ConstIdx->getZExtValue();
      BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
    } else {
      int64_t ElementSize = DL.getTypeAllocSize(GTI.getIndexedType());
      if (ConstIdx) {
        BaseOffset +=
            ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
      } else {
        // Needs scale register.
        if (Scale != 0)
          // No addressing mode takes two scale registers.
          return TTI::TCC_Basic;
        Scale = ElementSize;
      }
    }
  }

  if (static_cast<T *>(this)->isLegalAddressingMode(
          TargetType, const_cast<GlobalValue *>(BaseGV),
          BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale,
          Ptr->getType()->getPointerAddressSpace()))
    return TTI::TCC_Free;
  return TTI::TCC_Basic;
}
806 | |||||||||||||||
// Generic intrinsic call cost: most intrinsics cost one basic unit, memcpy
// is delegated to the target's memcpy hook, and intrinsics that vanish
// during lowering (markers, debug info, coroutine scaffolding) are free.
unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                          ArrayRef<Type *> ParamTys, const User *U) {
  switch (IID) {
  default:
    // Intrinsics rarely (if ever) have normal argument setup constraints.
    // Model them as having a basic instruction cost.
    return TTI::TCC_Basic;

  // TODO: other libc intrinsics.
  case Intrinsic::memcpy:
    return static_cast<T *>(this)->getMemcpyCost(dyn_cast<Instruction>(U));

  case Intrinsic::annotation:
  case Intrinsic::assume:
  case Intrinsic::sideeffect:
  case Intrinsic::dbg_declare:
  case Intrinsic::dbg_value:
  case Intrinsic::dbg_label:
  case Intrinsic::invariant_start:
  case Intrinsic::invariant_end:
  case Intrinsic::launder_invariant_group:
  case Intrinsic::strip_invariant_group:
  case Intrinsic::is_constant:
  case Intrinsic::lifetime_start:
  case Intrinsic::lifetime_end:
  case Intrinsic::objectsize:
  case Intrinsic::ptr_annotation:
  case Intrinsic::var_annotation:
  case Intrinsic::experimental_gc_result:
  case Intrinsic::experimental_gc_relocate:
  case Intrinsic::coro_alloc:
  case Intrinsic::coro_begin:
  case Intrinsic::coro_free:
  case Intrinsic::coro_end:
  case Intrinsic::coro_frame:
  case Intrinsic::coro_size:
  case Intrinsic::coro_suspend:
  case Intrinsic::coro_param:
  case Intrinsic::coro_subfn_addr:
    // These intrinsics don't actually represent code after lowering.
    return TTI::TCC_Free;
  }
}
850 | |||||||||||||||
851 | unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, | ||||||||||||||
852 | ArrayRef<const Value *> Arguments, const User *U) { | ||||||||||||||
853 | // Delegate to the generic intrinsic handling code. This mostly provides an | ||||||||||||||
854 | // opportunity for targets to (for example) special case the cost of | ||||||||||||||
855 | // certain intrinsics based on constants used as arguments. | ||||||||||||||
856 | SmallVector<Type *, 8> ParamTys; | ||||||||||||||
857 | ParamTys.reserve(Arguments.size()); | ||||||||||||||
858 | for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx) | ||||||||||||||
859 | ParamTys.push_back(Arguments[Idx]->getType()); | ||||||||||||||
860 | return static_cast<T *>(this)->getIntrinsicCost(IID, RetTy, ParamTys, U); | ||||||||||||||
861 | } | ||||||||||||||
862 | |||||||||||||||
863 | unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands) { | ||||||||||||||
864 | if (isa<PHINode>(U)) | ||||||||||||||
865 | return TTI::TCC_Free; // Model all PHI nodes as free. | ||||||||||||||
866 | |||||||||||||||
867 | if (isa<ExtractValueInst>(U)) | ||||||||||||||
868 | return TTI::TCC_Free; // Model all ExtractValue nodes as free. | ||||||||||||||
869 | |||||||||||||||
870 | if (isa<FreezeInst>(U)) | ||||||||||||||
871 | return TTI::TCC_Free; // Model all Freeze nodes as free. | ||||||||||||||
872 | |||||||||||||||
873 | // Static alloca doesn't generate target instructions. | ||||||||||||||
874 | if (auto *A
| ||||||||||||||
875 | if (A->isStaticAlloca()) | ||||||||||||||
876 | return TTI::TCC_Free; | ||||||||||||||
877 | |||||||||||||||
878 | if (const GEPOperator *GEP
| ||||||||||||||
879 | return static_cast<T *>(this)->getGEPCost(GEP->getSourceElementType(), | ||||||||||||||
880 | GEP->getPointerOperand(), | ||||||||||||||
881 | Operands.drop_front()); | ||||||||||||||
882 | } | ||||||||||||||
883 | |||||||||||||||
884 | if (auto CS = ImmutableCallSite(U)) { | ||||||||||||||
885 | const Function *F = CS.getCalledFunction(); | ||||||||||||||
886 | if (!F) { | ||||||||||||||
887 | // Just use the called value type. | ||||||||||||||
888 | Type *FTy = CS.getCalledValue()->getType()->getPointerElementType(); | ||||||||||||||
889 | return static_cast<T *>(this) | ||||||||||||||
890 | ->getCallCost(cast<FunctionType>(FTy), CS.arg_size(), U); | ||||||||||||||
891 | } | ||||||||||||||
892 | |||||||||||||||
893 | SmallVector<const Value *, 8> Arguments(CS.arg_begin(), CS.arg_end()); | ||||||||||||||
894 | return static_cast<T *>(this)->getCallCost(F, Arguments, U); | ||||||||||||||
895 | } | ||||||||||||||
896 | |||||||||||||||
897 | if (isa<SExtInst>(U) || isa<ZExtInst>(U) || isa<FPExtInst>(U)) | ||||||||||||||
898 | // The old behaviour of generally treating extensions of icmp to be free | ||||||||||||||
899 | // has been removed. A target that needs it should override getUserCost(). | ||||||||||||||
900 | return static_cast<T *>(this)->getExtCost(cast<Instruction>(U), | ||||||||||||||
901 | Operands.back()); | ||||||||||||||
902 | |||||||||||||||
903 | return static_cast<T *>(this)->getOperationCost( | ||||||||||||||
904 | Operator::getOpcode(U), U->getType(), | ||||||||||||||
905 | U->getNumOperands() == 1 ? U->getOperand(0)->getType() : nullptr); | ||||||||||||||
906 | } | ||||||||||||||
907 | |||||||||||||||
908 | int getInstructionLatency(const Instruction *I) { | ||||||||||||||
909 | SmallVector<const Value *, 4> Operands(I->value_op_begin(), | ||||||||||||||
910 | I->value_op_end()); | ||||||||||||||
911 | if (getUserCost(I, Operands) == TTI::TCC_Free) | ||||||||||||||
912 | return 0; | ||||||||||||||
913 | |||||||||||||||
914 | if (isa<LoadInst>(I)) | ||||||||||||||
915 | return 4; | ||||||||||||||
916 | |||||||||||||||
917 | Type *DstTy = I->getType(); | ||||||||||||||
918 | |||||||||||||||
919 | // Usually an intrinsic is a simple instruction. | ||||||||||||||
920 | // A real function call is much slower. | ||||||||||||||
921 | if (auto *CI = dyn_cast<CallInst>(I)) { | ||||||||||||||
922 | const Function *F = CI->getCalledFunction(); | ||||||||||||||
923 | if (!F || static_cast<T *>(this)->isLoweredToCall(F)) | ||||||||||||||
924 | return 40; | ||||||||||||||
925 | // Some intrinsics return a value and a flag, we use the value type | ||||||||||||||
926 | // to decide its latency. | ||||||||||||||
927 | if (StructType* StructTy = dyn_cast<StructType>(DstTy)) | ||||||||||||||
928 | DstTy = StructTy->getElementType(0); | ||||||||||||||
929 | // Fall through to simple instructions. | ||||||||||||||
930 | } | ||||||||||||||
931 | |||||||||||||||
932 | if (VectorType *VectorTy = dyn_cast<VectorType>(DstTy)) | ||||||||||||||
933 | DstTy = VectorTy->getElementType(); | ||||||||||||||
934 | if (DstTy->isFloatingPointTy()) | ||||||||||||||
935 | return 3; | ||||||||||||||
936 | |||||||||||||||
937 | return 1; | ||||||||||||||
938 | } | ||||||||||||||
939 | }; | ||||||||||||||
940 | } | ||||||||||||||
941 | |||||||||||||||
942 | #endif |
1 | //===- CallSite.h - Abstract Call & Invoke instrs ---------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the CallSite class, which is a handy wrapper for code that |
10 | // wants to treat Call, Invoke and CallBr instructions in a generic way. When |
11 | // in non-mutation context (e.g. an analysis) ImmutableCallSite should be used. |
12 | // Finally, when some degree of customization is necessary between these two |
13 | // extremes, CallSiteBase<> can be supplied with fine-tuned parameters. |
14 | // |
15 | // NOTE: These classes are supposed to have "value semantics". So they should be |
16 | // passed by value, not by reference; they should not be "new"ed or "delete"d. |
17 | // They are efficiently copyable, assignable and constructible, with cost |
18 | // equivalent to copying a pointer (notice that they have only a single data |
19 | // member). The internal representation carries a flag which indicates which of |
20 | // the three variants is enclosed. This allows for cheaper checks when various |
21 | // accessors of CallSite are employed. |
22 | // |
23 | //===----------------------------------------------------------------------===// |
24 | |
25 | #ifndef LLVM_IR_CALLSITE_H |
26 | #define LLVM_IR_CALLSITE_H |
27 | |
28 | #include "llvm/ADT/Optional.h" |
29 | #include "llvm/ADT/PointerIntPair.h" |
30 | #include "llvm/ADT/iterator_range.h" |
31 | #include "llvm/IR/Attributes.h" |
32 | #include "llvm/IR/CallingConv.h" |
33 | #include "llvm/IR/Function.h" |
34 | #include "llvm/IR/InstrTypes.h" |
35 | #include "llvm/IR/Instruction.h" |
36 | #include "llvm/IR/Instructions.h" |
37 | #include "llvm/IR/Use.h" |
38 | #include "llvm/IR/User.h" |
39 | #include "llvm/IR/Value.h" |
40 | #include "llvm/Support/Casting.h" |
41 | #include <cassert> |
42 | #include <cstdint> |
43 | #include <iterator> |
44 | |
45 | namespace llvm { |
46 | |
47 | namespace Intrinsic { |
48 | typedef unsigned ID; |
49 | } |
50 | |
51 | template <typename FunTy = const Function, typename BBTy = const BasicBlock, |
52 | typename ValTy = const Value, typename UserTy = const User, |
53 | typename UseTy = const Use, typename InstrTy = const Instruction, |
54 | typename CallTy = const CallInst, |
55 | typename InvokeTy = const InvokeInst, |
56 | typename CallBrTy = const CallBrInst, |
57 | typename IterTy = User::const_op_iterator> |
58 | class CallSiteBase { |
59 | protected: |
60 | PointerIntPair<InstrTy *, 2, int> I; |
61 | |
62 | CallSiteBase() = default; |
63 | CallSiteBase(CallTy *CI) : I(CI, 1) { assert(CI)((CI) ? static_cast<void> (0) : __assert_fail ("CI", "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/IR/CallSite.h" , 63, __PRETTY_FUNCTION__)); } |
64 | CallSiteBase(InvokeTy *II) : I(II, 0) { assert(II)((II) ? static_cast<void> (0) : __assert_fail ("II", "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/IR/CallSite.h" , 64, __PRETTY_FUNCTION__)); } |
65 | CallSiteBase(CallBrTy *CBI) : I(CBI, 2) { assert(CBI)((CBI) ? static_cast<void> (0) : __assert_fail ("CBI", "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/IR/CallSite.h" , 65, __PRETTY_FUNCTION__)); } |
66 | explicit CallSiteBase(ValTy *II) { *this = get(II); } |
67 | |
68 | private: |
69 | /// This static method is like a constructor. It will create an appropriate |
70 | /// call site for a Call, Invoke or CallBr instruction, but it can also create |
71 | /// a null initialized CallSiteBase object for something which is NOT a call |
72 | /// site. |
73 | static CallSiteBase get(ValTy *V) { |
74 | if (InstrTy *II = dyn_cast<InstrTy>(V)) { |
75 | if (II->getOpcode() == Instruction::Call) |
76 | return CallSiteBase(static_cast<CallTy*>(II)); |
77 | if (II->getOpcode() == Instruction::Invoke) |
78 | return CallSiteBase(static_cast<InvokeTy*>(II)); |
79 | if (II->getOpcode() == Instruction::CallBr) |
80 | return CallSiteBase(static_cast<CallBrTy *>(II)); |
81 | } |
82 | return CallSiteBase(); |
83 | } |
84 | |
85 | public: |
86 | /// Return true if a CallInst is enclosed. |
87 | bool isCall() const { return I.getInt() == 1; } |
88 | |
89 | /// Return true if a InvokeInst is enclosed. !I.getInt() may also signify a |
90 | /// NULL instruction pointer, so check that. |
91 | bool isInvoke() const { return getInstruction() && I.getInt() == 0; } |
92 | |
93 | /// Return true if a CallBrInst is enclosed. |
94 | bool isCallBr() const { return I.getInt() == 2; } |
95 | |
96 | InstrTy *getInstruction() const { return I.getPointer(); } |
97 | InstrTy *operator->() const { return I.getPointer(); } |
98 | explicit operator bool() const { return I.getPointer(); } |
99 | |
100 | /// Get the basic block containing the call site. |
101 | BBTy* getParent() const { return getInstruction()->getParent(); } |
102 | |
103 | /// Return the pointer to function that is being called. |
104 | ValTy *getCalledValue() const { |
105 | assert(getInstruction() && "Not a call, invoke or callbr instruction!")((getInstruction() && "Not a call, invoke or callbr instruction!" ) ? static_cast<void> (0) : __assert_fail ("getInstruction() && \"Not a call, invoke or callbr instruction!\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/IR/CallSite.h" , 105, __PRETTY_FUNCTION__)); |
106 | return *getCallee(); |
107 | } |
108 | |
109 | /// Return the function being called if this is a direct call, otherwise |
110 | /// return null (if it's an indirect call). |
111 | FunTy *getCalledFunction() const { |
112 | return dyn_cast<FunTy>(getCalledValue()); |
113 | } |
114 | |
115 | /// Return true if the callsite is an indirect call. |
116 | bool isIndirectCall() const { |
117 | const Value *V = getCalledValue(); |
118 | if (!V) |
119 | return false; |
120 | if (isa<FunTy>(V) || isa<Constant>(V)) |
121 | return false; |
122 | if (const CallBase *CB = dyn_cast<CallBase>(getInstruction())) |
123 | if (CB->isInlineAsm()) |
124 | return false; |
125 | return true; |
126 | } |
127 | |
128 | /// Set the callee to the specified value. Unlike the function of the same |
129 | /// name on CallBase, does not modify the type! |
130 | void setCalledFunction(Value *V) { |
131 | assert(getInstruction() && "Not a call, callbr, or invoke instruction!")((getInstruction() && "Not a call, callbr, or invoke instruction!" ) ? static_cast<void> (0) : __assert_fail ("getInstruction() && \"Not a call, callbr, or invoke instruction!\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/IR/CallSite.h" , 131, __PRETTY_FUNCTION__)); |
132 | assert(cast<PointerType>(V->getType())->getElementType() ==((cast<PointerType>(V->getType())->getElementType () == cast<CallBase>(getInstruction())->getFunctionType () && "New callee type does not match FunctionType on call" ) ? static_cast<void> (0) : __assert_fail ("cast<PointerType>(V->getType())->getElementType() == cast<CallBase>(getInstruction())->getFunctionType() && \"New callee type does not match FunctionType on call\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/IR/CallSite.h" , 134, __PRETTY_FUNCTION__)) |
133 | cast<CallBase>(getInstruction())->getFunctionType() &&((cast<PointerType>(V->getType())->getElementType () == cast<CallBase>(getInstruction())->getFunctionType () && "New callee type does not match FunctionType on call" ) ? static_cast<void> (0) : __assert_fail ("cast<PointerType>(V->getType())->getElementType() == cast<CallBase>(getInstruction())->getFunctionType() && \"New callee type does not match FunctionType on call\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/IR/CallSite.h" , 134, __PRETTY_FUNCTION__)) |
134 | "New callee type does not match FunctionType on call")((cast<PointerType>(V->getType())->getElementType () == cast<CallBase>(getInstruction())->getFunctionType () && "New callee type does not match FunctionType on call" ) ? static_cast<void> (0) : __assert_fail ("cast<PointerType>(V->getType())->getElementType() == cast<CallBase>(getInstruction())->getFunctionType() && \"New callee type does not match FunctionType on call\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/IR/CallSite.h" , 134, __PRETTY_FUNCTION__)); |
135 | *getCallee() = V; |
136 | } |
137 | |
138 | /// Return the intrinsic ID of the intrinsic called by this CallSite, |
139 | /// or Intrinsic::not_intrinsic if the called function is not an |
140 | /// intrinsic, or if this CallSite is an indirect call. |
141 | Intrinsic::ID getIntrinsicID() const { |
142 | if (auto *F = getCalledFunction()) |
143 | return F->getIntrinsicID(); |
144 | // Don't use Intrinsic::not_intrinsic, as it will require pulling |
145 | // Intrinsics.h into every header that uses CallSite. |
146 | return static_cast<Intrinsic::ID>(0); |
147 | } |
148 | |
149 | /// Determine whether the passed iterator points to the callee operand's Use. |
150 | bool isCallee(Value::const_user_iterator UI) const { |
151 | return isCallee(&UI.getUse()); |
152 | } |
153 | |
154 | /// Determine whether this Use is the callee operand's Use. |
155 | bool isCallee(const Use *U) const { return getCallee() == U; } |
156 | |
157 | /// Determine whether the passed iterator points to an argument operand. |
158 | bool isArgOperand(Value::const_user_iterator UI) const { |
159 | return isArgOperand(&UI.getUse()); |
160 | } |
161 | |
162 | /// Determine whether the passed use points to an argument operand. |
163 | bool isArgOperand(const Use *U) const { |
164 | assert(getInstruction() == U->getUser())((getInstruction() == U->getUser()) ? static_cast<void> (0) : __assert_fail ("getInstruction() == U->getUser()", "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/IR/CallSite.h" , 164, __PRETTY_FUNCTION__)); |
165 | return arg_begin() <= U && U < arg_end(); |
166 | } |
167 | |
168 | /// Determine whether the passed iterator points to a bundle operand. |
169 | bool isBundleOperand(Value::const_user_iterator UI) const { |
170 | return isBundleOperand(&UI.getUse()); |
171 | } |
172 | |
173 | /// Determine whether the passed use points to a bundle operand. |
174 | bool isBundleOperand(const Use *U) const { |
175 | assert(getInstruction() == U->getUser())((getInstruction() == U->getUser()) ? static_cast<void> (0) : __assert_fail ("getInstruction() == U->getUser()", "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/IR/CallSite.h" , 175, __PRETTY_FUNCTION__)); |
176 | if (!hasOperandBundles()) |
177 | return false; |
178 | unsigned OperandNo = U - (*this)->op_begin(); |
179 | return getBundleOperandsStartIndex() <= OperandNo && |
180 | OperandNo < getBundleOperandsEndIndex(); |
181 | } |
182 | |
183 | /// Determine whether the passed iterator points to a data operand. |
184 | bool isDataOperand(Value::const_user_iterator UI) const { |
185 | return isDataOperand(&UI.getUse()); |
186 | } |
187 | |
188 | /// Determine whether the passed use points to a data operand. |
189 | bool isDataOperand(const Use *U) const { |
190 | return data_operands_begin() <= U && U < data_operands_end(); |
191 | } |
192 | |
193 | ValTy *getArgument(unsigned ArgNo) const { |
194 | assert(arg_begin() + ArgNo < arg_end() && "Argument # out of range!")((arg_begin() + ArgNo < arg_end() && "Argument # out of range!" ) ? static_cast<void> (0) : __assert_fail ("arg_begin() + ArgNo < arg_end() && \"Argument # out of range!\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/IR/CallSite.h" , 194, __PRETTY_FUNCTION__)); |
195 | return *(arg_begin() + ArgNo); |
196 | } |
197 | |
198 | void setArgument(unsigned ArgNo, Value* newVal) { |
199 | assert(getInstruction() && "Not a call, invoke or callbr instruction!")((getInstruction() && "Not a call, invoke or callbr instruction!" ) ? static_cast<void> (0) : __assert_fail ("getInstruction() && \"Not a call, invoke or callbr instruction!\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/IR/CallSite.h" , 199, __PRETTY_FUNCTION__)); |
200 | assert(arg_begin() + ArgNo < arg_end() && "Argument # out of range!")((arg_begin() + ArgNo < arg_end() && "Argument # out of range!" ) ? static_cast<void> (0) : __assert_fail ("arg_begin() + ArgNo < arg_end() && \"Argument # out of range!\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/IR/CallSite.h" , 200, __PRETTY_FUNCTION__)); |
201 | getInstruction()->setOperand(ArgNo, newVal); |
202 | } |
203 | |
204 | /// Given a value use iterator, returns the argument that corresponds to it. |
205 | /// Iterator must actually correspond to an argument. |
206 | unsigned getArgumentNo(Value::const_user_iterator I) const { |
207 | return getArgumentNo(&I.getUse()); |
208 | } |
209 | |
210 | /// Given a use for an argument, get the argument number that corresponds to |
211 | /// it. |
212 | unsigned getArgumentNo(const Use *U) const { |
213 | assert(getInstruction() && "Not a call, invoke or callbr instruction!")((getInstruction() && "Not a call, invoke or callbr instruction!" ) ? static_cast<void> (0) : __assert_fail ("getInstruction() && \"Not a call, invoke or callbr instruction!\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/IR/CallSite.h" , 213, __PRETTY_FUNCTION__)); |
214 | assert(isArgOperand(U) && "Argument # out of range!")((isArgOperand(U) && "Argument # out of range!") ? static_cast <void> (0) : __assert_fail ("isArgOperand(U) && \"Argument # out of range!\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/IR/CallSite.h" , 214, __PRETTY_FUNCTION__)); |
215 | return U - arg_begin(); |
216 | } |
217 | |
218 | /// The type of iterator to use when looping over actual arguments at this |
219 | /// call site. |
220 | using arg_iterator = IterTy; |
221 | |
222 | iterator_range<IterTy> args() const { |
223 | return make_range(arg_begin(), arg_end()); |
224 | } |
225 | bool arg_empty() const { return arg_end() == arg_begin(); } |
226 | unsigned arg_size() const { return unsigned(arg_end() - arg_begin()); } |
227 | |
228 | /// Given a value use iterator, return the data operand corresponding to it. |
229 | /// Iterator must actually correspond to a data operand. |
230 | unsigned getDataOperandNo(Value::const_user_iterator UI) const { |
231 | return getDataOperandNo(&UI.getUse()); |
232 | } |
233 | |
234 | /// Given a use for a data operand, get the data operand number that |
235 | /// corresponds to it. |
236 | unsigned getDataOperandNo(const Use *U) const { |
237 | assert(getInstruction() && "Not a call, invoke or callbr instruction!")((getInstruction() && "Not a call, invoke or callbr instruction!" ) ? static_cast<void> (0) : __assert_fail ("getInstruction() && \"Not a call, invoke or callbr instruction!\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/IR/CallSite.h" , 237, __PRETTY_FUNCTION__)); |
238 | assert(isDataOperand(U) && "Data operand # out of range!")((isDataOperand(U) && "Data operand # out of range!") ? static_cast<void> (0) : __assert_fail ("isDataOperand(U) && \"Data operand # out of range!\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/IR/CallSite.h" , 238, __PRETTY_FUNCTION__)); |
239 | return U - data_operands_begin(); |
240 | } |
241 | |
242 | /// Type of iterator to use when looping over data operands at this call site |
243 | /// (see below). |
244 | using data_operand_iterator = IterTy; |
245 | |
246 | /// data_operands_begin/data_operands_end - Return iterators iterating over |
247 | /// the call / invoke / callbr argument list and bundle operands. For invokes, |
248 | /// this is the set of instruction operands except the invoke target and the |
249 | /// two successor blocks; for calls this is the set of instruction operands |
250 | /// except the call target; for callbrs the number of labels to skip must be |
251 | /// determined first. |
252 | |
253 | IterTy data_operands_begin() const { |
254 | assert(getInstruction() && "Not a call or invoke instruction!")((getInstruction() && "Not a call or invoke instruction!" ) ? static_cast<void> (0) : __assert_fail ("getInstruction() && \"Not a call or invoke instruction!\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/IR/CallSite.h" , 254, __PRETTY_FUNCTION__)); |
255 | return cast<CallBase>(getInstruction())->data_operands_begin(); |
256 | } |
257 | IterTy data_operands_end() const { |
258 | assert(getInstruction() && "Not a call or invoke instruction!")((getInstruction() && "Not a call or invoke instruction!" ) ? static_cast<void> (0) : __assert_fail ("getInstruction() && \"Not a call or invoke instruction!\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/IR/CallSite.h" , 258, __PRETTY_FUNCTION__)); |
259 | return cast<CallBase>(getInstruction())->data_operands_end(); |
260 | } |
261 | iterator_range<IterTy> data_ops() const { |
262 | return make_range(data_operands_begin(), data_operands_end()); |
263 | } |
264 | bool data_operands_empty() const { |
265 | return data_operands_end() == data_operands_begin(); |
266 | } |
267 | unsigned data_operands_size() const { |
268 | return std::distance(data_operands_begin(), data_operands_end()); |
269 | } |
270 | |
271 | /// Return the type of the instruction that generated this call site. |
272 | Type *getType() const { return (*this)->getType(); } |
273 | |
274 | /// Return the caller function for this call site. |
275 | FunTy *getCaller() const { return (*this)->getParent()->getParent(); } |
276 | |
277 | /// Tests if this call site must be tail call optimized. Only a CallInst can |
278 | /// be tail call optimized. |
279 | bool isMustTailCall() const { |
280 | return isCall() && cast<CallInst>(getInstruction())->isMustTailCall(); |
281 | } |
282 | |
283 | /// Tests if this call site is marked as a tail call. |
284 | bool isTailCall() const { |
285 | return isCall() && cast<CallInst>(getInstruction())->isTailCall(); |
286 | } |
287 | |
288 | #define CALLSITE_DELEGATE_GETTER(METHOD) \ |
289 | InstrTy *II = getInstruction(); \ |
290 | return isCall() ? cast<CallInst>(II)->METHOD \ |
291 | : isCallBr() ? cast<CallBrInst>(II)->METHOD \ |
292 | : cast<InvokeInst>(II)->METHOD |
293 | |
294 | #define CALLSITE_DELEGATE_SETTER(METHOD) \ |
295 | InstrTy *II = getInstruction(); \ |
296 | if (isCall()) \ |
297 | cast<CallInst>(II)->METHOD; \ |
298 | else if (isCallBr()) \ |
299 | cast<CallBrInst>(II)->METHOD; \ |
300 | else \ |
301 | cast<InvokeInst>(II)->METHOD |
302 | |
303 | unsigned getNumArgOperands() const { |
304 | CALLSITE_DELEGATE_GETTER(getNumArgOperands()); |
305 | } |
306 | |
307 | ValTy *getArgOperand(unsigned i) const { |
308 | CALLSITE_DELEGATE_GETTER(getArgOperand(i)); |
309 | } |
310 | |
311 | ValTy *getReturnedArgOperand() const { |
312 | CALLSITE_DELEGATE_GETTER(getReturnedArgOperand()); |
313 | } |
314 | |
315 | bool isInlineAsm() const { |
316 | return cast<CallBase>(getInstruction())->isInlineAsm(); |
317 | } |
318 | |
319 | /// Get the calling convention of the call. |
320 | CallingConv::ID getCallingConv() const { |
321 | CALLSITE_DELEGATE_GETTER(getCallingConv()); |
322 | } |
323 | /// Set the calling convention of the call. |
324 | void setCallingConv(CallingConv::ID CC) { |
325 | CALLSITE_DELEGATE_SETTER(setCallingConv(CC)); |
326 | } |
327 | |
328 | FunctionType *getFunctionType() const { |
329 | CALLSITE_DELEGATE_GETTER(getFunctionType()); |
330 | } |
331 | |
332 | void mutateFunctionType(FunctionType *Ty) const { |
333 | CALLSITE_DELEGATE_SETTER(mutateFunctionType(Ty)); |
334 | } |
335 | |
336 | /// Get the parameter attributes of the call. |
337 | AttributeList getAttributes() const { |
338 | CALLSITE_DELEGATE_GETTER(getAttributes()); |
339 | } |
340 | /// Set the parameter attributes of the call. |
341 | void setAttributes(AttributeList PAL) { |
342 | CALLSITE_DELEGATE_SETTER(setAttributes(PAL)); |
343 | } |
344 | |
345 | void addAttribute(unsigned i, Attribute::AttrKind Kind) { |
346 | CALLSITE_DELEGATE_SETTER(addAttribute(i, Kind)); |
347 | } |
348 | |
349 | void addAttribute(unsigned i, Attribute Attr) { |
350 | CALLSITE_DELEGATE_SETTER(addAttribute(i, Attr)); |
351 | } |
352 | |
353 | void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) { |
354 | CALLSITE_DELEGATE_SETTER(addParamAttr(ArgNo, Kind)); |
355 | } |
356 | |
357 | void removeAttribute(unsigned i, Attribute::AttrKind Kind) { |
358 | CALLSITE_DELEGATE_SETTER(removeAttribute(i, Kind)); |
359 | } |
360 | |
361 | void removeAttribute(unsigned i, StringRef Kind) { |
362 | CALLSITE_DELEGATE_SETTER(removeAttribute(i, Kind)); |
363 | } |
364 | |
365 | void removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) { |
366 | CALLSITE_DELEGATE_SETTER(removeParamAttr(ArgNo, Kind)); |
367 | } |
368 | |
369 | /// Return true if this function has the given attribute. |
370 | bool hasFnAttr(Attribute::AttrKind Kind) const { |
371 | CALLSITE_DELEGATE_GETTER(hasFnAttr(Kind)); |
372 | } |
373 | |
374 | /// Return true if this function has the given attribute. |
375 | bool hasFnAttr(StringRef Kind) const { |
376 | CALLSITE_DELEGATE_GETTER(hasFnAttr(Kind)); |
377 | } |
378 | |
379 | /// Return true if this return value has the given attribute. |
380 | bool hasRetAttr(Attribute::AttrKind Kind) const { |
381 | CALLSITE_DELEGATE_GETTER(hasRetAttr(Kind)); |
382 | } |
383 | |
384 | /// Return true if the call or the callee has the given attribute. |
385 | bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const { |
386 | CALLSITE_DELEGATE_GETTER(paramHasAttr(ArgNo, Kind)); |
387 | } |
388 | |
389 | Attribute getAttribute(unsigned i, Attribute::AttrKind Kind) const { |
390 | CALLSITE_DELEGATE_GETTER(getAttribute(i, Kind)); |
391 | } |
392 | |
393 | Attribute getAttribute(unsigned i, StringRef Kind) const { |
394 | CALLSITE_DELEGATE_GETTER(getAttribute(i, Kind)); |
395 | } |
396 | |
397 | /// Return true if the data operand at index \p i directly or indirectly has |
398 | /// the attribute \p A. |
399 | /// |
400 | /// Normal call, invoke or callbr arguments have per operand attributes, as |
401 | /// specified in the attribute set attached to this instruction, while operand |
402 | /// bundle operands may have some attributes implied by the type of its |
403 | /// containing operand bundle. |
404 | bool dataOperandHasImpliedAttr(unsigned i, Attribute::AttrKind Kind) const { |
405 | CALLSITE_DELEGATE_GETTER(dataOperandHasImpliedAttr(i, Kind)); |
406 | } |
407 | |
408 | /// Extract the alignment of the return value. |
409 | unsigned getRetAlignment() const { |
410 | CALLSITE_DELEGATE_GETTER(getRetAlignment()); |
411 | } |
412 | |
413 | /// Extract the alignment for a call or parameter (0=unknown). |
414 | unsigned getParamAlignment(unsigned ArgNo) const { |
415 | CALLSITE_DELEGATE_GETTER(getParamAlignment(ArgNo)); |
416 | } |
417 | |
418 | /// Extract the byval type for a call or parameter (nullptr=unknown). |
419 | Type *getParamByValType(unsigned ArgNo) const { |
420 | CALLSITE_DELEGATE_GETTER(getParamByValType(ArgNo)); |
421 | } |
422 | |
423 | /// Extract the number of dereferenceable bytes for a call or parameter |
424 | /// (0=unknown). |
425 | uint64_t getDereferenceableBytes(unsigned i) const { |
426 | CALLSITE_DELEGATE_GETTER(getDereferenceableBytes(i)); |
427 | } |
428 | |
429 | /// Extract the number of dereferenceable_or_null bytes for a call or |
430 | /// parameter (0=unknown). |
431 | uint64_t getDereferenceableOrNullBytes(unsigned i) const { |
432 | CALLSITE_DELEGATE_GETTER(getDereferenceableOrNullBytes(i)); |
433 | } |
434 | |
435 | /// Determine if the return value is marked with NoAlias attribute. |
436 | bool returnDoesNotAlias() const { |
437 | CALLSITE_DELEGATE_GETTER(returnDoesNotAlias()); |
438 | } |
439 | |
440 | /// Return true if the call should not be treated as a call to a builtin. |
441 | bool isNoBuiltin() const { |
442 | CALLSITE_DELEGATE_GETTER(isNoBuiltin()); |
443 | } |
444 | |
445 | /// Return true if the call requires strict floating point semantics. |
446 | bool isStrictFP() const { |
447 | CALLSITE_DELEGATE_GETTER(isStrictFP()); |
448 | } |
449 | |
450 | /// Return true if the call should not be inlined. |
451 | bool isNoInline() const { |
452 | CALLSITE_DELEGATE_GETTER(isNoInline()); |
453 | } |
454 | void setIsNoInline(bool Value = true) { |
455 | CALLSITE_DELEGATE_SETTER(setIsNoInline(Value)); |
456 | } |
457 | |
458 | /// Determine if the call does not access memory. |
459 | bool doesNotAccessMemory() const { |
460 | CALLSITE_DELEGATE_GETTER(doesNotAccessMemory()); |
461 | } |
462 | void setDoesNotAccessMemory() { |
463 | CALLSITE_DELEGATE_SETTER(setDoesNotAccessMemory()); |
464 | } |
465 | |
466 | /// Determine if the call does not access or only reads memory. |
467 | bool onlyReadsMemory() const { |
468 | CALLSITE_DELEGATE_GETTER(onlyReadsMemory()); |
469 | } |
470 | void setOnlyReadsMemory() { |
471 | CALLSITE_DELEGATE_SETTER(setOnlyReadsMemory()); |
472 | } |
473 | |
474 | /// Determine if the call does not access or only writes memory. |
475 | bool doesNotReadMemory() const { |
476 | CALLSITE_DELEGATE_GETTER(doesNotReadMemory()); |
477 | } |
478 | void setDoesNotReadMemory() { |
479 | CALLSITE_DELEGATE_SETTER(setDoesNotReadMemory()); |
480 | } |
481 | |
482 | /// Determine if the call can access memory only using pointers based |
483 | /// on its arguments. |
484 | bool onlyAccessesArgMemory() const { |
485 | CALLSITE_DELEGATE_GETTER(onlyAccessesArgMemory()); |
486 | } |
487 | void setOnlyAccessesArgMemory() { |
488 | CALLSITE_DELEGATE_SETTER(setOnlyAccessesArgMemory()); |
489 | } |
490 | |
491 | /// Determine if the function may only access memory that is |
492 | /// inaccessible from the IR. |
493 | bool onlyAccessesInaccessibleMemory() const { |
494 | CALLSITE_DELEGATE_GETTER(onlyAccessesInaccessibleMemory()); |
495 | } |
496 | void setOnlyAccessesInaccessibleMemory() { |
497 | CALLSITE_DELEGATE_SETTER(setOnlyAccessesInaccessibleMemory()); |
498 | } |
499 | |
500 | /// Determine if the function may only access memory that is |
501 | /// either inaccessible from the IR or pointed to by its arguments. |
502 | bool onlyAccessesInaccessibleMemOrArgMem() const { |
503 | CALLSITE_DELEGATE_GETTER(onlyAccessesInaccessibleMemOrArgMem()); |
504 | } |
505 | void setOnlyAccessesInaccessibleMemOrArgMem() { |
506 | CALLSITE_DELEGATE_SETTER(setOnlyAccessesInaccessibleMemOrArgMem()); |
507 | } |
508 | |
509 | /// Determine if the call cannot return. |
510 | bool doesNotReturn() const { |
511 | CALLSITE_DELEGATE_GETTER(doesNotReturn()); |
512 | } |
513 | void setDoesNotReturn() { |
514 | CALLSITE_DELEGATE_SETTER(setDoesNotReturn()); |
515 | } |
516 | |
517 | /// Determine if the call cannot unwind. |
518 | bool doesNotThrow() const { |
519 | CALLSITE_DELEGATE_GETTER(doesNotThrow()); |
520 | } |
521 | void setDoesNotThrow() { |
522 | CALLSITE_DELEGATE_SETTER(setDoesNotThrow()); |
523 | } |
524 | |
525 | /// Determine if the call can be duplicated. |
526 | bool cannotDuplicate() const { |
527 | CALLSITE_DELEGATE_GETTER(cannotDuplicate()); |
528 | } |
529 | void setCannotDuplicate() { |
530 | CALLSITE_DELEGATE_SETTER(setCannotDuplicate()); |
531 | } |
532 | |
533 | /// Determine if the call is convergent. |
534 | bool isConvergent() const { |
535 | CALLSITE_DELEGATE_GETTER(isConvergent()); |
536 | } |
537 | void setConvergent() { |
538 | CALLSITE_DELEGATE_SETTER(setConvergent()); |
539 | } |
540 | void setNotConvergent() { |
541 | CALLSITE_DELEGATE_SETTER(setNotConvergent()); |
542 | } |
543 | |
544 | unsigned getNumOperandBundles() const { |
545 | CALLSITE_DELEGATE_GETTER(getNumOperandBundles()); |
546 | } |
547 | |
548 | bool hasOperandBundles() const { |
549 | CALLSITE_DELEGATE_GETTER(hasOperandBundles()); |
550 | } |
551 | |
552 | unsigned getBundleOperandsStartIndex() const { |
553 | CALLSITE_DELEGATE_GETTER(getBundleOperandsStartIndex()); |
554 | } |
555 | |
556 | unsigned getBundleOperandsEndIndex() const { |
557 | CALLSITE_DELEGATE_GETTER(getBundleOperandsEndIndex()); |
558 | } |
559 | |
560 | unsigned getNumTotalBundleOperands() const { |
561 | CALLSITE_DELEGATE_GETTER(getNumTotalBundleOperands()); |
562 | } |
563 | |
564 | OperandBundleUse getOperandBundleAt(unsigned Index) const { |
565 | CALLSITE_DELEGATE_GETTER(getOperandBundleAt(Index)); |
566 | } |
567 | |
568 | Optional<OperandBundleUse> getOperandBundle(StringRef Name) const { |
569 | CALLSITE_DELEGATE_GETTER(getOperandBundle(Name)); |
570 | } |
571 | |
572 | Optional<OperandBundleUse> getOperandBundle(uint32_t ID) const { |
573 | CALLSITE_DELEGATE_GETTER(getOperandBundle(ID)); |
574 | } |
575 | |
576 | unsigned countOperandBundlesOfType(uint32_t ID) const { |
577 | CALLSITE_DELEGATE_GETTER(countOperandBundlesOfType(ID)); |
578 | } |
579 | |
580 | bool isBundleOperand(unsigned Idx) const { |
581 | CALLSITE_DELEGATE_GETTER(isBundleOperand(Idx)); |
582 | } |
583 | |
584 | IterTy arg_begin() const { |
585 | CALLSITE_DELEGATE_GETTER(arg_begin()); |
586 | } |
587 | |
588 | IterTy arg_end() const { |
589 | CALLSITE_DELEGATE_GETTER(arg_end()); |
590 | } |
591 | |
592 | #undef CALLSITE_DELEGATE_GETTER |
593 | #undef CALLSITE_DELEGATE_SETTER |
594 | |
595 | void getOperandBundlesAsDefs(SmallVectorImpl<OperandBundleDef> &Defs) const { |
596 | // Since this is actually a getter that "looks like" a setter, don't use the |
597 | // above macros to avoid confusion. |
598 | cast<CallBase>(getInstruction())->getOperandBundlesAsDefs(Defs); |
599 | } |
600 | |
/// Determine whether this data operand is not captured.
/// Note the +1: data operand numbers are converted to attribute indices,
/// which are 1-based (index 0 is the return value).
bool doesNotCapture(unsigned OpNo) const {
  return dataOperandHasImpliedAttr(OpNo + 1, Attribute::NoCapture);
}

/// Determine whether this argument is passed by value.
bool isByValArgument(unsigned ArgNo) const {
  return paramHasAttr(ArgNo, Attribute::ByVal);
}

/// Determine whether this argument is passed in an alloca.
bool isInAllocaArgument(unsigned ArgNo) const {
  return paramHasAttr(ArgNo, Attribute::InAlloca);
}

/// Determine whether this argument is passed by value or in an alloca.
bool isByValOrInAllocaArgument(unsigned ArgNo) const {
  return paramHasAttr(ArgNo, Attribute::ByVal) ||
         paramHasAttr(ArgNo, Attribute::InAlloca);
}

/// Determine if there is an inalloca argument. Only the last argument can
/// have the inalloca attribute, so only that one is checked.
bool hasInAllocaArgument() const {
  return !arg_empty() && paramHasAttr(arg_size() - 1, Attribute::InAlloca);
}

/// Determine whether data operand \p OpNo is readnone.
bool doesNotAccessMemory(unsigned OpNo) const {
  return dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadNone);
}

/// Determine whether data operand \p OpNo is readonly or readnone.
bool onlyReadsMemory(unsigned OpNo) const {
  return dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadOnly) ||
         dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadNone);
}

/// Determine whether data operand \p OpNo is writeonly or readnone.
bool doesNotReadMemory(unsigned OpNo) const {
  return dataOperandHasImpliedAttr(OpNo + 1, Attribute::WriteOnly) ||
         dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadNone);
}
641 | |
642 | /// Return true if the return value is known to be not null. |
643 | /// This may be because it has the nonnull attribute, or because at least |
644 | /// one byte is dereferenceable and the pointer is in addrspace(0). |
645 | bool isReturnNonNull() const { |
646 | if (hasRetAttr(Attribute::NonNull)) |
647 | return true; |
648 | else if (getDereferenceableBytes(AttributeList::ReturnIndex) > 0 && |
649 | !NullPointerIsDefined(getCaller(), |
650 | getType()->getPointerAddressSpace())) |
651 | return true; |
652 | |
653 | return false; |
654 | } |
655 | |
656 | /// Returns true if this CallSite passes the given Value* as an argument to |
657 | /// the called function. |
658 | bool hasArgument(const Value *Arg) const { |
659 | for (arg_iterator AI = this->arg_begin(), E = this->arg_end(); AI != E; |
660 | ++AI) |
661 | if (AI->get() == Arg) |
662 | return true; |
663 | return false; |
664 | } |
665 | |
private:
  /// Return an iterator to the callee operand, which the underlying
  /// CallBase stores as its last operand.
  IterTy getCallee() const {
    return cast<CallBase>(getInstruction())->op_end() - 1;
  }
};
671 | |
/// A mutable view over a call, invoke, or callbr instruction, instantiating
/// CallSiteBase with the non-const IR types.
class CallSite : public CallSiteBase<Function, BasicBlock, Value, User, Use,
                                     Instruction, CallInst, InvokeInst,
                                     CallBrInst, User::op_iterator> {
public:
  CallSite() = default;
  CallSite(CallSiteBase B) : CallSiteBase(B) {}
  CallSite(CallInst *CI) : CallSiteBase(CI) {}
  CallSite(InvokeInst *II) : CallSiteBase(II) {}
  CallSite(CallBrInst *CBI) : CallSiteBase(CBI) {}
  // Constructing from a generic Instruction/Value may yield a null call
  // site; these are explicit so the check is visible at the call site.
  explicit CallSite(Instruction *II) : CallSiteBase(II) {}
  explicit CallSite(Value *V) : CallSiteBase(V) {}

  // Comparisons are on the underlying instruction handle `I`.
  bool operator==(const CallSite &CS) const { return I == CS.I; }
  bool operator!=(const CallSite &CS) const { return I != CS.I; }
  bool operator<(const CallSite &CS) const {
    return getInstruction() < CS.getInstruction();
  }

private:
  friend struct DenseMapInfo<CallSite>;

  User::op_iterator getCallee() const;
};
695 | |
/// Establish a view to a call site for examination.
/// Const counterpart of CallSite, using the default (const) CallSiteBase
/// template arguments; it cannot be used to mutate the call.
class ImmutableCallSite : public CallSiteBase<> {
public:
  ImmutableCallSite() = default;
  ImmutableCallSite(const CallInst *CI) : CallSiteBase(CI) {}
  ImmutableCallSite(const InvokeInst *II) : CallSiteBase(II) {}
  ImmutableCallSite(const CallBrInst *CBI) : CallSiteBase(CBI) {}
  explicit ImmutableCallSite(const Instruction *II) : CallSiteBase(II) {}
  explicit ImmutableCallSite(const Value *V) : CallSiteBase(V) {}
  /// Allow implicit conversion from a mutable view of the same instruction.
  ImmutableCallSite(CallSite CS) : CallSiteBase(CS.getInstruction()) {}
};
707 | |
/// AbstractCallSite
///
/// An abstract call site is a wrapper that allows to treat direct,
/// indirect, and callback calls the same. If an abstract call site
/// represents a direct or indirect call site it behaves like a stripped
/// down version of a normal call site object. The abstract call site can
/// also represent a callback call, thus the fact that the initially
/// called function (=broker) may invoke a third one (=callback callee).
/// In this case, the abstract call site hides the middle man, hence the
/// broker function. The result is a representation of the callback call,
/// inside the broker, but in the context of the original call to the broker.
///
/// There are up to three functions involved when we talk about callback call
/// sites. The caller (1), which invokes the broker function. The broker
/// function (2), that will invoke the callee zero or more times. And finally
/// the callee (3), which is the target of the callback call.
///
/// The abstract call site will handle the mapping from parameters to arguments
/// depending on the semantic of the broker function. However, it is important
/// to note that the mapping is often partial. Thus, some arguments of the
/// call/invoke instruction are mapped to parameters of the callee while others
/// are not.
class AbstractCallSite {
public:

  /// The encoding of a callback with regards to the underlying instruction.
  struct CallbackInfo {

    /// For direct/indirect calls the parameter encoding is empty. If it is not,
    /// the abstract call site represents a callback. In that case, the first
    /// element of the encoding vector represents which argument of the call
    /// site CS is the callback callee. The remaining elements map parameters
    /// (identified by their position) to the arguments that will be passed
    /// through (also identified by position but in the call site instruction).
    ///
    /// NOTE that we use LLVM argument numbers (starting at 0) and not
    /// clang/source argument numbers (starting at 1). The -1 entries represent
    /// unknown values that are passed to the callee.
    using ParameterEncodingTy = SmallVector<int, 0>;
    ParameterEncodingTy ParameterEncoding;

  };

private:

  /// The underlying call site:
  ///   caller -> callee,          if this is a direct or indirect call site
  ///   caller -> broker function, if this is a callback call site
  CallSite CS;

  /// The encoding of a callback with regards to the underlying instruction.
  CallbackInfo CI;

public:
  /// Sole constructor for abstract call sites (ACS).
  ///
  /// An abstract call site can only be constructed through a llvm::Use because
  /// each operand (=use) of an instruction could potentially be a different
  /// abstract call site. Furthermore, even if the value of the llvm::Use is the
  /// same, and the user is as well, the abstract call sites might not be.
  ///
  /// If a use is not associated with an abstract call site the constructed ACS
  /// will evaluate to false if converted to a boolean.
  ///
  /// If the use is the callee use of a call or invoke instruction, the
  /// constructed abstract call site will behave as a llvm::CallSite would.
  ///
  /// If the use is not a callee use of a call or invoke instruction, the
  /// callback metadata is used to determine the argument <-> parameter mapping
  /// as well as the callee of the abstract call site.
  AbstractCallSite(const Use *U);

  /// Add operand uses of \p ICS that represent callback uses into \p CBUses.
  ///
  /// All uses added to \p CBUses can be used to create abstract call sites for
  /// which AbstractCallSite::isCallbackCall() will return true.
  static void getCallbackUses(ImmutableCallSite ICS,
                              SmallVectorImpl<const Use *> &CBUses);

  /// Conversion operator to conveniently check for a valid/initialized ACS.
  explicit operator bool() const { return (bool)CS; }

  /// Return the underlying instruction.
  Instruction *getInstruction() const { return CS.getInstruction(); }

  /// Return the call site abstraction for the underlying instruction.
  CallSite getCallSite() const { return CS; }

  /// Return true if this ACS represents a direct call.
  bool isDirectCall() const {
    return !isCallbackCall() && !CS.isIndirectCall();
  }

  /// Return true if this ACS represents an indirect call.
  bool isIndirectCall() const {
    return !isCallbackCall() && CS.isIndirectCall();
  }

  /// Return true if this ACS represents a callback call.
  bool isCallbackCall() const {
    // For a callback call site the callee is ALWAYS stored first in the
    // transitive values vector. Thus, a non-empty vector indicates a callback.
    return !CI.ParameterEncoding.empty();
  }

  /// Return true if @p UI is the use that defines the callee of this ACS.
  bool isCallee(Value::const_user_iterator UI) const {
    return isCallee(&UI.getUse());
  }

  /// Return true if @p U is the use that defines the callee of this ACS.
  bool isCallee(const Use *U) const {
    if (isDirectCall())
      return CS.isCallee(U);

    assert(!CI.ParameterEncoding.empty() &&
           "Callback without parameter encoding!");

    // For callbacks, encoding slot 0 records which argument is the callee.
    return (int)CS.getArgumentNo(U) == CI.ParameterEncoding[0];
  }

  /// Return the number of parameters of the callee.
  unsigned getNumArgOperands() const {
    if (isDirectCall())
      return CS.getNumArgOperands();
    // Subtract 1 for the callee encoding.
    return CI.ParameterEncoding.size() - 1;
  }

  /// Return the operand index of the underlying instruction associated with @p
  /// Arg.
  int getCallArgOperandNo(Argument &Arg) const {
    return getCallArgOperandNo(Arg.getArgNo());
  }

  /// Return the operand index of the underlying instruction associated with
  /// the function parameter number @p ArgNo or -1 if there is none.
  int getCallArgOperandNo(unsigned ArgNo) const {
    if (isDirectCall())
      return ArgNo;
    // Add 1 for the callee encoding.
    return CI.ParameterEncoding[ArgNo + 1];
  }

  /// Return the operand of the underlying instruction associated with @p Arg.
  Value *getCallArgOperand(Argument &Arg) const {
    return getCallArgOperand(Arg.getArgNo());
  }

  /// Return the operand of the underlying instruction associated with the
  /// function parameter number @p ArgNo or nullptr if there is none.
  Value *getCallArgOperand(unsigned ArgNo) const {
    if (isDirectCall())
      return CS.getArgOperand(ArgNo);
    // Add 1 for the callee encoding; -1 entries mean "unknown value".
    return CI.ParameterEncoding[ArgNo + 1] >= 0
               ? CS.getArgOperand(CI.ParameterEncoding[ArgNo + 1])
               : nullptr;
  }

  /// Return the operand index of the underlying instruction associated with the
  /// callee of this ACS. Only valid for callback calls!
  int getCallArgOperandNoForCallee() const {
    assert(isCallbackCall());
    assert(CI.ParameterEncoding.size() && CI.ParameterEncoding[0] >= 0);
    return CI.ParameterEncoding[0];
  }

  /// Return the use of the callee value in the underlying instruction. Only
  /// valid for callback calls!
  const Use &getCalleeUseForCallback() const {
    int CalleeArgIdx = getCallArgOperandNoForCallee();
    assert(CalleeArgIdx >= 0 &&
           unsigned(CalleeArgIdx) < getInstruction()->getNumOperands());
    return getInstruction()->getOperandUse(CalleeArgIdx);
  }

  /// Return the pointer to function that is being called.
  Value *getCalledValue() const {
    if (isDirectCall())
      return CS.getCalledValue();
    return CS.getArgOperand(getCallArgOperandNoForCallee());
  }

  /// Return the function being called if this is a direct call, otherwise
  /// return null (if it's an indirect call).
  Function *getCalledFunction() const {
    Value *V = getCalledValue();
    return V ? dyn_cast<Function>(V->stripPointerCasts()) : nullptr;
  }
};
899 | |
// DenseMapInfo for CallSite, keyed on the underlying instruction handle `I`
// (accessible here because CallSite befriends this specialization).
template <> struct DenseMapInfo<CallSite> {
  using BaseInfo = DenseMapInfo<decltype(CallSite::I)>;

  static CallSite getEmptyKey() {
    CallSite CS;
    CS.I = BaseInfo::getEmptyKey();
    return CS;
  }

  static CallSite getTombstoneKey() {
    CallSite CS;
    CS.I = BaseInfo::getTombstoneKey();
    return CS;
  }

  static unsigned getHashValue(const CallSite &CS) {
    return BaseInfo::getHashValue(CS.I);
  }

  static bool isEqual(const CallSite &LHS, const CallSite &RHS) {
    return LHS == RHS;
  }
};
923 | |
924 | } // end namespace llvm |
925 | |
926 | #endif // LLVM_IR_CALLSITE_H |
1 | //===- llvm/ADT/PointerIntPair.h - Pair for pointer and int -----*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the PointerIntPair class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #ifndef LLVM_ADT_POINTERINTPAIR_H |
14 | #define LLVM_ADT_POINTERINTPAIR_H |
15 | |
16 | #include "llvm/Support/Compiler.h" |
17 | #include "llvm/Support/PointerLikeTypeTraits.h" |
18 | #include "llvm/Support/type_traits.h" |
19 | #include <cassert> |
20 | #include <cstdint> |
21 | #include <limits> |
22 | |
23 | namespace llvm { |
24 | |
25 | template <typename T> struct DenseMapInfo; |
26 | template <typename PointerT, unsigned IntBits, typename PtrTraits> |
27 | struct PointerIntPairInfo; |
28 | |
/// PointerIntPair - This class implements a pair of a pointer and small
/// integer.  It is designed to represent this in the space required by one
/// pointer by bitmangling the integer into the low part of the pointer.  This
/// can only be done for small integers: typically up to 3 bits, but it depends
/// on the number of bits available according to PointerLikeTypeTraits for the
/// type.
///
/// Note that PointerIntPair always puts the IntVal part in the highest bits
/// possible.  For example, PointerIntPair<void*, 1, bool> will put the bit for
/// the bool into bit #2, not bit #0, which allows the low two bits to be used
/// for something else.  For example, this allows:
///   PointerIntPair<PointerIntPair<void*, 1, bool>, 1, bool>
/// ... and the two bools will land in different bits.
template <typename PointerTy, unsigned IntBits, typename IntType = unsigned,
          typename PtrTraits = PointerLikeTypeTraits<PointerTy>,
          typename Info = PointerIntPairInfo<PointerTy, IntBits, PtrTraits>>
class PointerIntPair {
  // Used by MSVC visualizer and generally helpful for debugging/visualizing.
  using InfoTy = Info;
  // Both fields packed into a single word; Info does the bit slicing.
  intptr_t Value = 0;

public:
  constexpr PointerIntPair() = default;

  PointerIntPair(PointerTy PtrVal, IntType IntVal) {
    setPointerAndInt(PtrVal, IntVal);
  }

  explicit PointerIntPair(PointerTy PtrVal) { initWithPointer(PtrVal); }

  PointerTy getPointer() const { return Info::getPointer(Value); }

  IntType getInt() const { return (IntType)Info::getInt(Value); }

  // The LLVM_LVALUE_FUNCTION ref-qualifier prevents mutating a temporary
  // pair (e.g. the return value of a getter) by accident.
  void setPointer(PointerTy PtrVal) LLVM_LVALUE_FUNCTION {
    Value = Info::updatePointer(Value, PtrVal);
  }

  void setInt(IntType IntVal) LLVM_LVALUE_FUNCTION {
    Value = Info::updateInt(Value, static_cast<intptr_t>(IntVal));
  }

  /// Set the pointer and clear the int to zero.
  void initWithPointer(PointerTy PtrVal) LLVM_LVALUE_FUNCTION {
    Value = Info::updatePointer(0, PtrVal);
  }

  void setPointerAndInt(PointerTy PtrVal, IntType IntVal) LLVM_LVALUE_FUNCTION {
    Value = Info::updateInt(Info::updatePointer(0, PtrVal),
                            static_cast<intptr_t>(IntVal));
  }

  PointerTy const *getAddrOfPointer() const {
    return const_cast<PointerIntPair *>(this)->getAddrOfPointer();
  }

  PointerTy *getAddrOfPointer() {
    // Only legal when the stored word is bit-identical to the pointer, i.e.
    // the int is zero and the traits don't transform the representation.
    assert(Value == reinterpret_cast<intptr_t>(getPointer()) &&
           "Can only return the address if IntBits is cleared and "
           "PtrTraits doesn't change the pointer");
    return reinterpret_cast<PointerTy *>(&Value);
  }

  void *getOpaqueValue() const { return reinterpret_cast<void *>(Value); }

  void setFromOpaqueValue(void *Val) LLVM_LVALUE_FUNCTION {
    Value = reinterpret_cast<intptr_t>(Val);
  }

  static PointerIntPair getFromOpaqueValue(void *V) {
    PointerIntPair P;
    P.setFromOpaqueValue(V);
    return P;
  }

  // Allow PointerIntPairs to be created from const void * if and only if the
  // pointer type could be created from a const void *.
  static PointerIntPair getFromOpaqueValue(const void *V) {
    (void)PtrTraits::getFromVoidPointer(V);
    return getFromOpaqueValue(const_cast<void *>(V));
  }

  // Comparisons operate on the raw packed word (pointer and int together).
  bool operator==(const PointerIntPair &RHS) const {
    return Value == RHS.Value;
  }

  bool operator!=(const PointerIntPair &RHS) const {
    return Value != RHS.Value;
  }

  bool operator<(const PointerIntPair &RHS) const { return Value < RHS.Value; }
  bool operator>(const PointerIntPair &RHS) const { return Value > RHS.Value; }

  bool operator<=(const PointerIntPair &RHS) const {
    return Value <= RHS.Value;
  }

  bool operator>=(const PointerIntPair &RHS) const {
    return Value >= RHS.Value;
  }
};
129 | |
// Specialize is_trivially_copyable to avoid limitation of llvm::is_trivially_copyable
// when compiled with gcc 4.9.
template <typename PointerTy, unsigned IntBits, typename IntType,
          typename PtrTraits,
          typename Info>
struct is_trivially_copyable<PointerIntPair<PointerTy, IntBits, IntType, PtrTraits, Info>> : std::true_type {
#ifdef HAVE_STD_IS_TRIVIALLY_COPYABLE
  // Cross-check against the std:: trait where it exists, so the manual
  // specialization can never silently disagree with the compiler.
  static_assert(std::is_trivially_copyable<PointerIntPair<PointerTy, IntBits, IntType, PtrTraits, Info>>::value,
                "inconsistent behavior between llvm:: and std:: implementation of is_trivially_copyable");
#endif
};
141 | |
142 | |
/// Bit-level policy for PointerIntPair: computes the masks/shifts and
/// performs the pack/unpack operations on the single intptr_t word.
template <typename PointerT, unsigned IntBits, typename PtrTraits>
struct PointerIntPairInfo {
  static_assert(PtrTraits::NumLowBitsAvailable <
                    std::numeric_limits<uintptr_t>::digits,
                "cannot use a pointer type that has all bits free");
  static_assert(IntBits <= PtrTraits::NumLowBitsAvailable,
                "PointerIntPair with integer size too large for pointer");
  enum MaskAndShiftConstants : uintptr_t {
    /// PointerBitMask - The bits that come from the pointer.
    PointerBitMask =
        ~(uintptr_t)(((intptr_t)1 << PtrTraits::NumLowBitsAvailable) - 1),

    /// IntShift - The number of low bits that we reserve for other uses, and
    /// keep zero.
    IntShift = (uintptr_t)PtrTraits::NumLowBitsAvailable - IntBits,

    /// IntMask - This is the unshifted mask for valid bits of the int type.
    IntMask = (uintptr_t)(((intptr_t)1 << IntBits) - 1),

    // ShiftedIntMask - This is the bits for the integer shifted in place.
    ShiftedIntMask = (uintptr_t)(IntMask << IntShift)
  };

  /// Extract the pointer, masking off the int (and any spare low bits).
  static PointerT getPointer(intptr_t Value) {
    return PtrTraits::getFromVoidPointer(
        reinterpret_cast<void *>(Value & PointerBitMask));
  }

  /// Extract the int from its shifted position.
  static intptr_t getInt(intptr_t Value) {
    return (Value >> IntShift) & IntMask;
  }

  /// Replace the pointer bits of \p OrigValue with \p Ptr, keeping the int.
  static intptr_t updatePointer(intptr_t OrigValue, PointerT Ptr) {
    intptr_t PtrWord =
        reinterpret_cast<intptr_t>(PtrTraits::getAsVoidPointer(Ptr));
    assert((PtrWord & ~PointerBitMask) == 0 &&
           "Pointer is not sufficiently aligned");
    // Preserve all low bits, just update the pointer.
    return PtrWord | (OrigValue & ~PointerBitMask);
  }

  /// Replace the int bits of \p OrigValue with \p Int, keeping the pointer.
  static intptr_t updateInt(intptr_t OrigValue, intptr_t Int) {
    intptr_t IntWord = static_cast<intptr_t>(Int);
    assert((IntWord & ~IntMask) == 0 && "Integer too large for field");

    // Preserve all bits other than the ones we are updating.
    return (OrigValue & ~ShiftedIntMask) | IntWord << IntShift;
  }
};
192 | |
// Provide specialization of DenseMapInfo for PointerIntPair.
template <typename PointerTy, unsigned IntBits, typename IntType>
struct DenseMapInfo<PointerIntPair<PointerTy, IntBits, IntType>> {
  using Ty = PointerIntPair<PointerTy, IntBits, IntType>;

  static Ty getEmptyKey() {
    uintptr_t Val = static_cast<uintptr_t>(-1);
    Val <<= PointerLikeTypeTraits<Ty>::NumLowBitsAvailable;
    return Ty::getFromOpaqueValue(reinterpret_cast<void *>(Val));
  }

  static Ty getTombstoneKey() {
    uintptr_t Val = static_cast<uintptr_t>(-2);
    // NOTE(review): this shifts by the *pointer* type's free bits while
    // getEmptyKey() shifts by the pair type's — the keys are still distinct,
    // but the asymmetry looks unintentional; confirm before "fixing".
    Val <<= PointerLikeTypeTraits<PointerTy>::NumLowBitsAvailable;
    return Ty::getFromOpaqueValue(reinterpret_cast<void *>(Val));
  }

  static unsigned getHashValue(Ty V) {
    // Fold the high bits into the low bits so nearby pointers spread out.
    uintptr_t IV = reinterpret_cast<uintptr_t>(V.getOpaqueValue());
    return unsigned(IV) ^ unsigned(IV >> 9);
  }

  static bool isEqual(const Ty &LHS, const Ty &RHS) { return LHS == RHS; }
};
217 | |
// Teach SmallPtrSet that PointerIntPair is "basically a pointer".
// NOTE(review): the member signatures use the pair's *default* PtrTraits
// even though this specialization is parameterized over PtrTraits —
// presumably relies on the types converting; confirm if adding a custom
// traits instantiation.
template <typename PointerTy, unsigned IntBits, typename IntType,
          typename PtrTraits>
struct PointerLikeTypeTraits<
    PointerIntPair<PointerTy, IntBits, IntType, PtrTraits>> {
  static inline void *
  getAsVoidPointer(const PointerIntPair<PointerTy, IntBits, IntType> &P) {
    return P.getOpaqueValue();
  }

  static inline PointerIntPair<PointerTy, IntBits, IntType>
  getFromVoidPointer(void *P) {
    return PointerIntPair<PointerTy, IntBits, IntType>::getFromOpaqueValue(P);
  }

  static inline PointerIntPair<PointerTy, IntBits, IntType>
  getFromVoidPointer(const void *P) {
    return PointerIntPair<PointerTy, IntBits, IntType>::getFromOpaqueValue(P);
  }

  // A nested pair consumes IntBits of the pointee's low bits, so that many
  // fewer remain available to an enclosing pair.
  static constexpr int NumLowBitsAvailable =
      PtrTraits::NumLowBitsAvailable - IntBits;
};
241 | |
242 | } // end namespace llvm |
243 | |
244 | #endif // LLVM_ADT_POINTERINTPAIR_H |
1 | //===- llvm/Support/PointerLikeTypeTraits.h - Pointer Traits ----*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the PointerLikeTypeTraits class. This allows data |
10 | // structures to reason about pointers and other things that are pointer sized. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #ifndef LLVM_SUPPORT_POINTERLIKETYPETRAITS_H |
15 | #define LLVM_SUPPORT_POINTERLIKETYPETRAITS_H |
16 | |
17 | #include "llvm/Support/DataTypes.h" |
18 | #include <assert.h> |
19 | #include <type_traits> |
20 | |
21 | namespace llvm { |
22 | |
23 | /// A traits type that is used to handle pointer types and things that are just |
24 | /// wrappers for pointers as a uniform entity. |
25 | template <typename T> struct PointerLikeTypeTraits; |
26 | |
namespace detail {
/// A tiny meta function to compute the log2 of a compile time constant.
/// Recursive case: log2(N) == log2(N / 2) + 1; the <1> specialization below
/// terminates the recursion at log2(1) == 0.
template <size_t N>
struct ConstantLog2
    : std::integral_constant<size_t, ConstantLog2<N / 2>::value + 1> {};
template <> struct ConstantLog2<1> : std::integral_constant<size_t, 0> {};

// Provide a trait to check if T is pointer-like.
// Primary template: not pointer-like unless the specialization below matches.
template <typename T, typename U = void> struct HasPointerLikeTypeTraits {
  static const bool value = false;
};

// sizeof(T) is valid only for a complete T.
// SFINAE: this partial specialization is viable only when both
// PointerLikeTypeTraits<T> and T itself are complete types.
template <typename T> struct HasPointerLikeTypeTraits<
    T, decltype((sizeof(PointerLikeTypeTraits<T>) + sizeof(T)), void())> {
  static const bool value = true;
};

/// True when T has PointerLikeTypeTraits defined for it.
template <typename T> struct IsPointerLike {
  static const bool value = HasPointerLikeTypeTraits<T>::value;
};

/// Raw pointers count as pointer-like regardless of declared traits.
template <typename T> struct IsPointerLike<T *> {
  static const bool value = true;
};
} // namespace detail
53 | |
// Provide PointerLikeTypeTraits for non-cvr pointers.
template <typename T> struct PointerLikeTypeTraits<T *> {
  static inline void *getAsVoidPointer(T *P) { return P; }
  static inline T *getFromVoidPointer(void *P) { return static_cast<T *>(P); }

  // A T* is aligned to alignof(T), so log2(alignof(T)) low bits are
  // guaranteed zero and available for tagging.
  static constexpr int NumLowBitsAvailable =
      detail::ConstantLog2<alignof(T)>::value;
};
62 | |
// Provide PointerLikeTypeTraits for void* (assumed malloc-aligned; see the
// caveats below).
template <> struct PointerLikeTypeTraits<void *> {
  static inline void *getAsVoidPointer(void *P) { return P; }
  static inline void *getFromVoidPointer(void *P) { return P; }

  /// Note, we assume here that void* is related to raw malloc'ed memory and
  /// that malloc returns objects at least 4-byte aligned. However, this may be
  /// wrong, or pointers may be from something other than malloc. In this case,
  /// you should specify a real typed pointer or avoid this template.
  ///
  /// All clients should use assertions to do a run-time check to ensure that
  /// this is actually true.
  static constexpr int NumLowBitsAvailable = 2;
};
76 | |
// Provide PointerLikeTypeTraits for const things.
// Delegates to the non-const traits. Note the value (not reference)
// parameters: T here is expected to itself be a pointer-like value.
template <typename T> struct PointerLikeTypeTraits<const T> {
  typedef PointerLikeTypeTraits<T> NonConst;

  static inline const void *getAsVoidPointer(const T P) {
    return NonConst::getAsVoidPointer(P);
  }
  static inline const T getFromVoidPointer(const void *P) {
    // const_cast only adapts to the non-const traits' signature.
    return NonConst::getFromVoidPointer(const_cast<void *>(P));
  }
  static constexpr int NumLowBitsAvailable = NonConst::NumLowBitsAvailable;
};
89 | |
90 | // Provide PointerLikeTypeTraits for const pointers. |
91 | template <typename T> struct PointerLikeTypeTraits<const T *> { |
92 | typedef PointerLikeTypeTraits<T *> NonConst; |
93 | |
94 | static inline const void *getAsVoidPointer(const T *P) { |
95 | return NonConst::getAsVoidPointer(const_cast<T *>(P)); |
96 | } |
97 | static inline const T *getFromVoidPointer(const void *P) { |
98 | return NonConst::getFromVoidPointer(const_cast<void *>(P)); |
99 | } |
100 | static constexpr int NumLowBitsAvailable = NonConst::NumLowBitsAvailable; |
101 | }; |
102 | |
// Provide PointerLikeTypeTraits for uintptr_t.
// Lets a raw integer be treated as a pointer-like value; the conversions are
// plain value-preserving reinterpret_casts.
template <> struct PointerLikeTypeTraits<uintptr_t> {
  static inline void *getAsVoidPointer(uintptr_t P) {
    return reinterpret_cast<void *>(P);
  }
  static inline uintptr_t getFromVoidPointer(void *P) {
    return reinterpret_cast<uintptr_t>(P);
  }
  // No bits are available! Every bit pattern is a legitimate uintptr_t value,
  // so nothing can be stolen for tags.
  static constexpr int NumLowBitsAvailable = 0;
};
114 | |
115 | /// Provide suitable custom traits struct for function pointers. |
116 | /// |
117 | /// Function pointers can't be directly given these traits as functions can't |
118 | /// have their alignment computed with `alignof` and we need different casting. |
119 | /// |
120 | /// To rely on higher alignment for a specialized use, you can provide a |
121 | /// customized form of this template explicitly with higher alignment, and |
122 | /// potentially use alignment attributes on functions to satisfy that. |
123 | template <int Alignment, typename FunctionPointerT> |
124 | struct FunctionPointerLikeTypeTraits { |
125 | static constexpr int NumLowBitsAvailable = |
126 | detail::ConstantLog2<Alignment>::value; |
127 | static inline void *getAsVoidPointer(FunctionPointerT P) { |
128 | assert((reinterpret_cast<uintptr_t>(P) &(((reinterpret_cast<uintptr_t>(P) & ~((uintptr_t)-1 << NumLowBitsAvailable)) == 0 && "Alignment not satisfied for an actual function pointer!" ) ? static_cast<void> (0) : __assert_fail ("(reinterpret_cast<uintptr_t>(P) & ~((uintptr_t)-1 << NumLowBitsAvailable)) == 0 && \"Alignment not satisfied for an actual function pointer!\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/Support/PointerLikeTypeTraits.h" , 130, __PRETTY_FUNCTION__)) |
129 | ~((uintptr_t)-1 << NumLowBitsAvailable)) == 0 &&(((reinterpret_cast<uintptr_t>(P) & ~((uintptr_t)-1 << NumLowBitsAvailable)) == 0 && "Alignment not satisfied for an actual function pointer!" ) ? static_cast<void> (0) : __assert_fail ("(reinterpret_cast<uintptr_t>(P) & ~((uintptr_t)-1 << NumLowBitsAvailable)) == 0 && \"Alignment not satisfied for an actual function pointer!\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/Support/PointerLikeTypeTraits.h" , 130, __PRETTY_FUNCTION__)) |
130 | "Alignment not satisfied for an actual function pointer!")(((reinterpret_cast<uintptr_t>(P) & ~((uintptr_t)-1 << NumLowBitsAvailable)) == 0 && "Alignment not satisfied for an actual function pointer!" ) ? static_cast<void> (0) : __assert_fail ("(reinterpret_cast<uintptr_t>(P) & ~((uintptr_t)-1 << NumLowBitsAvailable)) == 0 && \"Alignment not satisfied for an actual function pointer!\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/Support/PointerLikeTypeTraits.h" , 130, __PRETTY_FUNCTION__)); |
131 | return reinterpret_cast<void *>(P); |
132 | } |
133 | static inline FunctionPointerT getFromVoidPointer(void *P) { |
134 | return reinterpret_cast<FunctionPointerT>(P); |
135 | } |
136 | }; |
137 | |
/// Provide a default specialization for function pointers that assumes 4-byte
/// alignment.
///
/// We assume here that functions used with this are always at least 4-byte
/// aligned. This means that, for example, thumb functions won't work or systems
/// with weird unaligned function pointers won't work. But all practical systems
/// we support satisfy this requirement.
///
/// Delegates to FunctionPointerLikeTypeTraits with Alignment = 4, i.e. two
/// low bits available for tagging.
template <typename ReturnT, typename... ParamTs>
struct PointerLikeTypeTraits<ReturnT (*)(ParamTs...)>
    : FunctionPointerLikeTypeTraits<4, ReturnT (*)(ParamTs...)> {};
148 | |
149 | } // end namespace llvm |
150 | |
151 | #endif |
1 | //===-- llvm/Operator.h - Operator utility subclass -------------*- C++ -*-===// | ||||||||||||||
2 | // | ||||||||||||||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||||||||||||
4 | // See https://llvm.org/LICENSE.txt for license information. | ||||||||||||||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||||||||||||
6 | // | ||||||||||||||
7 | //===----------------------------------------------------------------------===// | ||||||||||||||
8 | // | ||||||||||||||
9 | // This file defines various classes for working with Instructions and | ||||||||||||||
10 | // ConstantExprs. | ||||||||||||||
11 | // | ||||||||||||||
12 | //===----------------------------------------------------------------------===// | ||||||||||||||
13 | |||||||||||||||
14 | #ifndef LLVM_IR_OPERATOR_H | ||||||||||||||
15 | #define LLVM_IR_OPERATOR_H | ||||||||||||||
16 | |||||||||||||||
17 | #include "llvm/ADT/None.h" | ||||||||||||||
18 | #include "llvm/ADT/Optional.h" | ||||||||||||||
19 | #include "llvm/IR/Constants.h" | ||||||||||||||
20 | #include "llvm/IR/Instruction.h" | ||||||||||||||
21 | #include "llvm/IR/Type.h" | ||||||||||||||
22 | #include "llvm/IR/Value.h" | ||||||||||||||
23 | #include "llvm/Support/Casting.h" | ||||||||||||||
24 | #include <cstddef> | ||||||||||||||
25 | |||||||||||||||
26 | namespace llvm { | ||||||||||||||
27 | |||||||||||||||
28 | /// This is a utility class that provides an abstraction for the common | ||||||||||||||
29 | /// functionality between Instructions and ConstantExprs. | ||||||||||||||
30 | class Operator : public User { | ||||||||||||||
31 | public: | ||||||||||||||
32 | // The Operator class is intended to be used as a utility, and is never itself | ||||||||||||||
33 | // instantiated. | ||||||||||||||
34 | Operator() = delete; | ||||||||||||||
35 | ~Operator() = delete; | ||||||||||||||
36 | |||||||||||||||
37 | void *operator new(size_t s) = delete; | ||||||||||||||
38 | |||||||||||||||
39 | /// Return the opcode for this Instruction or ConstantExpr. | ||||||||||||||
40 | unsigned getOpcode() const { | ||||||||||||||
41 | if (const Instruction *I = dyn_cast<Instruction>(this)) | ||||||||||||||
42 | return I->getOpcode(); | ||||||||||||||
43 | return cast<ConstantExpr>(this)->getOpcode(); | ||||||||||||||
44 | } | ||||||||||||||
45 | |||||||||||||||
46 | /// If V is an Instruction or ConstantExpr, return its opcode. | ||||||||||||||
47 | /// Otherwise return UserOp1. | ||||||||||||||
48 | static unsigned getOpcode(const Value *V) { | ||||||||||||||
49 | if (const Instruction *I
| ||||||||||||||
50 | return I->getOpcode(); | ||||||||||||||
51 | if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) | ||||||||||||||
52 | return CE->getOpcode(); | ||||||||||||||
53 | return Instruction::UserOp1; | ||||||||||||||
54 | } | ||||||||||||||
55 | |||||||||||||||
56 | static bool classof(const Instruction *) { return true; } | ||||||||||||||
57 | static bool classof(const ConstantExpr *) { return true; } | ||||||||||||||
58 | static bool classof(const Value *V) { | ||||||||||||||
59 | return isa<Instruction>(V) || isa<ConstantExpr>(V); | ||||||||||||||
60 | } | ||||||||||||||
61 | }; | ||||||||||||||
62 | |||||||||||||||
63 | /// Utility class for integer operators which may exhibit overflow - Add, Sub, | ||||||||||||||
64 | /// Mul, and Shl. It does not include SDiv, despite that operator having the | ||||||||||||||
65 | /// potential for overflow. | ||||||||||||||
66 | class OverflowingBinaryOperator : public Operator { | ||||||||||||||
67 | public: | ||||||||||||||
68 | enum { | ||||||||||||||
69 | AnyWrap = 0, | ||||||||||||||
70 | NoUnsignedWrap = (1 << 0), | ||||||||||||||
71 | NoSignedWrap = (1 << 1) | ||||||||||||||
72 | }; | ||||||||||||||
73 | |||||||||||||||
74 | private: | ||||||||||||||
75 | friend class Instruction; | ||||||||||||||
76 | friend class ConstantExpr; | ||||||||||||||
77 | |||||||||||||||
78 | void setHasNoUnsignedWrap(bool B) { | ||||||||||||||
79 | SubclassOptionalData = | ||||||||||||||
80 | (SubclassOptionalData & ~NoUnsignedWrap) | (B * NoUnsignedWrap); | ||||||||||||||
81 | } | ||||||||||||||
82 | void setHasNoSignedWrap(bool B) { | ||||||||||||||
83 | SubclassOptionalData = | ||||||||||||||
84 | (SubclassOptionalData & ~NoSignedWrap) | (B * NoSignedWrap); | ||||||||||||||
85 | } | ||||||||||||||
86 | |||||||||||||||
87 | public: | ||||||||||||||
88 | /// Test whether this operation is known to never | ||||||||||||||
89 | /// undergo unsigned overflow, aka the nuw property. | ||||||||||||||
90 | bool hasNoUnsignedWrap() const { | ||||||||||||||
91 | return SubclassOptionalData & NoUnsignedWrap; | ||||||||||||||
92 | } | ||||||||||||||
93 | |||||||||||||||
94 | /// Test whether this operation is known to never | ||||||||||||||
95 | /// undergo signed overflow, aka the nsw property. | ||||||||||||||
96 | bool hasNoSignedWrap() const { | ||||||||||||||
97 | return (SubclassOptionalData & NoSignedWrap) != 0; | ||||||||||||||
98 | } | ||||||||||||||
99 | |||||||||||||||
100 | static bool classof(const Instruction *I) { | ||||||||||||||
101 | return I->getOpcode() == Instruction::Add || | ||||||||||||||
102 | I->getOpcode() == Instruction::Sub || | ||||||||||||||
103 | I->getOpcode() == Instruction::Mul || | ||||||||||||||
104 | I->getOpcode() == Instruction::Shl; | ||||||||||||||
105 | } | ||||||||||||||
106 | static bool classof(const ConstantExpr *CE) { | ||||||||||||||
107 | return CE->getOpcode() == Instruction::Add || | ||||||||||||||
108 | CE->getOpcode() == Instruction::Sub || | ||||||||||||||
109 | CE->getOpcode() == Instruction::Mul || | ||||||||||||||
110 | CE->getOpcode() == Instruction::Shl; | ||||||||||||||
111 | } | ||||||||||||||
112 | static bool classof(const Value *V) { | ||||||||||||||
113 | return (isa<Instruction>(V) && classof(cast<Instruction>(V))) || | ||||||||||||||
114 | (isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V))); | ||||||||||||||
115 | } | ||||||||||||||
116 | }; | ||||||||||||||
117 | |||||||||||||||
118 | /// A udiv or sdiv instruction, which can be marked as "exact", | ||||||||||||||
119 | /// indicating that no bits are destroyed. | ||||||||||||||
120 | class PossiblyExactOperator : public Operator { | ||||||||||||||
121 | public: | ||||||||||||||
122 | enum { | ||||||||||||||
123 | IsExact = (1 << 0) | ||||||||||||||
124 | }; | ||||||||||||||
125 | |||||||||||||||
126 | private: | ||||||||||||||
127 | friend class Instruction; | ||||||||||||||
128 | friend class ConstantExpr; | ||||||||||||||
129 | |||||||||||||||
130 | void setIsExact(bool B) { | ||||||||||||||
131 | SubclassOptionalData = (SubclassOptionalData & ~IsExact) | (B * IsExact); | ||||||||||||||
132 | } | ||||||||||||||
133 | |||||||||||||||
134 | public: | ||||||||||||||
135 | /// Test whether this division is known to be exact, with zero remainder. | ||||||||||||||
136 | bool isExact() const { | ||||||||||||||
137 | return SubclassOptionalData & IsExact; | ||||||||||||||
138 | } | ||||||||||||||
139 | |||||||||||||||
140 | static bool isPossiblyExactOpcode(unsigned OpC) { | ||||||||||||||
141 | return OpC == Instruction::SDiv || | ||||||||||||||
142 | OpC == Instruction::UDiv || | ||||||||||||||
143 | OpC == Instruction::AShr || | ||||||||||||||
144 | OpC == Instruction::LShr; | ||||||||||||||
145 | } | ||||||||||||||
146 | |||||||||||||||
147 | static bool classof(const ConstantExpr *CE) { | ||||||||||||||
148 | return isPossiblyExactOpcode(CE->getOpcode()); | ||||||||||||||
149 | } | ||||||||||||||
150 | static bool classof(const Instruction *I) { | ||||||||||||||
151 | return isPossiblyExactOpcode(I->getOpcode()); | ||||||||||||||
152 | } | ||||||||||||||
153 | static bool classof(const Value *V) { | ||||||||||||||
154 | return (isa<Instruction>(V) && classof(cast<Instruction>(V))) || | ||||||||||||||
155 | (isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V))); | ||||||||||||||
156 | } | ||||||||||||||
157 | }; | ||||||||||||||
158 | |||||||||||||||
/// Convenience struct for specifying and reasoning about fast-math flags.
class FastMathFlags {
private:
  friend class FPMathOperator;

  // Bit set built from the enum values below; ~0U means "fast" (everything).
  unsigned Flags = 0;

  // Private raw-bits constructor, used by FPMathOperator when extracting
  // flags out of Value::SubclassOptionalData.
  FastMathFlags(unsigned F) {
    // If all 7 bits are set, turn this into -1. If the number of bits grows,
    // this must be updated. This is intended to provide some forward binary
    // compatibility insurance for the meaning of 'fast' in case bits are added.
    Flags = (F == 0x7F) ? ~0U : F;
  }

public:
  // This is how the bits are used in Value::SubclassOptionalData so they
  // should fit there too.
  // WARNING: We're out of space. SubclassOptionalData only has 7 bits. New
  // functionality will require a change in how this information is stored.
  enum {
    AllowReassoc = (1 << 0),
    NoNaNs = (1 << 1),
    NoInfs = (1 << 2),
    NoSignedZeros = (1 << 3),
    AllowReciprocal = (1 << 4),
    AllowContract = (1 << 5),
    ApproxFunc = (1 << 6)
  };

  FastMathFlags() = default;

  /// Produce a flag set with every property enabled.
  static FastMathFlags getFast() {
    FastMathFlags Result;
    Result.setFast();
    return Result;
  }

  /// Whole-set queries.
  bool any() const { return Flags != 0; }
  bool none() const { return Flags == 0; }
  bool all() const { return Flags == ~0U; }

  /// Whole-set mutators.
  void clear() { Flags = 0; }
  void set() { Flags = ~0U; }

  /// Flag queries
  bool allowReassoc() const { return (Flags & AllowReassoc) != 0; }
  bool noNaNs() const { return (Flags & NoNaNs) != 0; }
  bool noInfs() const { return (Flags & NoInfs) != 0; }
  bool noSignedZeros() const { return (Flags & NoSignedZeros) != 0; }
  bool allowReciprocal() const { return (Flags & AllowReciprocal) != 0; }
  bool allowContract() const { return (Flags & AllowContract) != 0; }
  bool approxFunc() const { return (Flags & ApproxFunc) != 0; }
  /// 'Fast' means all bits are set.
  bool isFast() const { return all(); }

  /// Flag setters: set the bit when B is true, clear it when B is false.
  void setAllowReassoc(bool B = true) {
    Flags = B ? (Flags | AllowReassoc) : (Flags & ~AllowReassoc);
  }
  void setNoNaNs(bool B = true) {
    Flags = B ? (Flags | NoNaNs) : (Flags & ~NoNaNs);
  }
  void setNoInfs(bool B = true) {
    Flags = B ? (Flags | NoInfs) : (Flags & ~NoInfs);
  }
  void setNoSignedZeros(bool B = true) {
    Flags = B ? (Flags | NoSignedZeros) : (Flags & ~NoSignedZeros);
  }
  void setAllowReciprocal(bool B = true) {
    Flags = B ? (Flags | AllowReciprocal) : (Flags & ~AllowReciprocal);
  }
  void setAllowContract(bool B = true) {
    Flags = B ? (Flags | AllowContract) : (Flags & ~AllowContract);
  }
  void setApproxFunc(bool B = true) {
    Flags = B ? (Flags | ApproxFunc) : (Flags & ~ApproxFunc);
  }
  void setFast(bool B = true) {
    if (B)
      set();
    else
      clear();
  }

  /// Intersect with another flag set (bitwise AND).
  void operator&=(const FastMathFlags &OtherFlags) {
    Flags &= OtherFlags.Flags;
  }
};
243 | |||||||||||||||
244 | /// Utility class for floating point operations which can have | ||||||||||||||
245 | /// information about relaxed accuracy requirements attached to them. | ||||||||||||||
246 | class FPMathOperator : public Operator { | ||||||||||||||
247 | private: | ||||||||||||||
248 | friend class Instruction; | ||||||||||||||
249 | |||||||||||||||
250 | /// 'Fast' means all bits are set. | ||||||||||||||
251 | void setFast(bool B) { | ||||||||||||||
252 | setHasAllowReassoc(B); | ||||||||||||||
253 | setHasNoNaNs(B); | ||||||||||||||
254 | setHasNoInfs(B); | ||||||||||||||
255 | setHasNoSignedZeros(B); | ||||||||||||||
256 | setHasAllowReciprocal(B); | ||||||||||||||
257 | setHasAllowContract(B); | ||||||||||||||
258 | setHasApproxFunc(B); | ||||||||||||||
259 | } | ||||||||||||||
260 | |||||||||||||||
261 | void setHasAllowReassoc(bool B) { | ||||||||||||||
262 | SubclassOptionalData = | ||||||||||||||
263 | (SubclassOptionalData & ~FastMathFlags::AllowReassoc) | | ||||||||||||||
264 | (B * FastMathFlags::AllowReassoc); | ||||||||||||||
265 | } | ||||||||||||||
266 | |||||||||||||||
267 | void setHasNoNaNs(bool B) { | ||||||||||||||
268 | SubclassOptionalData = | ||||||||||||||
269 | (SubclassOptionalData & ~FastMathFlags::NoNaNs) | | ||||||||||||||
270 | (B * FastMathFlags::NoNaNs); | ||||||||||||||
271 | } | ||||||||||||||
272 | |||||||||||||||
273 | void setHasNoInfs(bool B) { | ||||||||||||||
274 | SubclassOptionalData = | ||||||||||||||
275 | (SubclassOptionalData & ~FastMathFlags::NoInfs) | | ||||||||||||||
276 | (B * FastMathFlags::NoInfs); | ||||||||||||||
277 | } | ||||||||||||||
278 | |||||||||||||||
279 | void setHasNoSignedZeros(bool B) { | ||||||||||||||
280 | SubclassOptionalData = | ||||||||||||||
281 | (SubclassOptionalData & ~FastMathFlags::NoSignedZeros) | | ||||||||||||||
282 | (B * FastMathFlags::NoSignedZeros); | ||||||||||||||
283 | } | ||||||||||||||
284 | |||||||||||||||
285 | void setHasAllowReciprocal(bool B) { | ||||||||||||||
286 | SubclassOptionalData = | ||||||||||||||
287 | (SubclassOptionalData & ~FastMathFlags::AllowReciprocal) | | ||||||||||||||
288 | (B * FastMathFlags::AllowReciprocal); | ||||||||||||||
289 | } | ||||||||||||||
290 | |||||||||||||||
291 | void setHasAllowContract(bool B) { | ||||||||||||||
292 | SubclassOptionalData = | ||||||||||||||
293 | (SubclassOptionalData & ~FastMathFlags::AllowContract) | | ||||||||||||||
294 | (B * FastMathFlags::AllowContract); | ||||||||||||||
295 | } | ||||||||||||||
296 | |||||||||||||||
297 | void setHasApproxFunc(bool B) { | ||||||||||||||
298 | SubclassOptionalData = | ||||||||||||||
299 | (SubclassOptionalData & ~FastMathFlags::ApproxFunc) | | ||||||||||||||
300 | (B * FastMathFlags::ApproxFunc); | ||||||||||||||
301 | } | ||||||||||||||
302 | |||||||||||||||
303 | /// Convenience function for setting multiple fast-math flags. | ||||||||||||||
304 | /// FMF is a mask of the bits to set. | ||||||||||||||
305 | void setFastMathFlags(FastMathFlags FMF) { | ||||||||||||||
306 | SubclassOptionalData |= FMF.Flags; | ||||||||||||||
307 | } | ||||||||||||||
308 | |||||||||||||||
309 | /// Convenience function for copying all fast-math flags. | ||||||||||||||
310 | /// All values in FMF are transferred to this operator. | ||||||||||||||
311 | void copyFastMathFlags(FastMathFlags FMF) { | ||||||||||||||
312 | SubclassOptionalData = FMF.Flags; | ||||||||||||||
313 | } | ||||||||||||||
314 | |||||||||||||||
315 | public: | ||||||||||||||
316 | /// Test if this operation allows all non-strict floating-point transforms. | ||||||||||||||
317 | bool isFast() const { | ||||||||||||||
318 | return ((SubclassOptionalData & FastMathFlags::AllowReassoc) != 0 && | ||||||||||||||
319 | (SubclassOptionalData & FastMathFlags::NoNaNs) != 0 && | ||||||||||||||
320 | (SubclassOptionalData & FastMathFlags::NoInfs) != 0 && | ||||||||||||||
321 | (SubclassOptionalData & FastMathFlags::NoSignedZeros) != 0 && | ||||||||||||||
322 | (SubclassOptionalData & FastMathFlags::AllowReciprocal) != 0 && | ||||||||||||||
323 | (SubclassOptionalData & FastMathFlags::AllowContract) != 0 && | ||||||||||||||
324 | (SubclassOptionalData & FastMathFlags::ApproxFunc) != 0); | ||||||||||||||
325 | } | ||||||||||||||
326 | |||||||||||||||
327 | /// Test if this operation may be simplified with reassociative transforms. | ||||||||||||||
328 | bool hasAllowReassoc() const { | ||||||||||||||
329 | return (SubclassOptionalData & FastMathFlags::AllowReassoc) != 0; | ||||||||||||||
330 | } | ||||||||||||||
331 | |||||||||||||||
332 | /// Test if this operation's arguments and results are assumed not-NaN. | ||||||||||||||
333 | bool hasNoNaNs() const { | ||||||||||||||
334 | return (SubclassOptionalData & FastMathFlags::NoNaNs) != 0; | ||||||||||||||
335 | } | ||||||||||||||
336 | |||||||||||||||
337 | /// Test if this operation's arguments and results are assumed not-infinite. | ||||||||||||||
338 | bool hasNoInfs() const { | ||||||||||||||
339 | return (SubclassOptionalData & FastMathFlags::NoInfs) != 0; | ||||||||||||||
340 | } | ||||||||||||||
341 | |||||||||||||||
342 | /// Test if this operation can ignore the sign of zero. | ||||||||||||||
343 | bool hasNoSignedZeros() const { | ||||||||||||||
344 | return (SubclassOptionalData & FastMathFlags::NoSignedZeros) != 0; | ||||||||||||||
345 | } | ||||||||||||||
346 | |||||||||||||||
347 | /// Test if this operation can use reciprocal multiply instead of division. | ||||||||||||||
348 | bool hasAllowReciprocal() const { | ||||||||||||||
349 | return (SubclassOptionalData & FastMathFlags::AllowReciprocal) != 0; | ||||||||||||||
350 | } | ||||||||||||||
351 | |||||||||||||||
352 | /// Test if this operation can be floating-point contracted (FMA). | ||||||||||||||
353 | bool hasAllowContract() const { | ||||||||||||||
354 | return (SubclassOptionalData & FastMathFlags::AllowContract) != 0; | ||||||||||||||
355 | } | ||||||||||||||
356 | |||||||||||||||
357 | /// Test if this operation allows approximations of math library functions or | ||||||||||||||
358 | /// intrinsics. | ||||||||||||||
359 | bool hasApproxFunc() const { | ||||||||||||||
360 | return (SubclassOptionalData & FastMathFlags::ApproxFunc) != 0; | ||||||||||||||
361 | } | ||||||||||||||
362 | |||||||||||||||
363 | /// Convenience function for getting all the fast-math flags | ||||||||||||||
364 | FastMathFlags getFastMathFlags() const { | ||||||||||||||
365 | return FastMathFlags(SubclassOptionalData); | ||||||||||||||
366 | } | ||||||||||||||
367 | |||||||||||||||
368 | /// Get the maximum error permitted by this operation in ULPs. An accuracy of | ||||||||||||||
369 | /// 0.0 means that the operation should be performed with the default | ||||||||||||||
370 | /// precision. | ||||||||||||||
371 | float getFPAccuracy() const; | ||||||||||||||
372 | |||||||||||||||
373 | static bool classof(const Value *V) { | ||||||||||||||
374 | unsigned Opcode; | ||||||||||||||
375 | if (auto *I = dyn_cast<Instruction>(V)) | ||||||||||||||
376 | Opcode = I->getOpcode(); | ||||||||||||||
377 | else if (auto *CE = dyn_cast<ConstantExpr>(V)) | ||||||||||||||
378 | Opcode = CE->getOpcode(); | ||||||||||||||
379 | else | ||||||||||||||
380 | return false; | ||||||||||||||
381 | |||||||||||||||
382 | switch (Opcode) { | ||||||||||||||
383 | case Instruction::FNeg: | ||||||||||||||
384 | case Instruction::FAdd: | ||||||||||||||
385 | case Instruction::FSub: | ||||||||||||||
386 | case Instruction::FMul: | ||||||||||||||
387 | case Instruction::FDiv: | ||||||||||||||
388 | case Instruction::FRem: | ||||||||||||||
389 | // FIXME: To clean up and correct the semantics of fast-math-flags, FCmp | ||||||||||||||
390 | // should not be treated as a math op, but the other opcodes should. | ||||||||||||||
391 | // This would make things consistent with Select/PHI (FP value type | ||||||||||||||
392 | // determines whether they are math ops and, therefore, capable of | ||||||||||||||
393 | // having fast-math-flags). | ||||||||||||||
394 | case Instruction::FCmp: | ||||||||||||||
395 | return true; | ||||||||||||||
396 | case Instruction::PHI: | ||||||||||||||
397 | case Instruction::Select: | ||||||||||||||
398 | case Instruction::Call: { | ||||||||||||||
399 | Type *Ty = V->getType(); | ||||||||||||||
400 | while (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) | ||||||||||||||
401 | Ty = ArrTy->getElementType(); | ||||||||||||||
402 | return Ty->isFPOrFPVectorTy(); | ||||||||||||||
403 | } | ||||||||||||||
404 | default: | ||||||||||||||
405 | return false; | ||||||||||||||
406 | } | ||||||||||||||
407 | } | ||||||||||||||
408 | }; | ||||||||||||||
409 | |||||||||||||||
/// A helper template for defining operators for individual opcodes.
/// SuperClass supplies the shared behavior (e.g. OverflowingBinaryOperator)
/// and Opc pins the single opcode this operator view matches.
template<typename SuperClass, unsigned Opc>
class ConcreteOperator : public SuperClass {
public:
  // Matches exactly when the instruction carries this opcode.
  static bool classof(const Instruction *I) {
    return I->getOpcode() == Opc;
  }
  // Likewise for constant expressions.
  static bool classof(const ConstantExpr *CE) {
    return CE->getOpcode() == Opc;
  }
  static bool classof(const Value *V) {
    return (isa<Instruction>(V) && classof(cast<Instruction>(V))) ||
           (isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V)));
  }
};
425 | |||||||||||||||
// Concrete operator views over the overflow-capable (nuw/nsw) opcodes.
class AddOperator
    : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Add> {
};
class SubOperator
    : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Sub> {
};
class MulOperator
    : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Mul> {
};
class ShlOperator
    : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Shl> {
};

// Concrete operator views over the exact-capable opcodes.
class SDivOperator
    : public ConcreteOperator<PossiblyExactOperator, Instruction::SDiv> {
};
class UDivOperator
    : public ConcreteOperator<PossiblyExactOperator, Instruction::UDiv> {
};
class AShrOperator
    : public ConcreteOperator<PossiblyExactOperator, Instruction::AShr> {
};
class LShrOperator
    : public ConcreteOperator<PossiblyExactOperator, Instruction::LShr> {
};

// Plain operator view with no extra flags.
class ZExtOperator : public ConcreteOperator<Operator, Instruction::ZExt> {};
453 | |||||||||||||||
/// Common view of a getelementptr: matches both GetElementPtrInst
/// instructions and getelementptr constant expressions.
class GEPOperator
    : public ConcreteOperator<Operator, Instruction::GetElementPtr> {
  friend class GetElementPtrInst;
  friend class ConstantExpr;

  // Bit layout of Value::SubclassOptionalData for GEPs.
  enum {
    IsInBounds = (1 << 0),
    // InRangeIndex: bits 1-6
  };

  // Set/clear the inbounds flag (bit 0) without touching the inrange bits.
  void setIsInBounds(bool B) {
    SubclassOptionalData =
        (SubclassOptionalData & ~IsInBounds) | (B * IsInBounds);
  }

public:
  /// Test whether this is an inbounds GEP, as defined by LangRef.html.
  bool isInBounds() const {
    return SubclassOptionalData & IsInBounds;
  }

  /// Returns the offset of the index with an inrange attachment, or None if
  /// none.
  Optional<unsigned> getInRangeIndex() const {
    // Bits 1-6 hold the index biased by one; zero means "no inrange index".
    if (SubclassOptionalData >> 1 == 0) return None;
    return (SubclassOptionalData >> 1) - 1;
  }

  // Iterators over the index operands only (operand 0 is the base pointer).
  inline op_iterator idx_begin() { return op_begin()+1; }
  inline const_op_iterator idx_begin() const { return op_begin()+1; }
  inline op_iterator idx_end() { return op_end(); }
  inline const_op_iterator idx_end() const { return op_end(); }

  /// The base pointer being indexed (operand 0).
  Value *getPointerOperand() {
    return getOperand(0);
  }
  const Value *getPointerOperand() const {
    return getOperand(0);
  }
  static unsigned getPointerOperandIndex() {
    return 0U; // get index for modifying correct operand
  }

  /// Method to return the pointer operand as a PointerType.
  Type *getPointerOperandType() const {
    return getPointerOperand()->getType();
  }

  // Defined out-of-line; the element types are not recoverable from this
  // header alone.
  Type *getSourceElementType() const;
  Type *getResultElementType() const;

  /// Method to return the address space of the pointer operand.
  unsigned getPointerAddressSpace() const {
    return getPointerOperandType()->getPointerAddressSpace();
  }

  /// Number of index operands, i.e. all operands after the base pointer.
  unsigned getNumIndices() const { // Note: always non-negative
    return getNumOperands() - 1;
  }

  /// True if this GEP has at least one index operand.
  bool hasIndices() const {
    return getNumOperands() > 1;
  }

  /// Return true if all of the indices of this GEP are zeros.
  /// If so, the result pointer and the first operand have the same
  /// value, just potentially different types.
  bool hasAllZeroIndices() const {
    for (const_op_iterator I = idx_begin(), E = idx_end(); I != E; ++I) {
      if (ConstantInt *C = dyn_cast<ConstantInt>(I))
        if (C->isZero())
          continue;
      return false;
    }
    return true;
  }

  /// Return true if all of the indices of this GEP are constant integers.
  /// If so, the result pointer and the first operand have
  /// a constant offset between them.
  bool hasAllConstantIndices() const {
    for (const_op_iterator I = idx_begin(), E = idx_end(); I != E; ++I) {
      if (!isa<ConstantInt>(I))
        return false;
    }
    return true;
  }

  /// Count the index operands that are not compile-time-constant integers.
  unsigned countNonConstantIndices() const {
    return count_if(make_range(idx_begin(), idx_end()), [](const Use& use) {
        return !isa<ConstantInt>(*use);
      });
  }

  /// Accumulate the constant address offset of this GEP if possible.
  ///
  /// This routine accepts an APInt into which it will accumulate the constant
  /// offset of this GEP if the GEP is in fact constant. If the GEP is not
  /// all-constant, it returns false and the value of the offset APInt is
  /// undefined (it is *not* preserved!). The APInt passed into this routine
  /// must be at exactly as wide as the IntPtr type for the address space of the
  /// base GEP pointer.
  bool accumulateConstantOffset(const DataLayout &DL, APInt &Offset) const;
};
558 | |||||||||||||||
/// Operator view of a 'ptrtoint' instruction or constant expression.
class PtrToIntOperator
    : public ConcreteOperator<Operator, Instruction::PtrToInt> {
  // NOTE(review): presumably this was meant to be PtrToIntInst — confirm
  // against the instruction class name before relying on the friendship.
  friend class PtrToInt;
  friend class ConstantExpr;

public:
  /// The pointer being converted (operand 0).
  Value *getPointerOperand() {
    return getOperand(0);
  }
  const Value *getPointerOperand() const {
    return getOperand(0);
  }

  static unsigned getPointerOperandIndex() {
    return 0U; // get index for modifying correct operand
  }

  /// Method to return the pointer operand as a PointerType.
  Type *getPointerOperandType() const {
    return getPointerOperand()->getType();
  }

  /// Method to return the address space of the pointer operand.
  unsigned getPointerAddressSpace() const {
    return cast<PointerType>(getPointerOperandType())->getAddressSpace();
  }
};
586 | |||||||||||||||
/// Operator view of a 'bitcast' instruction or constant expression.
class BitCastOperator
    : public ConcreteOperator<Operator, Instruction::BitCast> {
  friend class BitCastInst;
  friend class ConstantExpr;

public:
  /// Type of the value being cast (operand 0's type).
  Type *getSrcTy() const {
    return getOperand(0)->getType();
  }

  /// Type produced by the cast (this value's own type).
  Type *getDestTy() const {
    return getType();
  }
};
601 | |||||||||||||||
602 | } // end namespace llvm | ||||||||||||||
603 | |||||||||||||||
604 | #endif // LLVM_IR_OPERATOR_H |
1 | //===- BasicTTIImpl.h -------------------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | /// \file |
10 | /// This file provides a helper that implements much of the TTI interface in |
11 | /// terms of the target-independent code generator and TargetLowering |
12 | /// interfaces. |
13 | // |
14 | //===----------------------------------------------------------------------===// |
15 | |
16 | #ifndef LLVM_CODEGEN_BASICTTIIMPL_H |
17 | #define LLVM_CODEGEN_BASICTTIIMPL_H |
18 | |
19 | #include "llvm/ADT/APInt.h" |
20 | #include "llvm/ADT/ArrayRef.h" |
21 | #include "llvm/ADT/BitVector.h" |
22 | #include "llvm/ADT/SmallPtrSet.h" |
23 | #include "llvm/ADT/SmallVector.h" |
24 | #include "llvm/Analysis/LoopInfo.h" |
25 | #include "llvm/Analysis/TargetTransformInfo.h" |
26 | #include "llvm/Analysis/TargetTransformInfoImpl.h" |
27 | #include "llvm/CodeGen/ISDOpcodes.h" |
28 | #include "llvm/CodeGen/TargetLowering.h" |
29 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
30 | #include "llvm/CodeGen/ValueTypes.h" |
31 | #include "llvm/IR/BasicBlock.h" |
32 | #include "llvm/IR/CallSite.h" |
33 | #include "llvm/IR/Constant.h" |
34 | #include "llvm/IR/Constants.h" |
35 | #include "llvm/IR/DataLayout.h" |
36 | #include "llvm/IR/DerivedTypes.h" |
37 | #include "llvm/IR/InstrTypes.h" |
38 | #include "llvm/IR/Instruction.h" |
39 | #include "llvm/IR/Instructions.h" |
40 | #include "llvm/IR/Intrinsics.h" |
41 | #include "llvm/IR/Operator.h" |
42 | #include "llvm/IR/Type.h" |
43 | #include "llvm/IR/Value.h" |
44 | #include "llvm/MC/MCSchedule.h" |
45 | #include "llvm/Support/Casting.h" |
46 | #include "llvm/Support/CommandLine.h" |
47 | #include "llvm/Support/ErrorHandling.h" |
48 | #include "llvm/Support/MachineValueType.h" |
49 | #include "llvm/Support/MathExtras.h" |
50 | #include <algorithm> |
51 | #include <cassert> |
52 | #include <cstdint> |
53 | #include <limits> |
54 | #include <utility> |
55 | |
56 | namespace llvm { |
57 | |
58 | class Function; |
59 | class GlobalValue; |
60 | class LLVMContext; |
61 | class ScalarEvolution; |
62 | class SCEV; |
63 | class TargetMachine; |
64 | |
65 | extern cl::opt<unsigned> PartialUnrollingThreshold; |
66 | |
67 | /// Base class which can be used to help build a TTI implementation. |
68 | /// |
69 | /// This class provides as much implementation of the TTI interface as is |
70 | /// possible using the target independent parts of the code generator. |
71 | /// |
72 | /// In order to subclass it, your class must implement a getST() method to |
73 | /// return the subtarget, and a getTLI() method to return the target lowering. |
74 | /// We need these methods implemented in the derived class so that this class |
75 | /// doesn't have to duplicate storage for them. |
76 | template <typename T> |
77 | class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> { |
78 | private: |
79 | using BaseT = TargetTransformInfoImplCRTPBase<T>; |
80 | using TTI = TargetTransformInfo; |
81 | |
82 | /// Estimate a cost of Broadcast as an extract and sequence of insert |
83 | /// operations. |
84 | unsigned getBroadcastShuffleOverhead(Type *Ty) { |
85 | assert(Ty->isVectorTy() && "Can only shuffle vectors")((Ty->isVectorTy() && "Can only shuffle vectors") ? static_cast<void> (0) : __assert_fail ("Ty->isVectorTy() && \"Can only shuffle vectors\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 85, __PRETTY_FUNCTION__)); |
86 | unsigned Cost = 0; |
87 | // Broadcast cost is equal to the cost of extracting the zero'th element |
88 | // plus the cost of inserting it into every element of the result vector. |
89 | Cost += static_cast<T *>(this)->getVectorInstrCost( |
90 | Instruction::ExtractElement, Ty, 0); |
91 | |
92 | for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) { |
93 | Cost += static_cast<T *>(this)->getVectorInstrCost( |
94 | Instruction::InsertElement, Ty, i); |
95 | } |
96 | return Cost; |
97 | } |
98 | |
99 | /// Estimate a cost of shuffle as a sequence of extract and insert |
100 | /// operations. |
101 | unsigned getPermuteShuffleOverhead(Type *Ty) { |
102 | assert(Ty->isVectorTy() && "Can only shuffle vectors")((Ty->isVectorTy() && "Can only shuffle vectors") ? static_cast<void> (0) : __assert_fail ("Ty->isVectorTy() && \"Can only shuffle vectors\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 102, __PRETTY_FUNCTION__)); |
103 | unsigned Cost = 0; |
104 | // Shuffle cost is equal to the cost of extracting element from its argument |
105 | // plus the cost of inserting them onto the result vector. |
106 | |
107 | // e.g. <4 x float> has a mask of <0,5,2,7> i.e we need to extract from |
108 | // index 0 of first vector, index 1 of second vector,index 2 of first |
109 | // vector and finally index 3 of second vector and insert them at index |
110 | // <0,1,2,3> of result vector. |
111 | for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) { |
112 | Cost += static_cast<T *>(this) |
113 | ->getVectorInstrCost(Instruction::InsertElement, Ty, i); |
114 | Cost += static_cast<T *>(this) |
115 | ->getVectorInstrCost(Instruction::ExtractElement, Ty, i); |
116 | } |
117 | return Cost; |
118 | } |
119 | |
120 | /// Estimate a cost of subvector extraction as a sequence of extract and |
121 | /// insert operations. |
122 | unsigned getExtractSubvectorOverhead(Type *Ty, int Index, Type *SubTy) { |
123 | assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&((Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() && "Can only extract subvectors from vectors" ) ? static_cast<void> (0) : __assert_fail ("Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() && \"Can only extract subvectors from vectors\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 124, __PRETTY_FUNCTION__)) |
124 | "Can only extract subvectors from vectors")((Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() && "Can only extract subvectors from vectors" ) ? static_cast<void> (0) : __assert_fail ("Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() && \"Can only extract subvectors from vectors\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 124, __PRETTY_FUNCTION__)); |
125 | int NumSubElts = SubTy->getVectorNumElements(); |
126 | assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&(((Index + NumSubElts) <= (int)Ty->getVectorNumElements () && "SK_ExtractSubvector index out of range") ? static_cast <void> (0) : __assert_fail ("(Index + NumSubElts) <= (int)Ty->getVectorNumElements() && \"SK_ExtractSubvector index out of range\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 127, __PRETTY_FUNCTION__)) |
127 | "SK_ExtractSubvector index out of range")(((Index + NumSubElts) <= (int)Ty->getVectorNumElements () && "SK_ExtractSubvector index out of range") ? static_cast <void> (0) : __assert_fail ("(Index + NumSubElts) <= (int)Ty->getVectorNumElements() && \"SK_ExtractSubvector index out of range\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 127, __PRETTY_FUNCTION__)); |
128 | |
129 | unsigned Cost = 0; |
130 | // Subvector extraction cost is equal to the cost of extracting element from |
131 | // the source type plus the cost of inserting them into the result vector |
132 | // type. |
133 | for (int i = 0; i != NumSubElts; ++i) { |
134 | Cost += static_cast<T *>(this)->getVectorInstrCost( |
135 | Instruction::ExtractElement, Ty, i + Index); |
136 | Cost += static_cast<T *>(this)->getVectorInstrCost( |
137 | Instruction::InsertElement, SubTy, i); |
138 | } |
139 | return Cost; |
140 | } |
141 | |
142 | /// Estimate a cost of subvector insertion as a sequence of extract and |
143 | /// insert operations. |
144 | unsigned getInsertSubvectorOverhead(Type *Ty, int Index, Type *SubTy) { |
145 | assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&((Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() && "Can only insert subvectors into vectors" ) ? static_cast<void> (0) : __assert_fail ("Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() && \"Can only insert subvectors into vectors\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 146, __PRETTY_FUNCTION__)) |
146 | "Can only insert subvectors into vectors")((Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() && "Can only insert subvectors into vectors" ) ? static_cast<void> (0) : __assert_fail ("Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() && \"Can only insert subvectors into vectors\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 146, __PRETTY_FUNCTION__)); |
147 | int NumSubElts = SubTy->getVectorNumElements(); |
148 | assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&(((Index + NumSubElts) <= (int)Ty->getVectorNumElements () && "SK_InsertSubvector index out of range") ? static_cast <void> (0) : __assert_fail ("(Index + NumSubElts) <= (int)Ty->getVectorNumElements() && \"SK_InsertSubvector index out of range\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 149, __PRETTY_FUNCTION__)) |
149 | "SK_InsertSubvector index out of range")(((Index + NumSubElts) <= (int)Ty->getVectorNumElements () && "SK_InsertSubvector index out of range") ? static_cast <void> (0) : __assert_fail ("(Index + NumSubElts) <= (int)Ty->getVectorNumElements() && \"SK_InsertSubvector index out of range\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 149, __PRETTY_FUNCTION__)); |
150 | |
151 | unsigned Cost = 0; |
152 | // Subvector insertion cost is equal to the cost of extracting element from |
153 | // the source type plus the cost of inserting them into the result vector |
154 | // type. |
155 | for (int i = 0; i != NumSubElts; ++i) { |
156 | Cost += static_cast<T *>(this)->getVectorInstrCost( |
157 | Instruction::ExtractElement, SubTy, i); |
158 | Cost += static_cast<T *>(this)->getVectorInstrCost( |
159 | Instruction::InsertElement, Ty, i + Index); |
160 | } |
161 | return Cost; |
162 | } |
163 | |
  /// Local query method delegates up to T which *must* implement this!
  /// CRTP hook: the derived class owns the subtarget pointer.
  const TargetSubtargetInfo *getST() const {
    return static_cast<const T *>(this)->getST();
  }

  /// Local query method delegates up to T which *must* implement this!
  /// CRTP hook: the derived class owns the target-lowering pointer.
  const TargetLoweringBase *getTLI() const {
    return static_cast<const T *>(this)->getTLI();
  }
173 | |
174 | static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) { |
175 | switch (M) { |
176 | case TTI::MIM_Unindexed: |
177 | return ISD::UNINDEXED; |
178 | case TTI::MIM_PreInc: |
179 | return ISD::PRE_INC; |
180 | case TTI::MIM_PreDec: |
181 | return ISD::PRE_DEC; |
182 | case TTI::MIM_PostInc: |
183 | return ISD::POST_INC; |
184 | case TTI::MIM_PostDec: |
185 | return ISD::POST_DEC; |
186 | } |
187 | llvm_unreachable("Unexpected MemIndexedMode")::llvm::llvm_unreachable_internal("Unexpected MemIndexedMode" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 187); |
188 | } |
189 | |
190 | protected: |
  /// Construct from the target's DataLayout; \p TM is accepted for interface
  /// symmetry but not used here (derived classes hold the subtarget/lowering).
  explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
      : BaseT(DL) {}
193 | virtual ~BasicTTIImplBase() = default; |
194 | |
195 | using TargetTransformInfoImplBase::DL; |
196 | |
197 | public: |
198 | /// \name Scalar TTI Implementations |
199 | /// @{ |
200 | bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, |
201 | unsigned AddressSpace, unsigned Alignment, |
202 | bool *Fast) const { |
203 | EVT E = EVT::getIntegerVT(Context, BitWidth); |
204 | return getTLI()->allowsMisalignedMemoryAccesses( |
205 | E, AddressSpace, Alignment, MachineMemOperand::MONone, Fast); |
206 | } |
207 | |
  /// Default: assume no branch divergence (non-SIMT target).
  bool hasBranchDivergence() { return false; }

  /// Default: do not use the GPU divergence analysis.
  bool useGPUDivergenceAnalysis() { return false; }

  /// Default: no value is considered a source of divergence.
  bool isSourceOfDivergence(const Value *V) { return false; }

  /// Default: no value is known to be always uniform.
  bool isAlwaysUniform(const Value *V) { return false; }

  unsigned getFlatAddressSpace() {
    // Return an invalid address space.
    return -1;
  }

  /// Default: no flat-address operands to collect for \p IID.
  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const {
    return false;
  }

  /// Default: intrinsics cannot be rewritten to a new address space.
  bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
                                        Value *OldV, Value *NewV) const {
    return false;
  }
230 | |
  /// Whether \p imm can be encoded directly in an add instruction; delegates
  /// to the target lowering.
  bool isLegalAddImmediate(int64_t imm) {
    return getTLI()->isLegalAddImmediate(imm);
  }

  /// Whether \p imm can be encoded directly in an integer compare; delegates
  /// to the target lowering.
  bool isLegalICmpImmediate(int64_t imm) {
    return getTLI()->isLegalICmpImmediate(imm);
  }
238 | |
  /// Pack the addressing-mode components (BaseGV + BaseOffset + BaseReg +
  /// Scale*IndexReg) into a TargetLoweringBase::AddrMode and ask the target
  /// whether it is legal for accesses of type \p Ty in \p AddrSpace.
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale,
                             unsigned AddrSpace, Instruction *I = nullptr) {
    TargetLoweringBase::AddrMode AM;
    AM.BaseGV = BaseGV;
    AM.BaseOffs = BaseOffset;
    AM.HasBaseReg = HasBaseReg;
    AM.Scale = Scale;
    return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
  }
249 | |
  /// Whether a pre/post-indexed load of \p Ty is legal; translates the TTI
  /// mode to ISD and delegates to the target lowering.
  bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty,
                          const DataLayout &DL) const {
    EVT VT = getTLI()->getValueType(DL, Ty);
    return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT);
  }

  /// Whether a pre/post-indexed store of \p Ty is legal.
  bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty,
                           const DataLayout &DL) const {
    EVT VT = getTLI()->getValueType(DL, Ty);
    return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT);
  }

  /// LSR cost comparison: use the target-independent default ordering.
  bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
    return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
  }
265 | |
  /// Cost of using the given addressing-mode scaling; builds an AddrMode and
  /// delegates to the target lowering.
  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                           bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
    TargetLoweringBase::AddrMode AM;
    AM.BaseGV = BaseGV;
    AM.BaseOffs = BaseOffset;
    AM.HasBaseReg = HasBaseReg;
    AM.Scale = Scale;
    return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace);
  }
275 | |
  /// Whether truncating Ty1 to Ty2 is free on this target.
  bool isTruncateFree(Type *Ty1, Type *Ty2) {
    return getTLI()->isTruncateFree(Ty1, Ty2);
  }

  /// Whether hoisting \p I out of its block is profitable per the target.
  bool isProfitableToHoist(Instruction *I) {
    return getTLI()->isProfitableToHoist(I);
  }

  /// Whether codegen should use alias analysis, per the subtarget.
  bool useAA() const { return getST()->useAA(); }

  /// Whether \p Ty maps to a legal machine value type on this target.
  bool isTypeLegal(Type *Ty) {
    EVT VT = getTLI()->getValueType(DL, Ty);
    return getTLI()->isTypeLegal(VT);
  }
290 | |
  /// GEP cost: use the target-independent base implementation.
  int getGEPCost(Type *PointeeType, const Value *Ptr,
                 ArrayRef<const Value *> Operands) {
    return BaseT::getGEPCost(PointeeType, Ptr, Operands);
  }
295 | |
296 | int getExtCost(const Instruction *I, const Value *Src) { |
297 | if (getTLI()->isExtFree(I)) |
298 | return TargetTransformInfo::TCC_Free; |
299 | |
300 | if (isa<ZExtInst>(I) || isa<SExtInst>(I)) |
301 | if (const LoadInst *LI = dyn_cast<LoadInst>(Src)) |
302 | if (getTLI()->isExtLoad(LI, I, DL)) |
303 | return TargetTransformInfo::TCC_Free; |
304 | |
305 | return TargetTransformInfo::TCC_Basic; |
306 | } |
307 | |
  /// Value-argument overload: defer to the base implementation, which
  /// forwards to the type-based overload.
  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                            ArrayRef<const Value *> Arguments, const User *U) {
    return BaseT::getIntrinsicCost(IID, RetTy, Arguments, U);
  }
312 | |
313 | unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, |
314 | ArrayRef<Type *> ParamTys, const User *U) { |
315 | if (IID == Intrinsic::cttz) { |
316 | if (getTLI()->isCheapToSpeculateCttz()) |
317 | return TargetTransformInfo::TCC_Basic; |
318 | return TargetTransformInfo::TCC_Expensive; |
319 | } |
320 | |
321 | if (IID == Intrinsic::ctlz) { |
322 | if (getTLI()->isCheapToSpeculateCtlz()) |
323 | return TargetTransformInfo::TCC_Basic; |
324 | return TargetTransformInfo::TCC_Expensive; |
325 | } |
326 | |
327 | return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U); |
328 | } |
329 | |
  /// Estimate how many "clusters" the switch \p SI lowers to; sets
  /// \p JumpTableSize to the table's range when a jump table would be used.
  /// Returns 1 for a single bit test or jump table, otherwise the case count.
  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JumpTableSize,
                                            ProfileSummaryInfo *PSI,
                                            BlockFrequencyInfo *BFI) {
    /// Try to find the estimated number of clusters. Note that the number of
    /// clusters identified in this function could be different from the actual
    /// numbers found in lowering. This function ignore switches that are
    /// lowered with a mix of jump table / bit test / BTree. This function was
    /// initially intended to be used when estimating the cost of switch in
    /// inline cost heuristic, but it's a generic cost model to be used in other
    /// places (e.g., in loop unrolling).
    unsigned N = SI.getNumCases();
    const TargetLoweringBase *TLI = getTLI();
    const DataLayout &DL = this->getDataLayout();

    JumpTableSize = 0;
    bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent());

    // Early exit if both a jump table and bit test are not allowed.
    if (N < 1 || (!IsJTAllowed && DL.getIndexSizeInBits(0u) < N))
      return N;

    // Find the minimum and maximum case values to size bit tests / tables.
    APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue();
    APInt MinCaseVal = MaxCaseVal;
    for (auto CI : SI.cases()) {
      const APInt &CaseVal = CI.getCaseValue()->getValue();
      if (CaseVal.sgt(MaxCaseVal))
        MaxCaseVal = CaseVal;
      if (CaseVal.slt(MinCaseVal))
        MinCaseVal = CaseVal;
    }

    // Check if suitable for a bit test
    if (N <= DL.getIndexSizeInBits(0u)) {
      SmallPtrSet<const BasicBlock *, 4> Dests;
      for (auto I : SI.cases())
        Dests.insert(I.getCaseSuccessor());

      // A bit test lowers to a single cluster.
      if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal,
                                     DL))
        return 1;
    }

    // Check if suitable for a jump table.
    if (IsJTAllowed) {
      if (N < 2 || N < TLI->getMinimumJumpTableEntries())
        return N;
      uint64_t Range =
          (MaxCaseVal - MinCaseVal)
              .getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1;
      // Check whether a range of clusters is dense enough for a jump table
      if (TLI->isSuitableForJumpTable(&SI, N, Range, PSI, BFI)) {
        JumpTableSize = Range;
        return 1;
      }
    }
    return N;
  }
388 | |
  /// Build lookup tables only if the target supports indirect branching
  /// (BR_JT or BRIND) in some form.
  bool shouldBuildLookupTables() {
    const TargetLoweringBase *TLI = getTLI();
    return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
           TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
  }

  /// Sqrt is "fast" when \p Ty is legal and FSQRT is legal or custom for it.
  bool haveFastSqrt(Type *Ty) {
    const TargetLoweringBase *TLI = getTLI();
    EVT VT = TLI->getValueType(DL, Ty);
    return TLI->isTypeLegal(VT) &&
           TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
  }

  /// Default: assume an ordered FP compare beats a compare against zero.
  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) {
    return true;
  }
405 | |
406 | unsigned getFPOpCost(Type *Ty) { |
407 | // Check whether FADD is available, as a proxy for floating-point in |
408 | // general. |
409 | const TargetLoweringBase *TLI = getTLI(); |
410 | EVT VT = TLI->getValueType(DL, Ty); |
411 | if (TLI->isOperationLegalOrCustomOrPromote(ISD::FADD, VT)) |
412 | return TargetTransformInfo::TCC_Basic; |
413 | return TargetTransformInfo::TCC_Expensive; |
414 | } |
415 | |
  /// Cost of scalar operation \p Opcode producing \p Ty from \p OpTy; refines
  /// truncs, zexts and addrspacecasts the target can fold away to TCC_Free.
  unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
    const TargetLoweringBase *TLI = getTLI();
    switch (Opcode) {
    default: break;
    case Instruction::Trunc:
      if (TLI->isTruncateFree(OpTy, Ty))
        return TargetTransformInfo::TCC_Free;
      return TargetTransformInfo::TCC_Basic;
    case Instruction::ZExt:
      if (TLI->isZExtFree(OpTy, Ty))
        return TargetTransformInfo::TCC_Free;
      return TargetTransformInfo::TCC_Basic;

    case Instruction::AddrSpaceCast:
      if (TLI->isFreeAddrSpaceCast(OpTy->getPointerAddressSpace(),
                                   Ty->getPointerAddressSpace()))
        return TargetTransformInfo::TCC_Free;
      return TargetTransformInfo::TCC_Basic;
    }

    // Everything else uses the target-independent estimate.
    return BaseT::getOperationCost(Opcode, Ty, OpTy);
  }
438 | |
  /// Default inline-threshold multiplier: no scaling.
  unsigned getInliningThresholdMultiplier() { return 1; }

  /// Default bonus (percent) applied to the inline threshold for vector code.
  int getInlinerVectorBonusPercent() { return 150; }
442 | |
  /// Enable partial/runtime unrolling sized by the subtarget's loop micro-op
  /// buffer (or the -partial-unrolling-threshold override); loops containing
  /// calls that actually lower to calls are left alone.
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP) {
    // This unrolling functionality is target independent, but to provide some
    // motivation for its intended use, for x86:

    // According to the Intel 64 and IA-32 Architectures Optimization Reference
    // Manual, Intel Core models and later have a loop stream detector (and
    // associated uop queue) that can benefit from partial unrolling.
    // The relevant requirements are:
    //  - The loop must have no more than 4 (8 for Nehalem and later) branches
    //    taken, and none of them may be calls.
    //  - The loop can have no more than 18 (28 for Nehalem and later) uops.

    // According to the Software Optimization Guide for AMD Family 15h
    // Processors, models 30h-4fh (Steamroller and later) have a loop predictor
    // and loop buffer which can benefit from partial unrolling.
    // The relevant requirements are:
    //  - The loop must have fewer than 16 branches
    //  - The loop must have less than 40 uops in all executed loop branches

    // The number of taken branches in a loop is hard to estimate here, and
    // benchmarking has revealed that it is better not to be conservative when
    // estimating the branch count. As a result, we'll ignore the branch limits
    // until someone finds a case where it matters in practice.

    unsigned MaxOps;
    const TargetSubtargetInfo *ST = getST();
    if (PartialUnrollingThreshold.getNumOccurrences() > 0)
      MaxOps = PartialUnrollingThreshold;
    else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
      MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;
    else
      return;

    // Scan the loop: don't unroll loops with calls.
    for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E;
         ++I) {
      BasicBlock *BB = *I;

      for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J)
        if (isa<CallInst>(J) || isa<InvokeInst>(J)) {
          ImmutableCallSite CS(&*J);
          if (const Function *F = CS.getCalledFunction()) {
            // Intrinsics and other "calls" lowered inline are fine.
            if (!static_cast<T *>(this)->isLoweredToCall(F))
              continue;
          }

          return;
        }
    }

    // Enable runtime and partial unrolling up to the specified size.
    // Enable using trip count upper bound to unroll loops.
    UP.Partial = UP.Runtime = UP.UpperBound = true;
    UP.PartialThreshold = MaxOps;

    // Avoid unrolling when optimizing for size.
    UP.OptSizeThreshold = 0;
    UP.PartialOptSizeThreshold = 0;

    // Set number of instructions optimized when "back edge"
    // becomes "fall through" to default value of 2.
    UP.BEInsns = 2;
  }
507 | |
  /// Hardware-loop profitability: use the target-independent default.
  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC,
                                TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) {
    return BaseT::isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
  }

  /// Predication-vs-epilogue preference: use the target-independent default.
  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
                                   AssumptionCache &AC, TargetLibraryInfo *TLI,
                                   DominatorTree *DT,
                                   const LoopAccessInfo *LAI) {
    return BaseT::preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI);
  }
521 | |
  /// Latency of \p I: loads use the sched model's default load latency;
  /// everything else uses the base estimate.
  int getInstructionLatency(const Instruction *I) {
    if (isa<LoadInst>(I))
      return getST()->getSchedModel().DefaultLoadLatency;

    return BaseT::getInstructionLatency(I);
  }
528 | |
529 | virtual Optional<unsigned> |
530 | getCacheSize(TargetTransformInfo::CacheLevel Level) const { |
531 | return Optional<unsigned>( |
532 | getST()->getCacheSize(static_cast<unsigned>(Level))); |
533 | } |
534 | |
535 | virtual Optional<unsigned> |
536 | getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const { |
537 | Optional<unsigned> TargetResult = |
538 | getST()->getCacheAssociativity(static_cast<unsigned>(Level)); |
539 | |
540 | if (TargetResult) |
541 | return TargetResult; |
542 | |
543 | return BaseT::getCacheAssociativity(Level); |
544 | } |
545 | |
  /// Cache line size (in bytes) reported by the subtarget.
  virtual unsigned getCacheLineSize() const {
    return getST()->getCacheLineSize();
  }
549 | |
  /// Prefetch distance reported by the subtarget (used by the loop data
  /// prefetch pass).
  virtual unsigned getPrefetchDistance() const {
    return getST()->getPrefetchDistance();
  }
553 | |
  /// Minimum stride for which prefetching is worthwhile, per the subtarget.
  virtual unsigned getMinPrefetchStride() const {
    return getST()->getMinPrefetchStride();
  }
557 | |
  /// Maximum number of iterations ahead that prefetching should run, per the
  /// subtarget.
  virtual unsigned getMaxPrefetchIterationsAhead() const {
    return getST()->getMaxPrefetchIterationsAhead();
  }
561 | |
562 | /// @} |
563 | |
564 | /// \name Vector TTI Implementations |
565 | /// @{ |
566 | |
567 | unsigned getRegisterBitWidth(bool Vector) const { return 32; } |
568 | |
569 | /// Estimate the overhead of scalarizing an instruction. Insert and Extract |
570 | /// are set if the result needs to be inserted and/or extracted from vectors. |
571 | unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) { |
572 | assert(Ty->isVectorTy() && "Can only scalarize vectors")((Ty->isVectorTy() && "Can only scalarize vectors" ) ? static_cast<void> (0) : __assert_fail ("Ty->isVectorTy() && \"Can only scalarize vectors\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 572, __PRETTY_FUNCTION__)); |
573 | unsigned Cost = 0; |
574 | |
575 | for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) { |
576 | if (Insert) |
577 | Cost += static_cast<T *>(this) |
578 | ->getVectorInstrCost(Instruction::InsertElement, Ty, i); |
579 | if (Extract) |
580 | Cost += static_cast<T *>(this) |
581 | ->getVectorInstrCost(Instruction::ExtractElement, Ty, i); |
582 | } |
583 | |
584 | return Cost; |
585 | } |
586 | |
587 | /// Estimate the overhead of scalarizing an instructions unique |
588 | /// non-constant operands. The types of the arguments are ordinarily |
589 | /// scalar, in which case the costs are multiplied with VF. |
590 | unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, |
591 | unsigned VF) { |
592 | unsigned Cost = 0; |
593 | SmallPtrSet<const Value*, 4> UniqueOperands; |
594 | for (const Value *A : Args) { |
595 | if (!isa<Constant>(A) && UniqueOperands.insert(A).second) { |
596 | Type *VecTy = nullptr; |
597 | if (A->getType()->isVectorTy()) { |
598 | VecTy = A->getType(); |
599 | // If A is a vector operand, VF should be 1 or correspond to A. |
600 | assert((VF == 1 || VF == VecTy->getVectorNumElements()) &&(((VF == 1 || VF == VecTy->getVectorNumElements()) && "Vector argument does not match VF") ? static_cast<void> (0) : __assert_fail ("(VF == 1 || VF == VecTy->getVectorNumElements()) && \"Vector argument does not match VF\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 601, __PRETTY_FUNCTION__)) |
601 | "Vector argument does not match VF")(((VF == 1 || VF == VecTy->getVectorNumElements()) && "Vector argument does not match VF") ? static_cast<void> (0) : __assert_fail ("(VF == 1 || VF == VecTy->getVectorNumElements()) && \"Vector argument does not match VF\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 601, __PRETTY_FUNCTION__)); |
602 | } |
603 | else |
604 | VecTy = VectorType::get(A->getType(), VF); |
605 | |
606 | Cost += getScalarizationOverhead(VecTy, false, true); |
607 | } |
608 | } |
609 | |
610 | return Cost; |
611 | } |
612 | |
613 | unsigned getScalarizationOverhead(Type *VecTy, ArrayRef<const Value *> Args) { |
614 | assert(VecTy->isVectorTy())((VecTy->isVectorTy()) ? static_cast<void> (0) : __assert_fail ("VecTy->isVectorTy()", "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 614, __PRETTY_FUNCTION__)); |
615 | |
616 | unsigned Cost = 0; |
617 | |
618 | Cost += getScalarizationOverhead(VecTy, true, false); |
619 | if (!Args.empty()) |
620 | Cost += getOperandsScalarizationOverhead(Args, |
621 | VecTy->getVectorNumElements()); |
622 | else |
623 | // When no information on arguments is provided, we add the cost |
624 | // associated with one argument as a heuristic. |
625 | Cost += getScalarizationOverhead(VecTy, false, true); |
626 | |
627 | return Cost; |
628 | } |
629 | |
630 | unsigned getMaxInterleaveFactor(unsigned VF) { return 1; } |
631 | |
632 | unsigned getArithmeticInstrCost( |
633 | unsigned Opcode, Type *Ty, |
634 | TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, |
635 | TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, |
636 | TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, |
637 | TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None, |
638 | ArrayRef<const Value *> Args = ArrayRef<const Value *>(), |
639 | const Instruction *CxtI = nullptr) { |
640 | // Check if any of the operands are vector operands. |
641 | const TargetLoweringBase *TLI = getTLI(); |
642 | int ISD = TLI->InstructionOpcodeToISD(Opcode); |
643 | assert(ISD && "Invalid opcode")((ISD && "Invalid opcode") ? static_cast<void> ( 0) : __assert_fail ("ISD && \"Invalid opcode\"", "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 643, __PRETTY_FUNCTION__)); |
644 | |
645 | std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); |
646 | |
647 | bool IsFloat = Ty->isFPOrFPVectorTy(); |
648 | // Assume that floating point arithmetic operations cost twice as much as |
649 | // integer operations. |
650 | unsigned OpCost = (IsFloat ? 2 : 1); |
651 | |
652 | if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { |
653 | // The operation is legal. Assume it costs 1. |
654 | // TODO: Once we have extract/insert subvector cost we need to use them. |
655 | return LT.first * OpCost; |
656 | } |
657 | |
658 | if (!TLI->isOperationExpand(ISD, LT.second)) { |
659 | // If the operation is custom lowered, then assume that the code is twice |
660 | // as expensive. |
661 | return LT.first * 2 * OpCost; |
662 | } |
663 | |
664 | // Else, assume that we need to scalarize this op. |
665 | // TODO: If one of the types get legalized by splitting, handle this |
666 | // similarly to what getCastInstrCost() does. |
667 | if (Ty->isVectorTy()) { |
668 | unsigned Num = Ty->getVectorNumElements(); |
669 | unsigned Cost = static_cast<T *>(this) |
670 | ->getArithmeticInstrCost(Opcode, Ty->getScalarType()); |
671 | // Return the cost of multiple scalar invocation plus the cost of |
672 | // inserting and extracting the values. |
673 | return getScalarizationOverhead(Ty, Args) + Num * Cost; |
674 | } |
675 | |
676 | // We don't know anything about this scalar instruction. |
677 | return OpCost; |
678 | } |
679 | |
680 | unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, |
681 | Type *SubTp) { |
682 | switch (Kind) { |
683 | case TTI::SK_Broadcast: |
684 | return getBroadcastShuffleOverhead(Tp); |
685 | case TTI::SK_Select: |
686 | case TTI::SK_Reverse: |
687 | case TTI::SK_Transpose: |
688 | case TTI::SK_PermuteSingleSrc: |
689 | case TTI::SK_PermuteTwoSrc: |
690 | return getPermuteShuffleOverhead(Tp); |
691 | case TTI::SK_ExtractSubvector: |
692 | return getExtractSubvectorOverhead(Tp, Index, SubTp); |
693 | case TTI::SK_InsertSubvector: |
694 | return getInsertSubvectorOverhead(Tp, Index, SubTp); |
695 | } |
696 | llvm_unreachable("Unknown TTI::ShuffleKind")::llvm::llvm_unreachable_internal("Unknown TTI::ShuffleKind", "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 696); |
697 | } |
698 | |
699 | unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, |
700 | const Instruction *I = nullptr) { |
701 | const TargetLoweringBase *TLI = getTLI(); |
702 | int ISD = TLI->InstructionOpcodeToISD(Opcode); |
703 | assert(ISD && "Invalid opcode")((ISD && "Invalid opcode") ? static_cast<void> ( 0) : __assert_fail ("ISD && \"Invalid opcode\"", "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 703, __PRETTY_FUNCTION__)); |
704 | std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, Src); |
705 | std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(DL, Dst); |
706 | |
707 | // Check for NOOP conversions. |
708 | if (SrcLT.first == DstLT.first && |
709 | SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) { |
710 | |
711 | // Bitcast between types that are legalized to the same type are free. |
712 | if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc) |
713 | return 0; |
714 | } |
715 | |
716 | if (Opcode == Instruction::Trunc && |
717 | TLI->isTruncateFree(SrcLT.second, DstLT.second)) |
718 | return 0; |
719 | |
720 | if (Opcode == Instruction::ZExt && |
721 | TLI->isZExtFree(SrcLT.second, DstLT.second)) |
722 | return 0; |
723 | |
724 | if (Opcode == Instruction::AddrSpaceCast && |
725 | TLI->isFreeAddrSpaceCast(Src->getPointerAddressSpace(), |
726 | Dst->getPointerAddressSpace())) |
727 | return 0; |
728 | |
729 | // If this is a zext/sext of a load, return 0 if the corresponding |
730 | // extending load exists on target. |
731 | if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) && |
732 | I && isa<LoadInst>(I->getOperand(0))) { |
733 | EVT ExtVT = EVT::getEVT(Dst); |
734 | EVT LoadVT = EVT::getEVT(Src); |
735 | unsigned LType = |
736 | ((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD : ISD::SEXTLOAD); |
737 | if (TLI->isLoadExtLegal(LType, ExtVT, LoadVT)) |
738 | return 0; |
739 | } |
740 | |
741 | // If the cast is marked as legal (or promote) then assume low cost. |
742 | if (SrcLT.first == DstLT.first && |
743 | TLI->isOperationLegalOrPromote(ISD, DstLT.second)) |
744 | return 1; |
745 | |
746 | // Handle scalar conversions. |
747 | if (!Src->isVectorTy() && !Dst->isVectorTy()) { |
748 | // Scalar bitcasts are usually free. |
749 | if (Opcode == Instruction::BitCast) |
750 | return 0; |
751 | |
752 | // Just check the op cost. If the operation is legal then assume it costs |
753 | // 1. |
754 | if (!TLI->isOperationExpand(ISD, DstLT.second)) |
755 | return 1; |
756 | |
757 | // Assume that illegal scalar instruction are expensive. |
758 | return 4; |
759 | } |
760 | |
761 | // Check vector-to-vector casts. |
762 | if (Dst->isVectorTy() && Src->isVectorTy()) { |
763 | // If the cast is between same-sized registers, then the check is simple. |
764 | if (SrcLT.first == DstLT.first && |
765 | SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) { |
766 | |
767 | // Assume that Zext is done using AND. |
768 | if (Opcode == Instruction::ZExt) |
769 | return 1; |
770 | |
771 | // Assume that sext is done using SHL and SRA. |
772 | if (Opcode == Instruction::SExt) |
773 | return 2; |
774 | |
775 | // Just check the op cost. If the operation is legal then assume it |
776 | // costs |
777 | // 1 and multiply by the type-legalization overhead. |
778 | if (!TLI->isOperationExpand(ISD, DstLT.second)) |
779 | return SrcLT.first * 1; |
780 | } |
781 | |
782 | // If we are legalizing by splitting, query the concrete TTI for the cost |
783 | // of casting the original vector twice. We also need to factor in the |
784 | // cost of the split itself. Count that as 1, to be consistent with |
785 | // TLI->getTypeLegalizationCost(). |
786 | if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) == |
787 | TargetLowering::TypeSplitVector || |
788 | TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) == |
789 | TargetLowering::TypeSplitVector) && |
790 | Src->getVectorNumElements() > 1 && Dst->getVectorNumElements() > 1) { |
791 | Type *SplitDst = VectorType::get(Dst->getVectorElementType(), |
792 | Dst->getVectorNumElements() / 2); |
793 | Type *SplitSrc = VectorType::get(Src->getVectorElementType(), |
794 | Src->getVectorNumElements() / 2); |
795 | T *TTI = static_cast<T *>(this); |
796 | return TTI->getVectorSplitCost() + |
797 | (2 * TTI->getCastInstrCost(Opcode, SplitDst, SplitSrc, I)); |
798 | } |
799 | |
800 | // In other cases where the source or destination are illegal, assume |
801 | // the operation will get scalarized. |
802 | unsigned Num = Dst->getVectorNumElements(); |
803 | unsigned Cost = static_cast<T *>(this)->getCastInstrCost( |
804 | Opcode, Dst->getScalarType(), Src->getScalarType(), I); |
805 | |
806 | // Return the cost of multiple scalar invocation plus the cost of |
807 | // inserting and extracting the values. |
808 | return getScalarizationOverhead(Dst, true, true) + Num * Cost; |
809 | } |
810 | |
811 | // We already handled vector-to-vector and scalar-to-scalar conversions. |
812 | // This |
813 | // is where we handle bitcast between vectors and scalars. We need to assume |
814 | // that the conversion is scalarized in one way or another. |
815 | if (Opcode == Instruction::BitCast) |
816 | // Illegal bitcasts are done by storing and loading from a stack slot. |
817 | return (Src->isVectorTy() ? getScalarizationOverhead(Src, false, true) |
818 | : 0) + |
819 | (Dst->isVectorTy() ? getScalarizationOverhead(Dst, true, false) |
820 | : 0); |
821 | |
822 | llvm_unreachable("Unhandled cast")::llvm::llvm_unreachable_internal("Unhandled cast", "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 822); |
823 | } |
824 | |
825 | unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst, |
826 | VectorType *VecTy, unsigned Index) { |
827 | return static_cast<T *>(this)->getVectorInstrCost( |
828 | Instruction::ExtractElement, VecTy, Index) + |
829 | static_cast<T *>(this)->getCastInstrCost(Opcode, Dst, |
830 | VecTy->getElementType()); |
831 | } |
832 | |
  /// Cost of a control-flow instruction of \p Opcode.
  unsigned getCFInstrCost(unsigned Opcode) {
    // Branches are assumed to be predicted, so they are modeled as free.
    return 0;
  }
837 | |
838 | unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, |
839 | const Instruction *I) { |
840 | const TargetLoweringBase *TLI = getTLI(); |
841 | int ISD = TLI->InstructionOpcodeToISD(Opcode); |
842 | assert(ISD && "Invalid opcode")((ISD && "Invalid opcode") ? static_cast<void> ( 0) : __assert_fail ("ISD && \"Invalid opcode\"", "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 842, __PRETTY_FUNCTION__)); |
843 | |
844 | // Selects on vectors are actually vector selects. |
845 | if (ISD == ISD::SELECT) { |
846 | assert(CondTy && "CondTy must exist")((CondTy && "CondTy must exist") ? static_cast<void > (0) : __assert_fail ("CondTy && \"CondTy must exist\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 846, __PRETTY_FUNCTION__)); |
847 | if (CondTy->isVectorTy()) |
848 | ISD = ISD::VSELECT; |
849 | } |
850 | std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy); |
851 | |
852 | if (!(ValTy->isVectorTy() && !LT.second.isVector()) && |
853 | !TLI->isOperationExpand(ISD, LT.second)) { |
854 | // The operation is legal. Assume it costs 1. Multiply |
855 | // by the type-legalization overhead. |
856 | return LT.first * 1; |
857 | } |
858 | |
859 | // Otherwise, assume that the cast is scalarized. |
860 | // TODO: If one of the types get legalized by splitting, handle this |
861 | // similarly to what getCastInstrCost() does. |
862 | if (ValTy->isVectorTy()) { |
863 | unsigned Num = ValTy->getVectorNumElements(); |
864 | if (CondTy) |
865 | CondTy = CondTy->getScalarType(); |
866 | unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost( |
867 | Opcode, ValTy->getScalarType(), CondTy, I); |
868 | |
869 | // Return the cost of multiple scalar invocation plus the cost of |
870 | // inserting and extracting the values. |
871 | return getScalarizationOverhead(ValTy, true, false) + Num * Cost; |
872 | } |
873 | |
874 | // Unknown scalar opcode. |
875 | return 1; |
876 | } |
877 | |
878 | unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { |
879 | std::pair<unsigned, MVT> LT = |
880 | getTLI()->getTypeLegalizationCost(DL, Val->getScalarType()); |
881 | |
882 | return LT.first; |
883 | } |
884 | |
885 | unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, |
886 | unsigned AddressSpace, |
887 | const Instruction *I = nullptr) { |
888 | assert(!Src->isVoidTy() && "Invalid type")((!Src->isVoidTy() && "Invalid type") ? static_cast <void> (0) : __assert_fail ("!Src->isVoidTy() && \"Invalid type\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 888, __PRETTY_FUNCTION__)); |
889 | std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src); |
890 | |
891 | // Assuming that all loads of legal types cost 1. |
892 | unsigned Cost = LT.first; |
893 | |
894 | if (Src->isVectorTy() && |
895 | Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()) { |
896 | // This is a vector load that legalizes to a larger type than the vector |
897 | // itself. Unless the corresponding extending load or truncating store is |
898 | // legal, then this will scalarize. |
899 | TargetLowering::LegalizeAction LA = TargetLowering::Expand; |
900 | EVT MemVT = getTLI()->getValueType(DL, Src); |
901 | if (Opcode == Instruction::Store) |
902 | LA = getTLI()->getTruncStoreAction(LT.second, MemVT); |
903 | else |
904 | LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT); |
905 | |
906 | if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) { |
907 | // This is a vector load/store for some illegal type that is scalarized. |
908 | // We must account for the cost of building or decomposing the vector. |
909 | Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store, |
910 | Opcode == Instruction::Store); |
911 | } |
912 | } |
913 | |
914 | return Cost; |
915 | } |
916 | |
917 | unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, |
918 | unsigned Factor, |
919 | ArrayRef<unsigned> Indices, |
920 | unsigned Alignment, unsigned AddressSpace, |
921 | bool UseMaskForCond = false, |
922 | bool UseMaskForGaps = false) { |
923 | VectorType *VT = dyn_cast<VectorType>(VecTy); |
924 | assert(VT && "Expect a vector type for interleaved memory op")((VT && "Expect a vector type for interleaved memory op" ) ? static_cast<void> (0) : __assert_fail ("VT && \"Expect a vector type for interleaved memory op\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 924, __PRETTY_FUNCTION__)); |
925 | |
926 | unsigned NumElts = VT->getNumElements(); |
927 | assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor")((Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor" ) ? static_cast<void> (0) : __assert_fail ("Factor > 1 && NumElts % Factor == 0 && \"Invalid interleave factor\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 927, __PRETTY_FUNCTION__)); |
928 | |
929 | unsigned NumSubElts = NumElts / Factor; |
930 | VectorType *SubVT = VectorType::get(VT->getElementType(), NumSubElts); |
931 | |
932 | // Firstly, the cost of load/store operation. |
933 | unsigned Cost; |
934 | if (UseMaskForCond || UseMaskForGaps) |
935 | Cost = static_cast<T *>(this)->getMaskedMemoryOpCost( |
936 | Opcode, VecTy, Alignment, AddressSpace); |
937 | else |
938 | Cost = static_cast<T *>(this)->getMemoryOpCost( |
939 | Opcode, VecTy, MaybeAlign(Alignment), AddressSpace); |
940 | |
941 | // Legalize the vector type, and get the legalized and unlegalized type |
942 | // sizes. |
943 | MVT VecTyLT = getTLI()->getTypeLegalizationCost(DL, VecTy).second; |
944 | unsigned VecTySize = |
945 | static_cast<T *>(this)->getDataLayout().getTypeStoreSize(VecTy); |
946 | unsigned VecTyLTSize = VecTyLT.getStoreSize(); |
947 | |
948 | // Return the ceiling of dividing A by B. |
949 | auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; }; |
950 | |
951 | // Scale the cost of the memory operation by the fraction of legalized |
952 | // instructions that will actually be used. We shouldn't account for the |
953 | // cost of dead instructions since they will be removed. |
954 | // |
955 | // E.g., An interleaved load of factor 8: |
956 | // %vec = load <16 x i64>, <16 x i64>* %ptr |
957 | // %v0 = shufflevector %vec, undef, <0, 8> |
958 | // |
959 | // If <16 x i64> is legalized to 8 v2i64 loads, only 2 of the loads will be |
960 | // used (those corresponding to elements [0:1] and [8:9] of the unlegalized |
961 | // type). The other loads are unused. |
962 | // |
963 | // We only scale the cost of loads since interleaved store groups aren't |
964 | // allowed to have gaps. |
965 | if (Opcode == Instruction::Load && VecTySize > VecTyLTSize) { |
966 | // The number of loads of a legal type it will take to represent a load |
967 | // of the unlegalized vector type. |
968 | unsigned NumLegalInsts = ceil(VecTySize, VecTyLTSize); |
969 | |
970 | // The number of elements of the unlegalized type that correspond to a |
971 | // single legal instruction. |
972 | unsigned NumEltsPerLegalInst = ceil(NumElts, NumLegalInsts); |
973 | |
974 | // Determine which legal instructions will be used. |
975 | BitVector UsedInsts(NumLegalInsts, false); |
976 | for (unsigned Index : Indices) |
977 | for (unsigned Elt = 0; Elt < NumSubElts; ++Elt) |
978 | UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst); |
979 | |
980 | // Scale the cost of the load by the fraction of legal instructions that |
981 | // will be used. |
982 | Cost *= UsedInsts.count() / NumLegalInsts; |
983 | } |
984 | |
985 | // Then plus the cost of interleave operation. |
986 | if (Opcode == Instruction::Load) { |
987 | // The interleave cost is similar to extract sub vectors' elements |
988 | // from the wide vector, and insert them into sub vectors. |
989 | // |
990 | // E.g. An interleaved load of factor 2 (with one member of index 0): |
991 | // %vec = load <8 x i32>, <8 x i32>* %ptr |
992 | // %v0 = shuffle %vec, undef, <0, 2, 4, 6> ; Index 0 |
993 | // The cost is estimated as extract elements at 0, 2, 4, 6 from the |
994 | // <8 x i32> vector and insert them into a <4 x i32> vector. |
995 | |
996 | assert(Indices.size() <= Factor &&((Indices.size() <= Factor && "Interleaved memory op has too many members" ) ? static_cast<void> (0) : __assert_fail ("Indices.size() <= Factor && \"Interleaved memory op has too many members\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 997, __PRETTY_FUNCTION__)) |
997 | "Interleaved memory op has too many members")((Indices.size() <= Factor && "Interleaved memory op has too many members" ) ? static_cast<void> (0) : __assert_fail ("Indices.size() <= Factor && \"Interleaved memory op has too many members\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 997, __PRETTY_FUNCTION__)); |
998 | |
999 | for (unsigned Index : Indices) { |
1000 | assert(Index < Factor && "Invalid index for interleaved memory op")((Index < Factor && "Invalid index for interleaved memory op" ) ? static_cast<void> (0) : __assert_fail ("Index < Factor && \"Invalid index for interleaved memory op\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 1000, __PRETTY_FUNCTION__)); |
1001 | |
1002 | // Extract elements from loaded vector for each sub vector. |
1003 | for (unsigned i = 0; i < NumSubElts; i++) |
1004 | Cost += static_cast<T *>(this)->getVectorInstrCost( |
1005 | Instruction::ExtractElement, VT, Index + i * Factor); |
1006 | } |
1007 | |
1008 | unsigned InsSubCost = 0; |
1009 | for (unsigned i = 0; i < NumSubElts; i++) |
1010 | InsSubCost += static_cast<T *>(this)->getVectorInstrCost( |
1011 | Instruction::InsertElement, SubVT, i); |
1012 | |
1013 | Cost += Indices.size() * InsSubCost; |
1014 | } else { |
1015 | // The interleave cost is extract all elements from sub vectors, and |
1016 | // insert them into the wide vector. |
1017 | // |
1018 | // E.g. An interleaved store of factor 2: |
1019 | // %v0_v1 = shuffle %v0, %v1, <0, 4, 1, 5, 2, 6, 3, 7> |
1020 | // store <8 x i32> %interleaved.vec, <8 x i32>* %ptr |
1021 | // The cost is estimated as extract all elements from both <4 x i32> |
1022 | // vectors and insert into the <8 x i32> vector. |
1023 | |
1024 | unsigned ExtSubCost = 0; |
1025 | for (unsigned i = 0; i < NumSubElts; i++) |
1026 | ExtSubCost += static_cast<T *>(this)->getVectorInstrCost( |
1027 | Instruction::ExtractElement, SubVT, i); |
1028 | Cost += ExtSubCost * Factor; |
1029 | |
1030 | for (unsigned i = 0; i < NumElts; i++) |
1031 | Cost += static_cast<T *>(this) |
1032 | ->getVectorInstrCost(Instruction::InsertElement, VT, i); |
1033 | } |
1034 | |
1035 | if (!UseMaskForCond) |
1036 | return Cost; |
1037 | |
1038 | Type *I8Type = Type::getInt8Ty(VT->getContext()); |
1039 | VectorType *MaskVT = VectorType::get(I8Type, NumElts); |
1040 | SubVT = VectorType::get(I8Type, NumSubElts); |
1041 | |
1042 | // The Mask shuffling cost is extract all the elements of the Mask |
1043 | // and insert each of them Factor times into the wide vector: |
1044 | // |
1045 | // E.g. an interleaved group with factor 3: |
1046 | // %mask = icmp ult <8 x i32> %vec1, %vec2 |
1047 | // %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef, |
1048 | // <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7> |
1049 | // The cost is estimated as extract all mask elements from the <8xi1> mask |
1050 | // vector and insert them factor times into the <24xi1> shuffled mask |
1051 | // vector. |
1052 | for (unsigned i = 0; i < NumSubElts; i++) |
1053 | Cost += static_cast<T *>(this)->getVectorInstrCost( |
1054 | Instruction::ExtractElement, SubVT, i); |
1055 | |
1056 | for (unsigned i = 0; i < NumElts; i++) |
1057 | Cost += static_cast<T *>(this)->getVectorInstrCost( |
1058 | Instruction::InsertElement, MaskVT, i); |
1059 | |
1060 | // The Gaps mask is invariant and created outside the loop, therefore the |
1061 | // cost of creating it is not accounted for here. However if we have both |
1062 | // a MaskForGaps and some other mask that guards the execution of the |
1063 | // memory access, we need to account for the cost of And-ing the two masks |
1064 | // inside the loop. |
1065 | if (UseMaskForGaps) |
1066 | Cost += static_cast<T *>(this)->getArithmeticInstrCost( |
1067 | BinaryOperator::And, MaskVT); |
1068 | |
1069 | return Cost; |
1070 | } |
1071 | |
1072 | /// Get intrinsic cost based on arguments. |
1073 | unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, |
1074 | ArrayRef<Value *> Args, FastMathFlags FMF, |
1075 | unsigned VF = 1) { |
1076 | unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements() : 1); |
1077 | assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type")(((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type" ) ? static_cast<void> (0) : __assert_fail ("(RetVF == 1 || VF == 1) && \"VF > 1 and RetVF is a vector type\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 1077, __PRETTY_FUNCTION__)); |
1078 | auto *ConcreteTTI = static_cast<T *>(this); |
1079 | |
1080 | switch (IID) { |
1081 | default: { |
1082 | // Assume that we need to scalarize this intrinsic. |
1083 | SmallVector<Type *, 4> Types; |
1084 | for (Value *Op : Args) { |
1085 | Type *OpTy = Op->getType(); |
1086 | assert(VF == 1 || !OpTy->isVectorTy())((VF == 1 || !OpTy->isVectorTy()) ? static_cast<void> (0) : __assert_fail ("VF == 1 || !OpTy->isVectorTy()", "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 1086, __PRETTY_FUNCTION__)); |
1087 | Types.push_back(VF == 1 ? OpTy : VectorType::get(OpTy, VF)); |
1088 | } |
1089 | |
1090 | if (VF > 1 && !RetTy->isVoidTy()) |
1091 | RetTy = VectorType::get(RetTy, VF); |
1092 | |
1093 | // Compute the scalarization overhead based on Args for a vector |
1094 | // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while |
1095 | // CostModel will pass a vector RetTy and VF is 1. |
1096 | unsigned ScalarizationCost = std::numeric_limits<unsigned>::max(); |
1097 | if (RetVF > 1 || VF > 1) { |
1098 | ScalarizationCost = 0; |
1099 | if (!RetTy->isVoidTy()) |
1100 | ScalarizationCost += getScalarizationOverhead(RetTy, true, false); |
1101 | ScalarizationCost += getOperandsScalarizationOverhead(Args, VF); |
1102 | } |
1103 | |
1104 | return ConcreteTTI->getIntrinsicInstrCost(IID, RetTy, Types, FMF, |
1105 | ScalarizationCost); |
1106 | } |
1107 | case Intrinsic::masked_scatter: { |
1108 | assert(VF == 1 && "Can't vectorize types here.")((VF == 1 && "Can't vectorize types here.") ? static_cast <void> (0) : __assert_fail ("VF == 1 && \"Can't vectorize types here.\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 1108, __PRETTY_FUNCTION__)); |
1109 | Value *Mask = Args[3]; |
1110 | bool VarMask = !isa<Constant>(Mask); |
1111 | unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue(); |
1112 | return ConcreteTTI->getGatherScatterOpCost( |
1113 | Instruction::Store, Args[0]->getType(), Args[1], VarMask, Alignment); |
1114 | } |
1115 | case Intrinsic::masked_gather: { |
1116 | assert(VF == 1 && "Can't vectorize types here.")((VF == 1 && "Can't vectorize types here.") ? static_cast <void> (0) : __assert_fail ("VF == 1 && \"Can't vectorize types here.\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 1116, __PRETTY_FUNCTION__)); |
1117 | Value *Mask = Args[2]; |
1118 | bool VarMask = !isa<Constant>(Mask); |
1119 | unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue(); |
1120 | return ConcreteTTI->getGatherScatterOpCost(Instruction::Load, RetTy, |
1121 | Args[0], VarMask, Alignment); |
1122 | } |
1123 | case Intrinsic::experimental_vector_reduce_add: |
1124 | case Intrinsic::experimental_vector_reduce_mul: |
1125 | case Intrinsic::experimental_vector_reduce_and: |
1126 | case Intrinsic::experimental_vector_reduce_or: |
1127 | case Intrinsic::experimental_vector_reduce_xor: |
1128 | case Intrinsic::experimental_vector_reduce_v2_fadd: |
1129 | case Intrinsic::experimental_vector_reduce_v2_fmul: |
1130 | case Intrinsic::experimental_vector_reduce_smax: |
1131 | case Intrinsic::experimental_vector_reduce_smin: |
1132 | case Intrinsic::experimental_vector_reduce_fmax: |
1133 | case Intrinsic::experimental_vector_reduce_fmin: |
1134 | case Intrinsic::experimental_vector_reduce_umax: |
1135 | case Intrinsic::experimental_vector_reduce_umin: |
1136 | return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF); |
1137 | case Intrinsic::fshl: |
1138 | case Intrinsic::fshr: { |
1139 | Value *X = Args[0]; |
1140 | Value *Y = Args[1]; |
1141 | Value *Z = Args[2]; |
1142 | TTI::OperandValueProperties OpPropsX, OpPropsY, OpPropsZ, OpPropsBW; |
1143 | TTI::OperandValueKind OpKindX = TTI::getOperandInfo(X, OpPropsX); |
1144 | TTI::OperandValueKind OpKindY = TTI::getOperandInfo(Y, OpPropsY); |
1145 | TTI::OperandValueKind OpKindZ = TTI::getOperandInfo(Z, OpPropsZ); |
1146 | TTI::OperandValueKind OpKindBW = TTI::OK_UniformConstantValue; |
1147 | OpPropsBW = isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2 |
1148 | : TTI::OP_None; |
1149 | // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW))) |
1150 | // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW)) |
1151 | unsigned Cost = 0; |
1152 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Or, RetTy); |
1153 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Sub, RetTy); |
1154 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Shl, RetTy, |
1155 | OpKindX, OpKindZ, OpPropsX); |
1156 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::LShr, RetTy, |
1157 | OpKindY, OpKindZ, OpPropsY); |
1158 | // Non-constant shift amounts requires a modulo. |
1159 | if (OpKindZ != TTI::OK_UniformConstantValue && |
1160 | OpKindZ != TTI::OK_NonUniformConstantValue) |
1161 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::URem, RetTy, |
1162 | OpKindZ, OpKindBW, OpPropsZ, |
1163 | OpPropsBW); |
1164 | // For non-rotates (X != Y) we must add shift-by-zero handling costs. |
1165 | if (X != Y) { |
1166 | Type *CondTy = RetTy->getWithNewBitWidth(1); |
1167 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, |
1168 | CondTy, nullptr); |
1169 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy, |
1170 | CondTy, nullptr); |
1171 | } |
1172 | return Cost; |
1173 | } |
1174 | } |
1175 | } |
1176 | |
1177 | /// Get intrinsic cost based on argument types. |
1178 | /// If ScalarizationCostPassed is std::numeric_limits<unsigned>::max(), the |
1179 | /// cost of scalarizing the arguments and the return value will be computed |
1180 | /// based on types. |
1181 | unsigned getIntrinsicInstrCost( |
1182 | Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF, |
1183 | unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max()) { |
1184 | auto *ConcreteTTI = static_cast<T *>(this); |
1185 | |
1186 | SmallVector<unsigned, 2> ISDs; |
1187 | unsigned SingleCallCost = 10; // Library call cost. Make it expensive. |
1188 | switch (IID) { |
1189 | default: { |
1190 | // Assume that we need to scalarize this intrinsic. |
1191 | unsigned ScalarizationCost = ScalarizationCostPassed; |
1192 | unsigned ScalarCalls = 1; |
1193 | Type *ScalarRetTy = RetTy; |
1194 | if (RetTy->isVectorTy()) { |
1195 | if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max()) |
1196 | ScalarizationCost = getScalarizationOverhead(RetTy, true, false); |
1197 | ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements()); |
1198 | ScalarRetTy = RetTy->getScalarType(); |
1199 | } |
1200 | SmallVector<Type *, 4> ScalarTys; |
1201 | for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { |
1202 | Type *Ty = Tys[i]; |
1203 | if (Ty->isVectorTy()) { |
1204 | if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max()) |
1205 | ScalarizationCost += getScalarizationOverhead(Ty, false, true); |
1206 | ScalarCalls = std::max(ScalarCalls, Ty->getVectorNumElements()); |
1207 | Ty = Ty->getScalarType(); |
1208 | } |
1209 | ScalarTys.push_back(Ty); |
1210 | } |
1211 | if (ScalarCalls == 1) |
1212 | return 1; // Return cost of a scalar intrinsic. Assume it to be cheap. |
1213 | |
1214 | unsigned ScalarCost = |
1215 | ConcreteTTI->getIntrinsicInstrCost(IID, ScalarRetTy, ScalarTys, FMF); |
1216 | |
1217 | return ScalarCalls * ScalarCost + ScalarizationCost; |
1218 | } |
1219 | // Look for intrinsics that can be lowered directly or turned into a scalar |
1220 | // intrinsic call. |
1221 | case Intrinsic::sqrt: |
1222 | ISDs.push_back(ISD::FSQRT); |
1223 | break; |
1224 | case Intrinsic::sin: |
1225 | ISDs.push_back(ISD::FSIN); |
1226 | break; |
1227 | case Intrinsic::cos: |
1228 | ISDs.push_back(ISD::FCOS); |
1229 | break; |
1230 | case Intrinsic::exp: |
1231 | ISDs.push_back(ISD::FEXP); |
1232 | break; |
1233 | case Intrinsic::exp2: |
1234 | ISDs.push_back(ISD::FEXP2); |
1235 | break; |
1236 | case Intrinsic::log: |
1237 | ISDs.push_back(ISD::FLOG); |
1238 | break; |
1239 | case Intrinsic::log10: |
1240 | ISDs.push_back(ISD::FLOG10); |
1241 | break; |
1242 | case Intrinsic::log2: |
1243 | ISDs.push_back(ISD::FLOG2); |
1244 | break; |
1245 | case Intrinsic::fabs: |
1246 | ISDs.push_back(ISD::FABS); |
1247 | break; |
1248 | case Intrinsic::canonicalize: |
1249 | ISDs.push_back(ISD::FCANONICALIZE); |
1250 | break; |
1251 | case Intrinsic::minnum: |
1252 | ISDs.push_back(ISD::FMINNUM); |
1253 | if (FMF.noNaNs()) |
1254 | ISDs.push_back(ISD::FMINIMUM); |
1255 | break; |
1256 | case Intrinsic::maxnum: |
1257 | ISDs.push_back(ISD::FMAXNUM); |
1258 | if (FMF.noNaNs()) |
1259 | ISDs.push_back(ISD::FMAXIMUM); |
1260 | break; |
1261 | case Intrinsic::copysign: |
1262 | ISDs.push_back(ISD::FCOPYSIGN); |
1263 | break; |
1264 | case Intrinsic::floor: |
1265 | ISDs.push_back(ISD::FFLOOR); |
1266 | break; |
1267 | case Intrinsic::ceil: |
1268 | ISDs.push_back(ISD::FCEIL); |
1269 | break; |
1270 | case Intrinsic::trunc: |
1271 | ISDs.push_back(ISD::FTRUNC); |
1272 | break; |
1273 | case Intrinsic::nearbyint: |
1274 | ISDs.push_back(ISD::FNEARBYINT); |
1275 | break; |
1276 | case Intrinsic::rint: |
1277 | ISDs.push_back(ISD::FRINT); |
1278 | break; |
1279 | case Intrinsic::round: |
1280 | ISDs.push_back(ISD::FROUND); |
1281 | break; |
1282 | case Intrinsic::pow: |
1283 | ISDs.push_back(ISD::FPOW); |
1284 | break; |
1285 | case Intrinsic::fma: |
1286 | ISDs.push_back(ISD::FMA); |
1287 | break; |
1288 | case Intrinsic::fmuladd: |
1289 | ISDs.push_back(ISD::FMA); |
1290 | break; |
1291 | case Intrinsic::experimental_constrained_fmuladd: |
1292 | ISDs.push_back(ISD::STRICT_FMA); |
1293 | break; |
1294 | // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free. |
1295 | case Intrinsic::lifetime_start: |
1296 | case Intrinsic::lifetime_end: |
1297 | case Intrinsic::sideeffect: |
1298 | return 0; |
1299 | case Intrinsic::masked_store: |
1300 | return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0, |
1301 | 0); |
1302 | case Intrinsic::masked_load: |
1303 | return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0); |
1304 | case Intrinsic::experimental_vector_reduce_add: |
1305 | return ConcreteTTI->getArithmeticReductionCost(Instruction::Add, Tys[0], |
1306 | /*IsPairwiseForm=*/false); |
1307 | case Intrinsic::experimental_vector_reduce_mul: |
1308 | return ConcreteTTI->getArithmeticReductionCost(Instruction::Mul, Tys[0], |
1309 | /*IsPairwiseForm=*/false); |
1310 | case Intrinsic::experimental_vector_reduce_and: |
1311 | return ConcreteTTI->getArithmeticReductionCost(Instruction::And, Tys[0], |
1312 | /*IsPairwiseForm=*/false); |
1313 | case Intrinsic::experimental_vector_reduce_or: |
1314 | return ConcreteTTI->getArithmeticReductionCost(Instruction::Or, Tys[0], |
1315 | /*IsPairwiseForm=*/false); |
1316 | case Intrinsic::experimental_vector_reduce_xor: |
1317 | return ConcreteTTI->getArithmeticReductionCost(Instruction::Xor, Tys[0], |
1318 | /*IsPairwiseForm=*/false); |
1319 | case Intrinsic::experimental_vector_reduce_v2_fadd: |
1320 | return ConcreteTTI->getArithmeticReductionCost( |
1321 | Instruction::FAdd, Tys[0], |
1322 | /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict |
1323 | // reductions. |
1324 | case Intrinsic::experimental_vector_reduce_v2_fmul: |
1325 | return ConcreteTTI->getArithmeticReductionCost( |
1326 | Instruction::FMul, Tys[0], |
1327 | /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict |
1328 | // reductions. |
1329 | case Intrinsic::experimental_vector_reduce_smax: |
1330 | case Intrinsic::experimental_vector_reduce_smin: |
1331 | case Intrinsic::experimental_vector_reduce_fmax: |
1332 | case Intrinsic::experimental_vector_reduce_fmin: |
1333 | return ConcreteTTI->getMinMaxReductionCost( |
1334 | Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false, |
1335 | /*IsUnsigned=*/true); |
1336 | case Intrinsic::experimental_vector_reduce_umax: |
1337 | case Intrinsic::experimental_vector_reduce_umin: |
1338 | return ConcreteTTI->getMinMaxReductionCost( |
1339 | Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false, |
1340 | /*IsUnsigned=*/false); |
1341 | case Intrinsic::sadd_sat: |
1342 | case Intrinsic::ssub_sat: { |
1343 | Type *CondTy = RetTy->getWithNewBitWidth(1); |
1344 | |
1345 | Type *OpTy = StructType::create({RetTy, CondTy}); |
1346 | Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat |
1347 | ? Intrinsic::sadd_with_overflow |
1348 | : Intrinsic::ssub_with_overflow; |
1349 | |
1350 | // SatMax -> Overflow && SumDiff < 0 |
1351 | // SatMin -> Overflow && SumDiff >= 0 |
1352 | unsigned Cost = 0; |
1353 | Cost += ConcreteTTI->getIntrinsicInstrCost( |
1354 | OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed); |
1355 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, |
1356 | CondTy, nullptr); |
1357 | Cost += 2 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy, |
1358 | CondTy, nullptr); |
1359 | return Cost; |
1360 | } |
1361 | case Intrinsic::uadd_sat: |
1362 | case Intrinsic::usub_sat: { |
1363 | Type *CondTy = RetTy->getWithNewBitWidth(1); |
1364 | |
1365 | Type *OpTy = StructType::create({RetTy, CondTy}); |
1366 | Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat |
1367 | ? Intrinsic::uadd_with_overflow |
1368 | : Intrinsic::usub_with_overflow; |
1369 | |
1370 | unsigned Cost = 0; |
1371 | Cost += ConcreteTTI->getIntrinsicInstrCost( |
1372 | OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed); |
1373 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy, |
1374 | CondTy, nullptr); |
1375 | return Cost; |
1376 | } |
1377 | case Intrinsic::smul_fix: |
1378 | case Intrinsic::umul_fix: { |
1379 | unsigned ExtSize = RetTy->getScalarSizeInBits() * 2; |
1380 | Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize); |
1381 | |
1382 | unsigned ExtOp = |
1383 | IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt; |
1384 | |
1385 | unsigned Cost = 0; |
1386 | Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, RetTy); |
1387 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy); |
1388 | Cost += |
1389 | 2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy); |
1390 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, RetTy, |
1391 | TTI::OK_AnyValue, |
1392 | TTI::OK_UniformConstantValue); |
1393 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Shl, RetTy, |
1394 | TTI::OK_AnyValue, |
1395 | TTI::OK_UniformConstantValue); |
1396 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Or, RetTy); |
1397 | return Cost; |
1398 | } |
1399 | case Intrinsic::sadd_with_overflow: |
1400 | case Intrinsic::ssub_with_overflow: { |
1401 | Type *SumTy = RetTy->getContainedType(0); |
1402 | Type *OverflowTy = RetTy->getContainedType(1); |
1403 | unsigned Opcode = IID == Intrinsic::sadd_with_overflow |
1404 | ? BinaryOperator::Add |
1405 | : BinaryOperator::Sub; |
1406 | |
1407 | // LHSSign -> LHS >= 0 |
1408 | // RHSSign -> RHS >= 0 |
1409 | // SumSign -> Sum >= 0 |
1410 | // |
1411 | // Add: |
1412 | // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign) |
1413 | // Sub: |
1414 | // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) |
1415 | unsigned Cost = 0; |
1416 | Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy); |
1417 | Cost += 3 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, |
1418 | OverflowTy, nullptr); |
1419 | Cost += 2 * ConcreteTTI->getCmpSelInstrCost( |
1420 | BinaryOperator::ICmp, OverflowTy, OverflowTy, nullptr); |
1421 | Cost += |
1422 | ConcreteTTI->getArithmeticInstrCost(BinaryOperator::And, OverflowTy); |
1423 | return Cost; |
1424 | } |
1425 | case Intrinsic::uadd_with_overflow: |
1426 | case Intrinsic::usub_with_overflow: { |
1427 | Type *SumTy = RetTy->getContainedType(0); |
1428 | Type *OverflowTy = RetTy->getContainedType(1); |
1429 | unsigned Opcode = IID == Intrinsic::uadd_with_overflow |
1430 | ? BinaryOperator::Add |
1431 | : BinaryOperator::Sub; |
1432 | |
1433 | unsigned Cost = 0; |
1434 | Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy); |
1435 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, |
1436 | OverflowTy, nullptr); |
1437 | return Cost; |
1438 | } |
1439 | case Intrinsic::smul_with_overflow: |
1440 | case Intrinsic::umul_with_overflow: { |
1441 | Type *MulTy = RetTy->getContainedType(0); |
1442 | Type *OverflowTy = RetTy->getContainedType(1); |
1443 | unsigned ExtSize = MulTy->getScalarSizeInBits() * 2; |
1444 | Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize); |
1445 | |
1446 | unsigned ExtOp = |
1447 | IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt; |
1448 | |
1449 | unsigned Cost = 0; |
1450 | Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, MulTy); |
1451 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy); |
1452 | Cost += |
1453 | 2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy); |
1454 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, MulTy, |
1455 | TTI::OK_AnyValue, |
1456 | TTI::OK_UniformConstantValue); |
1457 | |
1458 | if (IID == Intrinsic::smul_with_overflow) |
1459 | Cost += ConcreteTTI->getArithmeticInstrCost( |
1460 | Instruction::AShr, MulTy, TTI::OK_AnyValue, |
1461 | TTI::OK_UniformConstantValue); |
1462 | |
1463 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy, |
1464 | OverflowTy, nullptr); |
1465 | return Cost; |
1466 | } |
1467 | case Intrinsic::ctpop: |
1468 | ISDs.push_back(ISD::CTPOP); |
1469 | // In case of legalization use TCC_Expensive. This is cheaper than a |
1470 | // library call but still not a cheap instruction. |
1471 | SingleCallCost = TargetTransformInfo::TCC_Expensive; |
1472 | break; |
1473 | // FIXME: ctlz, cttz, ... |
1474 | case Intrinsic::bswap: |
1475 | ISDs.push_back(ISD::BSWAP); |
1476 | break; |
1477 | case Intrinsic::bitreverse: |
1478 | ISDs.push_back(ISD::BITREVERSE); |
1479 | break; |
1480 | } |
1481 | |
1482 | const TargetLoweringBase *TLI = getTLI(); |
1483 | std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy); |
1484 | |
1485 | SmallVector<unsigned, 2> LegalCost; |
1486 | SmallVector<unsigned, 2> CustomCost; |
1487 | for (unsigned ISD : ISDs) { |
1488 | if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { |
1489 | if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() && |
1490 | TLI->isFAbsFree(LT.second)) { |
1491 | return 0; |
1492 | } |
1493 | |
1494 | // The operation is legal. Assume it costs 1. |
1495 | // If the type is split to multiple registers, assume that there is some |
1496 | // overhead to this. |
1497 | // TODO: Once we have extract/insert subvector cost we need to use them. |
1498 | if (LT.first > 1) |
1499 | LegalCost.push_back(LT.first * 2); |
1500 | else |
1501 | LegalCost.push_back(LT.first * 1); |
1502 | } else if (!TLI->isOperationExpand(ISD, LT.second)) { |
1503 | // If the operation is custom lowered then assume |
1504 | // that the code is twice as expensive. |
1505 | CustomCost.push_back(LT.first * 2); |
1506 | } |
1507 | } |
1508 | |
1509 | auto MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end()); |
1510 | if (MinLegalCostI != LegalCost.end()) |
1511 | return *MinLegalCostI; |
1512 | |
1513 | auto MinCustomCostI = |
1514 | std::min_element(CustomCost.begin(), CustomCost.end()); |
1515 | if (MinCustomCostI != CustomCost.end()) |
1516 | return *MinCustomCostI; |
1517 | |
1518 | // If we can't lower fmuladd into an FMA estimate the cost as a floating |
1519 | // point mul followed by an add. |
1520 | if (IID == Intrinsic::fmuladd) |
1521 | return ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) + |
1522 | ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy); |
1523 | if (IID == Intrinsic::experimental_constrained_fmuladd) |
1524 | return ConcreteTTI->getIntrinsicCost( |
1525 | Intrinsic::experimental_constrained_fmul, RetTy, Tys, |
1526 | nullptr) + |
1527 | ConcreteTTI->getIntrinsicCost( |
1528 | Intrinsic::experimental_constrained_fadd, RetTy, Tys, nullptr); |
1529 | |
1530 | // Else, assume that we need to scalarize this intrinsic. For math builtins |
1531 | // this will emit a costly libcall, adding call overhead and spills. Make it |
1532 | // very expensive. |
1533 | if (RetTy->isVectorTy()) { |
1534 | unsigned ScalarizationCost = |
1535 | ((ScalarizationCostPassed != std::numeric_limits<unsigned>::max()) |
1536 | ? ScalarizationCostPassed |
1537 | : getScalarizationOverhead(RetTy, true, false)); |
1538 | unsigned ScalarCalls = RetTy->getVectorNumElements(); |
1539 | SmallVector<Type *, 4> ScalarTys; |
1540 | for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { |
1541 | Type *Ty = Tys[i]; |
1542 | if (Ty->isVectorTy()) |
1543 | Ty = Ty->getScalarType(); |
1544 | ScalarTys.push_back(Ty); |
1545 | } |
1546 | unsigned ScalarCost = ConcreteTTI->getIntrinsicInstrCost( |
1547 | IID, RetTy->getScalarType(), ScalarTys, FMF); |
1548 | for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { |
1549 | if (Tys[i]->isVectorTy()) { |
1550 | if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max()) |
1551 | ScalarizationCost += getScalarizationOverhead(Tys[i], false, true); |
1552 | ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements()); |
1553 | } |
1554 | } |
1555 | |
1556 | return ScalarCalls * ScalarCost + ScalarizationCost; |
1557 | } |
1558 | |
1559 | // This is going to be turned into a library call, make it expensive. |
1560 | return SingleCallCost; |
1561 | } |
1562 | |
1563 | /// Compute a cost of the given call instruction. |
1564 | /// |
1565 | /// Compute the cost of calling function F with return type RetTy and |
1566 | /// argument types Tys. F might be nullptr, in this case the cost of an |
1567 | /// arbitrary call with the specified signature will be returned. |
1568 | /// This is used, for instance, when we estimate call of a vector |
1569 | /// counterpart of the given function. |
1570 | /// \param F Called function, might be nullptr. |
1571 | /// \param RetTy Return value types. |
1572 | /// \param Tys Argument types. |
1573 | /// \returns The cost of Call instruction. |
1574 | unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) { |
1575 | return 10; |
1576 | } |
1577 | |
1578 | unsigned getNumberOfParts(Type *Tp) { |
1579 | std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp); |
1580 | return LT.first; |
1581 | } |
1582 | |
  /// Address computation is assumed to be free in the basic model; targets
  /// with expensive addressing modes override this.
  unsigned getAddressComputationCost(Type *Ty, ScalarEvolution *,
                                     const SCEV *) {
    return 0;
  }
1587 | |
1588 | /// Try to calculate arithmetic and shuffle op costs for reduction operations. |
1589 | /// We're assuming that reduction operation are performing the following way: |
1590 | /// 1. Non-pairwise reduction |
1591 | /// %val1 = shufflevector<n x t> %val, <n x t> %undef, |
1592 | /// <n x i32> <i32 n/2, i32 n/2 + 1, ..., i32 n, i32 undef, ..., i32 undef> |
1593 | /// \----------------v-------------/ \----------v------------/ |
1594 | /// n/2 elements n/2 elements |
1595 | /// %red1 = op <n x t> %val, <n x t> val1 |
1596 | /// After this operation we have a vector %red1 where only the first n/2 |
1597 | /// elements are meaningful, the second n/2 elements are undefined and can be |
1598 | /// dropped. All other operations are actually working with the vector of |
1599 | /// length n/2, not n, though the real vector length is still n. |
1600 | /// %val2 = shufflevector<n x t> %red1, <n x t> %undef, |
1601 | /// <n x i32> <i32 n/4, i32 n/4 + 1, ..., i32 n/2, i32 undef, ..., i32 undef> |
1602 | /// \----------------v-------------/ \----------v------------/ |
1603 | /// n/4 elements 3*n/4 elements |
1604 | /// %red2 = op <n x t> %red1, <n x t> val2 - working with the vector of |
1605 | /// length n/2, the resulting vector has length n/4 etc. |
1606 | /// 2. Pairwise reduction: |
1607 | /// Everything is the same except for an additional shuffle operation which |
1608 | /// is used to produce operands for pairwise kind of reductions. |
1609 | /// %val1 = shufflevector<n x t> %val, <n x t> %undef, |
1610 | /// <n x i32> <i32 0, i32 2, ..., i32 n-2, i32 undef, ..., i32 undef> |
1611 | /// \-------------v----------/ \----------v------------/ |
1612 | /// n/2 elements n/2 elements |
1613 | /// %val2 = shufflevector<n x t> %val, <n x t> %undef, |
1614 | /// <n x i32> <i32 1, i32 3, ..., i32 n-1, i32 undef, ..., i32 undef> |
1615 | /// \-------------v----------/ \----------v------------/ |
1616 | /// n/2 elements n/2 elements |
1617 | /// %red1 = op <n x t> %val1, <n x t> val2 |
1618 | /// Again, the operation is performed on <n x t> vector, but the resulting |
1619 | /// vector %red1 is <n/2 x t> vector. |
1620 | /// |
1621 | /// The cost model should take into account that the actual length of the |
1622 | /// vector is reduced on each iteration. |
1623 | unsigned getArithmeticReductionCost(unsigned Opcode, Type *Ty, |
1624 | bool IsPairwise) { |
1625 | assert(Ty->isVectorTy() && "Expect a vector type")((Ty->isVectorTy() && "Expect a vector type") ? static_cast <void> (0) : __assert_fail ("Ty->isVectorTy() && \"Expect a vector type\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 1625, __PRETTY_FUNCTION__)); |
1626 | Type *ScalarTy = Ty->getVectorElementType(); |
1627 | unsigned NumVecElts = Ty->getVectorNumElements(); |
1628 | unsigned NumReduxLevels = Log2_32(NumVecElts); |
1629 | unsigned ArithCost = 0; |
1630 | unsigned ShuffleCost = 0; |
1631 | auto *ConcreteTTI = static_cast<T *>(this); |
1632 | std::pair<unsigned, MVT> LT = |
1633 | ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty); |
1634 | unsigned LongVectorCount = 0; |
1635 | unsigned MVTLen = |
1636 | LT.second.isVector() ? LT.second.getVectorNumElements() : 1; |
1637 | while (NumVecElts > MVTLen) { |
1638 | NumVecElts /= 2; |
1639 | Type *SubTy = VectorType::get(ScalarTy, NumVecElts); |
1640 | // Assume the pairwise shuffles add a cost. |
1641 | ShuffleCost += (IsPairwise + 1) * |
1642 | ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty, |
1643 | NumVecElts, SubTy); |
1644 | ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, SubTy); |
1645 | Ty = SubTy; |
1646 | ++LongVectorCount; |
1647 | } |
1648 | |
1649 | NumReduxLevels -= LongVectorCount; |
1650 | |
1651 | // The minimal length of the vector is limited by the real length of vector |
1652 | // operations performed on the current platform. That's why several final |
1653 | // reduction operations are performed on the vectors with the same |
1654 | // architecture-dependent length. |
1655 | |
1656 | // Non pairwise reductions need one shuffle per reduction level. Pairwise |
1657 | // reductions need two shuffles on every level, but the last one. On that |
1658 | // level one of the shuffles is <0, u, u, ...> which is identity. |
1659 | unsigned NumShuffles = NumReduxLevels; |
1660 | if (IsPairwise && NumReduxLevels >= 1) |
1661 | NumShuffles += NumReduxLevels - 1; |
1662 | ShuffleCost += NumShuffles * |
1663 | ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, |
1664 | 0, Ty); |
1665 | ArithCost += NumReduxLevels * |
1666 | ConcreteTTI->getArithmeticInstrCost(Opcode, Ty); |
1667 | return ShuffleCost + ArithCost + |
1668 | ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0); |
1669 | } |
1670 | |
1671 | /// Try to calculate op costs for min/max reduction operations. |
1672 | /// \param CondTy Conditional type for the Select instruction. |
1673 | unsigned getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwise, |
1674 | bool) { |
1675 | assert(Ty->isVectorTy() && "Expect a vector type")((Ty->isVectorTy() && "Expect a vector type") ? static_cast <void> (0) : __assert_fail ("Ty->isVectorTy() && \"Expect a vector type\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 1675, __PRETTY_FUNCTION__)); |
1676 | Type *ScalarTy = Ty->getVectorElementType(); |
1677 | Type *ScalarCondTy = CondTy->getVectorElementType(); |
1678 | unsigned NumVecElts = Ty->getVectorNumElements(); |
1679 | unsigned NumReduxLevels = Log2_32(NumVecElts); |
1680 | unsigned CmpOpcode; |
1681 | if (Ty->isFPOrFPVectorTy()) { |
1682 | CmpOpcode = Instruction::FCmp; |
1683 | } else { |
1684 | assert(Ty->isIntOrIntVectorTy() &&((Ty->isIntOrIntVectorTy() && "expecting floating point or integer type for min/max reduction" ) ? static_cast<void> (0) : __assert_fail ("Ty->isIntOrIntVectorTy() && \"expecting floating point or integer type for min/max reduction\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 1685, __PRETTY_FUNCTION__)) |
1685 | "expecting floating point or integer type for min/max reduction")((Ty->isIntOrIntVectorTy() && "expecting floating point or integer type for min/max reduction" ) ? static_cast<void> (0) : __assert_fail ("Ty->isIntOrIntVectorTy() && \"expecting floating point or integer type for min/max reduction\"" , "/build/llvm-toolchain-snapshot-11~++20200226111113+80d7e473e0b/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 1685, __PRETTY_FUNCTION__)); |
1686 | CmpOpcode = Instruction::ICmp; |
1687 | } |
1688 | unsigned MinMaxCost = 0; |
1689 | unsigned ShuffleCost = 0; |
1690 | auto *ConcreteTTI = static_cast<T *>(this); |
1691 | std::pair<unsigned, MVT> LT = |
1692 | ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty); |
1693 | unsigned LongVectorCount = 0; |
1694 | unsigned MVTLen = |
1695 | LT.second.isVector() ? LT.second.getVectorNumElements() : 1; |
1696 | while (NumVecElts > MVTLen) { |
1697 | NumVecElts /= 2; |
1698 | Type *SubTy = VectorType::get(ScalarTy, NumVecElts); |
1699 | CondTy = VectorType::get(ScalarCondTy, NumVecElts); |
1700 | |
1701 | // Assume the pairwise shuffles add a cost. |
1702 | ShuffleCost += (IsPairwise + 1) * |
1703 | ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty, |
1704 | NumVecElts, SubTy); |
1705 | MinMaxCost += |
1706 | ConcreteTTI->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, nullptr) + |
1707 | ConcreteTTI->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy, |
1708 | nullptr); |
1709 | Ty = SubTy; |
1710 | ++LongVectorCount; |
1711 | } |
1712 | |
1713 | NumReduxLevels -= LongVectorCount; |
1714 | |
1715 | // The minimal length of the vector is limited by the real length of vector |
1716 | // operations performed on the current platform. That's why several final |
1717 | // reduction opertions are perfomed on the vectors with the same |
1718 | // architecture-dependent length. |
1719 | |
1720 | // Non pairwise reductions need one shuffle per reduction level. Pairwise |
1721 | // reductions need two shuffles on every level, but the last one. On that |
1722 | // level one of the shuffles is <0, u, u, ...> which is identity. |
1723 | unsigned NumShuffles = NumReduxLevels; |
1724 | if (IsPairwise && NumReduxLevels >= 1) |
1725 | NumShuffles += NumReduxLevels - 1; |
1726 | ShuffleCost += NumShuffles * |
1727 | ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, |
1728 | 0, Ty); |
1729 | MinMaxCost += |
1730 | NumReduxLevels * |
1731 | (ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) + |
1732 | ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy, |
1733 | nullptr)); |
1734 | // The last min/max should be in vector registers and we counted it above. |
1735 | // So just need a single extractelement. |
1736 | return ShuffleCost + MinMaxCost + |
1737 | ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0); |
1738 | } |
1739 | |
  /// Flat cost charged for splitting a vector operation in half during
  /// legalization.
  unsigned getVectorSplitCost() { return 1; }
1741 | |
1742 | /// @} |
1743 | }; |
1744 | |
/// Concrete BasicTTIImpl that can be used if no further customization
/// is needed.
class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> {
  using BaseT = BasicTTIImplBase<BasicTTIImpl>;

  // The CRTP base calls our private getST()/getTLI() accessors.
  friend class BasicTTIImplBase<BasicTTIImpl>;

  // Subtarget and lowering info queried by the cost model; set by the
  // out-of-line constructor.
  const TargetSubtargetInfo *ST;
  const TargetLoweringBase *TLI;

  const TargetSubtargetInfo *getST() const { return ST; }
  const TargetLoweringBase *getTLI() const { return TLI; }

public:
  // Defined out of line; derives ST/TLI from the target machine for F.
  explicit BasicTTIImpl(const TargetMachine *TM, const Function &F);
};
1761 | |
1762 | } // end namespace llvm |
1763 | |
1764 | #endif // LLVM_CODEGEN_BASICTTIIMPL_H |