//===-- NVPTXTargetTransformInfo.cpp - NVPTX specific TTI -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "NVPTXTargetTransformInfo.h"
#include "NVPTXUtilities.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/Support/Debug.h"
#include <optional>
using namespace llvm;

#define DEBUG_TYPE "NVPTXtti"

// Whether the given intrinsic reads threadIdx.x/y/z.
static bool readsThreadIndex(const IntrinsicInst *II) {
  switch (II->getIntrinsicID()) {
  default: return false;
  case Intrinsic::nvvm_read_ptx_sreg_tid_x:
  case Intrinsic::nvvm_read_ptx_sreg_tid_y:
  case Intrinsic::nvvm_read_ptx_sreg_tid_z:
    return true;
  }
}

static bool readsLaneId(const IntrinsicInst *II) {
  return II->getIntrinsicID() == Intrinsic::nvvm_read_ptx_sreg_laneid;
}

// Whether the given intrinsic is an atomic instruction in PTX.
static bool isNVVMAtomic(const IntrinsicInst *II) {
  switch (II->getIntrinsicID()) {
  default: return false;
  case Intrinsic::nvvm_atomic_load_inc_32:
  case Intrinsic::nvvm_atomic_load_dec_32:

  case Intrinsic::nvvm_atomic_add_gen_f_cta:
  case Intrinsic::nvvm_atomic_add_gen_f_sys:
  case Intrinsic::nvvm_atomic_add_gen_i_cta:
  case Intrinsic::nvvm_atomic_add_gen_i_sys:
  case Intrinsic::nvvm_atomic_and_gen_i_cta:
  case Intrinsic::nvvm_atomic_and_gen_i_sys:
  case Intrinsic::nvvm_atomic_cas_gen_i_cta:
  case Intrinsic::nvvm_atomic_cas_gen_i_sys:
  case Intrinsic::nvvm_atomic_dec_gen_i_cta:
  case Intrinsic::nvvm_atomic_dec_gen_i_sys:
  case Intrinsic::nvvm_atomic_inc_gen_i_cta:
  case Intrinsic::nvvm_atomic_inc_gen_i_sys:
  case Intrinsic::nvvm_atomic_max_gen_i_cta:
  case Intrinsic::nvvm_atomic_max_gen_i_sys:
  case Intrinsic::nvvm_atomic_min_gen_i_cta:
  case Intrinsic::nvvm_atomic_min_gen_i_sys:
  case Intrinsic::nvvm_atomic_or_gen_i_cta:
  case Intrinsic::nvvm_atomic_or_gen_i_sys:
  case Intrinsic::nvvm_atomic_exch_gen_i_cta:
  case Intrinsic::nvvm_atomic_exch_gen_i_sys:
  case Intrinsic::nvvm_atomic_xor_gen_i_cta:
  case Intrinsic::nvvm_atomic_xor_gen_i_sys:
    return true;
  }
}

bool NVPTXTTIImpl::isSourceOfDivergence(const Value *V) {
  // Without inter-procedural analysis, we conservatively assume that arguments
  // to __device__ functions are divergent.
  if (const Argument *Arg = dyn_cast<Argument>(V))
    return !isKernelFunction(*Arg->getParent());
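  // (Kernel (__global__) parameters, by contrast, are supplied once by the
  // host and hold the same value in every thread, which is why kernel
  // arguments are exempt. E.g. for an illustrative __device__ function
  //   __device__ float f(float x);
  // each lane may pass a different x, so x must be treated as divergent.)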

  if (const Instruction *I = dyn_cast<Instruction>(V)) {
    // Without pointer analysis, we conservatively assume values loaded from
    // generic or local address space are divergent.
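    // (PTX local memory is per-thread storage, and a generic pointer may
    // refer to it, so two lanes loading through the same generic pointer can
    // observe different values.)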
    if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
      unsigned AS = LI->getPointerAddressSpace();
      return AS == ADDRESS_SPACE_GENERIC || AS == ADDRESS_SPACE_LOCAL;
    }
    // Atomic instructions may cause divergence. Atomic instructions are
    // executed sequentially across all threads in a warp. Therefore, an earlier
    // executed thread may see different memory inputs than a later executed
    // thread. For example, suppose *a = 0 initially.
    //
    //   atom.global.add.s32 d, [a], 1
    //
    // returns 0 for the first thread that enters the critical region, and 1 for
    // the second thread.
    if (I->isAtomic())
      return true;
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      // Instructions that read threadIdx are obviously divergent.
      if (readsThreadIndex(II) || readsLaneId(II))
        return true;
      // Handle the NVPTX atomic intrinsics that cannot be represented as an
      // atomic IR instruction.
      if (isNVVMAtomic(II))
        return true;
    }
    // Conservatively consider the return value of function calls as divergent.
    // We could analyze callees with bodies more precisely using
    // inter-procedural analysis.
    if (isa<CallInst>(I))
      return true;
  }

  return false;
}

// Convert NVVM intrinsics to target-generic LLVM code where possible.
static Instruction *simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC) {
  // Each NVVM intrinsic we can simplify can be replaced with one of:
  //
  //  * an LLVM intrinsic,
  //  * an LLVM cast operation,
  //  * an LLVM binary operation, or
  //  * ad-hoc LLVM IR for the particular operation.

  // Some transformations are only valid when the module's
  // flush-denormals-to-zero (ftz) setting is true/false, whereas other
  // transformations are valid regardless of the module's ftz setting.
  enum FtzRequirementTy {
    FTZ_Any,       // Any ftz setting is ok.
    FTZ_MustBeOn,  // Transformation is valid only if ftz is on.
    FTZ_MustBeOff, // Transformation is valid only if ftz is off.
  };
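  // For example (see the switch below), nvvm_ceil_f matches llvm.ceil only
  // when ftz is off, nvvm_ceil_ftz_f only when ftz is on, and the double
  // variant nvvm_ceil_d is unaffected by the ftz setting.
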
  // Classes of NVVM intrinsics that can't be replaced one-to-one with a
  // target-generic intrinsic, cast op, or binary op but that we can nonetheless
  // simplify.
  enum SpecialCase {
    SPC_Reciprocal,
  };

  // SimplifyAction is a poor-man's variant (plus an additional flag) that
  // represents how to replace an NVVM intrinsic with target-generic LLVM IR.
  struct SimplifyAction {
    // Invariant: At most one of these Optionals has a value.
    std::optional<Intrinsic::ID> IID;
    std::optional<Instruction::CastOps> CastOp;
    std::optional<Instruction::BinaryOps> BinaryOp;
    std::optional<SpecialCase> Special;

    FtzRequirementTy FtzRequirement = FTZ_Any;
    // Denormal handling is guarded by different attributes depending on the
    // type (denormal-fp-math vs denormal-fp-math-f32), take note of halves.
    bool IsHalfTy = false;

    SimplifyAction() = default;

    SimplifyAction(Intrinsic::ID IID, FtzRequirementTy FtzReq,
                   bool IsHalfTy = false)
        : IID(IID), FtzRequirement(FtzReq), IsHalfTy(IsHalfTy) {}

    // Cast operations don't have anything to do with FTZ, so we skip that
    // argument.
    SimplifyAction(Instruction::CastOps CastOp) : CastOp(CastOp) {}

    SimplifyAction(Instruction::BinaryOps BinaryOp, FtzRequirementTy FtzReq)
        : BinaryOp(BinaryOp), FtzRequirement(FtzReq) {}

    SimplifyAction(SpecialCase Special, FtzRequirementTy FtzReq)
        : Special(Special), FtzRequirement(FtzReq) {}
  };
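  // For example, SimplifyAction{Intrinsic::ceil, FTZ_Any} means "replace the
  // call with the target-generic llvm.ceil under any ftz setting", while
  // SimplifyAction{Instruction::FPToSI} requests a plain cast instruction.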

  // Try to generate a SimplifyAction describing how to replace our
  // IntrinsicInstr with target-generic LLVM IR.
  const SimplifyAction Action = [II]() -> SimplifyAction {
    switch (II->getIntrinsicID()) {
    // NVVM intrinsics that map directly to LLVM intrinsics.
    case Intrinsic::nvvm_ceil_d:
      return {Intrinsic::ceil, FTZ_Any};
    case Intrinsic::nvvm_ceil_f:
      return {Intrinsic::ceil, FTZ_MustBeOff};
    case Intrinsic::nvvm_ceil_ftz_f:
      return {Intrinsic::ceil, FTZ_MustBeOn};
    case Intrinsic::nvvm_fabs_d:
      return {Intrinsic::fabs, FTZ_Any};
    case Intrinsic::nvvm_fabs_f:
      return {Intrinsic::fabs, FTZ_MustBeOff};
    case Intrinsic::nvvm_fabs_ftz_f:
      return {Intrinsic::fabs, FTZ_MustBeOn};
    case Intrinsic::nvvm_floor_d:
      return {Intrinsic::floor, FTZ_Any};
    case Intrinsic::nvvm_floor_f:
      return {Intrinsic::floor, FTZ_MustBeOff};
    case Intrinsic::nvvm_floor_ftz_f:
      return {Intrinsic::floor, FTZ_MustBeOn};
    case Intrinsic::nvvm_fma_rn_d:
      return {Intrinsic::fma, FTZ_Any};
    case Intrinsic::nvvm_fma_rn_f:
      return {Intrinsic::fma, FTZ_MustBeOff};
    case Intrinsic::nvvm_fma_rn_ftz_f:
      return {Intrinsic::fma, FTZ_MustBeOn};
    case Intrinsic::nvvm_fma_rn_f16:
      return {Intrinsic::fma, FTZ_MustBeOff, true};
    case Intrinsic::nvvm_fma_rn_ftz_f16:
      return {Intrinsic::fma, FTZ_MustBeOn, true};
    case Intrinsic::nvvm_fma_rn_f16x2:
      return {Intrinsic::fma, FTZ_MustBeOff, true};
    case Intrinsic::nvvm_fma_rn_ftz_f16x2:
      return {Intrinsic::fma, FTZ_MustBeOn, true};
    case Intrinsic::nvvm_fma_rn_bf16:
      return {Intrinsic::fma, FTZ_MustBeOff, true};
    case Intrinsic::nvvm_fma_rn_ftz_bf16:
      return {Intrinsic::fma, FTZ_MustBeOn, true};
    case Intrinsic::nvvm_fma_rn_bf16x2:
      return {Intrinsic::fma, FTZ_MustBeOff, true};
    case Intrinsic::nvvm_fma_rn_ftz_bf16x2:
      return {Intrinsic::fma, FTZ_MustBeOn, true};
    case Intrinsic::nvvm_fmax_d:
      return {Intrinsic::maxnum, FTZ_Any};
    case Intrinsic::nvvm_fmax_f:
      return {Intrinsic::maxnum, FTZ_MustBeOff};
    case Intrinsic::nvvm_fmax_ftz_f:
      return {Intrinsic::maxnum, FTZ_MustBeOn};
    case Intrinsic::nvvm_fmax_nan_f:
      return {Intrinsic::maximum, FTZ_MustBeOff};
    case Intrinsic::nvvm_fmax_ftz_nan_f:
      return {Intrinsic::maximum, FTZ_MustBeOn};
    case Intrinsic::nvvm_fmax_f16:
      return {Intrinsic::maxnum, FTZ_MustBeOff, true};
    case Intrinsic::nvvm_fmax_ftz_f16:
      return {Intrinsic::maxnum, FTZ_MustBeOn, true};
    case Intrinsic::nvvm_fmax_f16x2:
      return {Intrinsic::maxnum, FTZ_MustBeOff, true};
    case Intrinsic::nvvm_fmax_ftz_f16x2:
      return {Intrinsic::maxnum, FTZ_MustBeOn, true};
    case Intrinsic::nvvm_fmax_nan_f16:
      return {Intrinsic::maximum, FTZ_MustBeOff, true};
    case Intrinsic::nvvm_fmax_ftz_nan_f16:
      return {Intrinsic::maximum, FTZ_MustBeOn, true};
    case Intrinsic::nvvm_fmax_nan_f16x2:
      return {Intrinsic::maximum, FTZ_MustBeOff, true};
    case Intrinsic::nvvm_fmax_ftz_nan_f16x2:
      return {Intrinsic::maximum, FTZ_MustBeOn, true};
    case Intrinsic::nvvm_fmin_d:
      return {Intrinsic::minnum, FTZ_Any};
    case Intrinsic::nvvm_fmin_f:
      return {Intrinsic::minnum, FTZ_MustBeOff};
    case Intrinsic::nvvm_fmin_ftz_f:
      return {Intrinsic::minnum, FTZ_MustBeOn};
    case Intrinsic::nvvm_fmin_nan_f:
      return {Intrinsic::minimum, FTZ_MustBeOff};
    case Intrinsic::nvvm_fmin_ftz_nan_f:
      return {Intrinsic::minimum, FTZ_MustBeOn};
    case Intrinsic::nvvm_fmin_f16:
      return {Intrinsic::minnum, FTZ_MustBeOff, true};
    case Intrinsic::nvvm_fmin_ftz_f16:
      return {Intrinsic::minnum, FTZ_MustBeOn, true};
    case Intrinsic::nvvm_fmin_f16x2:
      return {Intrinsic::minnum, FTZ_MustBeOff, true};
    case Intrinsic::nvvm_fmin_ftz_f16x2:
      return {Intrinsic::minnum, FTZ_MustBeOn, true};
    case Intrinsic::nvvm_fmin_nan_f16:
      return {Intrinsic::minimum, FTZ_MustBeOff, true};
    case Intrinsic::nvvm_fmin_ftz_nan_f16:
      return {Intrinsic::minimum, FTZ_MustBeOn, true};
    case Intrinsic::nvvm_fmin_nan_f16x2:
      return {Intrinsic::minimum, FTZ_MustBeOff, true};
    case Intrinsic::nvvm_fmin_ftz_nan_f16x2:
      return {Intrinsic::minimum, FTZ_MustBeOn, true};
    case Intrinsic::nvvm_round_d:
      return {Intrinsic::round, FTZ_Any};
    case Intrinsic::nvvm_round_f:
      return {Intrinsic::round, FTZ_MustBeOff};
    case Intrinsic::nvvm_round_ftz_f:
      return {Intrinsic::round, FTZ_MustBeOn};
    case Intrinsic::nvvm_sqrt_rn_d:
      return {Intrinsic::sqrt, FTZ_Any};
    case Intrinsic::nvvm_sqrt_f:
      // nvvm_sqrt_f is a special case. For most intrinsics, foo_ftz_f is the
      // ftz version, and foo_f is the non-ftz version. But nvvm_sqrt_f adopts
      // the ftz-ness of the surrounding code. sqrt_rn_f and sqrt_rn_ftz_f are
      // the versions with explicit ftz-ness.
      return {Intrinsic::sqrt, FTZ_Any};
    case Intrinsic::nvvm_sqrt_rn_f:
      return {Intrinsic::sqrt, FTZ_MustBeOff};
    case Intrinsic::nvvm_sqrt_rn_ftz_f:
      return {Intrinsic::sqrt, FTZ_MustBeOn};
    case Intrinsic::nvvm_trunc_d:
      return {Intrinsic::trunc, FTZ_Any};
    case Intrinsic::nvvm_trunc_f:
      return {Intrinsic::trunc, FTZ_MustBeOff};
    case Intrinsic::nvvm_trunc_ftz_f:
      return {Intrinsic::trunc, FTZ_MustBeOn};

    // NVVM intrinsics that map to LLVM cast operations.
    //
    // Note that llvm's target-generic conversion operators correspond to the rz
    // (round to zero) versions of the nvvm conversion intrinsics, even though
    // most everything else here uses the rn (round to nearest even) nvvm ops.
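    // For example (IR sketch):
    //   %r = call i32 @llvm.nvvm.f2i.rz(float %x)
    // becomes
    //   %r = fptosi float %x to i32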
    case Intrinsic::nvvm_d2i_rz:
    case Intrinsic::nvvm_f2i_rz:
    case Intrinsic::nvvm_d2ll_rz:
    case Intrinsic::nvvm_f2ll_rz:
      return {Instruction::FPToSI};
    case Intrinsic::nvvm_d2ui_rz:
    case Intrinsic::nvvm_f2ui_rz:
    case Intrinsic::nvvm_d2ull_rz:
    case Intrinsic::nvvm_f2ull_rz:
      return {Instruction::FPToUI};
    case Intrinsic::nvvm_i2d_rz:
    case Intrinsic::nvvm_i2f_rz:
    case Intrinsic::nvvm_ll2d_rz:
    case Intrinsic::nvvm_ll2f_rz:
      return {Instruction::SIToFP};
    case Intrinsic::nvvm_ui2d_rz:
    case Intrinsic::nvvm_ui2f_rz:
    case Intrinsic::nvvm_ull2d_rz:
    case Intrinsic::nvvm_ull2f_rz:
      return {Instruction::UIToFP};

    // NVVM intrinsics that map to LLVM binary ops.
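    // For example, llvm.nvvm.add.rn.d(a, b) becomes a plain "fadd double"
    // (rn, round to nearest even, is IR's default rounding mode).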
    case Intrinsic::nvvm_add_rn_d:
      return {Instruction::FAdd, FTZ_Any};
    case Intrinsic::nvvm_add_rn_f:
      return {Instruction::FAdd, FTZ_MustBeOff};
    case Intrinsic::nvvm_add_rn_ftz_f:
      return {Instruction::FAdd, FTZ_MustBeOn};
    case Intrinsic::nvvm_mul_rn_d:
      return {Instruction::FMul, FTZ_Any};
    case Intrinsic::nvvm_mul_rn_f:
      return {Instruction::FMul, FTZ_MustBeOff};
    case Intrinsic::nvvm_mul_rn_ftz_f:
      return {Instruction::FMul, FTZ_MustBeOn};
    case Intrinsic::nvvm_div_rn_d:
      return {Instruction::FDiv, FTZ_Any};
    case Intrinsic::nvvm_div_rn_f:
      return {Instruction::FDiv, FTZ_MustBeOff};
    case Intrinsic::nvvm_div_rn_ftz_f:
      return {Instruction::FDiv, FTZ_MustBeOn};

    // The remainder of cases are NVVM intrinsics that map to LLVM idioms, but
    // need special handling.
    //
    // We seem to be missing intrinsics for rcp.approx.{ftz.}f32, which is just
    // as well.
    case Intrinsic::nvvm_rcp_rn_d:
      return {SPC_Reciprocal, FTZ_Any};
    case Intrinsic::nvvm_rcp_rn_f:
      return {SPC_Reciprocal, FTZ_MustBeOff};
    case Intrinsic::nvvm_rcp_rn_ftz_f:
      return {SPC_Reciprocal, FTZ_MustBeOn};

    // We do not currently simplify intrinsics that give an approximate
    // answer. These include:
    //
    //   - nvvm_cos_approx_{f,ftz_f}
    //   - nvvm_ex2_approx_{d,f,ftz_f}
    //   - nvvm_lg2_approx_{d,f,ftz_f}
    //   - nvvm_sin_approx_{f,ftz_f}
    //   - nvvm_sqrt_approx_{f,ftz_f}
    //   - nvvm_rsqrt_approx_{d,f,ftz_f}
    //   - nvvm_div_approx_{ftz_d,ftz_f,f}
    //   - nvvm_rcp_approx_ftz_d
    //
    // Ideally we'd encode them as e.g. "fast call @llvm.cos", where "fast"
    // means that fastmath is enabled in the intrinsic. Unfortunately only
    // binary operators (currently) have a fastmath bit in SelectionDAG, so
    // this information gets lost and we can't select on it.
    //
    // TODO: div and rcp are lowered to a binary op, so in theory we could
    // lower them to "fast fdiv".

    default:
      return {};
    }
  }();

  // If Action.FtzRequirementTy is not satisfied by the module's ftz state, we
  // can bail out now. (Notice that in the case that IID is not an NVVM
  // intrinsic, we don't have to look up any module metadata, as
  // FtzRequirementTy will be FTZ_Any.)
  if (Action.FtzRequirement != FTZ_Any) {
    // FIXME: Broken for f64
    DenormalMode Mode = II->getFunction()->getDenormalMode(
        Action.IsHalfTy ? APFloat::IEEEhalf() : APFloat::IEEEsingle());
    bool FtzEnabled = Mode.Output == DenormalMode::PreserveSign;

    if (FtzEnabled != (Action.FtzRequirement == FTZ_MustBeOn))
      return nullptr;
  }
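
  // For example (illustrative): a function carrying the attribute
  // "denormal-fp-math-f32"="preserve-sign" flushes f32 denormal results, so
  // FtzEnabled is true and only FTZ_MustBeOn (or FTZ_Any) simplifications
  // survive the check above.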

  // Simplify to target-generic intrinsic.
  if (Action.IID) {
    SmallVector<Value *, 4> Args(II->args());
    // All the target-generic intrinsics currently of interest to us have one
    // type argument, equal to that of the nvvm intrinsic's argument.
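    // For example, a call to llvm.nvvm.fmax.f(float, float) is rebuilt here
    // as a call to the overload llvm.maxnum.f32.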
    Type *Tys[] = {II->getArgOperand(0)->getType()};
    return CallInst::Create(
        Intrinsic::getDeclaration(II->getModule(), *Action.IID, Tys), Args);
  }

  // Simplify to target-generic binary op.
  if (Action.BinaryOp)
    return BinaryOperator::Create(*Action.BinaryOp, II->getArgOperand(0),
                                  II->getArgOperand(1), II->getName());

  // Simplify to target-generic cast op.
  if (Action.CastOp)
    return CastInst::Create(*Action.CastOp, II->getArgOperand(0), II->getType(),
                            II->getName());

  // All that's left are the special cases.
  if (!Action.Special)
    return nullptr;

  switch (*Action.Special) {
  case SPC_Reciprocal:
    // Simplify reciprocal.
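    // (rcp(x) == 1 / x, so emit an fdiv with 1.0 as the numerator; the
    // rn-rounded reciprocal matches fdiv's default rounding.)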
    return BinaryOperator::Create(
        Instruction::FDiv, ConstantFP::get(II->getArgOperand(0)->getType(), 1),
        II->getArgOperand(0), II->getName());
  }
  llvm_unreachable("All SpecialCase enumerators should be handled in switch.");
}

std::optional<Instruction *>
NVPTXTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
  if (Instruction *I = simplifyNvvmIntrinsic(&II, IC)) {
    return I;
  }
  return std::nullopt;
}

InstructionCost NVPTXTTIImpl::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
    TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info,
    ArrayRef<const Value *> Args,
    const Instruction *CxtI) {
  // Legalize the type.
  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);

  int ISD = TLI->InstructionOpcodeToISD(Opcode);

  switch (ISD) {
  default:
    return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
                                         Op2Info);
  case ISD::ADD:
  case ISD::MUL:
  case ISD::XOR:
  case ISD::OR:
  case ISD::AND:
    // The machine code (SASS) simulates an i64 with two i32. Therefore, we
    // estimate that arithmetic operations on i64 are twice as expensive as
    // those on types that can fit into one machine register.
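    // (e.g. a 64-bit add is typically emitted as a pair of carry-chained
    // 32-bit adds in SASS)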
    if (LT.second.SimpleTy == MVT::i64)
      return 2 * LT.first;
    // Delegate other cases to the basic TTI.
    return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
                                         Op2Info);
  }
}

void NVPTXTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                           TTI::UnrollingPreferences &UP,
                                           OptimizationRemarkEmitter *ORE) {
  BaseT::getUnrollingPreferences(L, SE, UP, ORE);

  // Enable partial unrolling and runtime unrolling, but reduce the
  // threshold. This partially unrolls small loops which are often
  // unrolled by the PTX to SASS compiler and unrolling earlier can be
  // beneficial.
  UP.Partial = UP.Runtime = true;
  UP.PartialThreshold = UP.Threshold / 4;
}

void NVPTXTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                                         TTI::PeelingPreferences &PP) {
  BaseT::getPeelingPreferences(L, SE, PP);
}