LLVM 22.0.0git
NVVMIntrinsicUtils.h
Go to the documentation of this file.
1//===--- NVVMIntrinsicUtils.h -----------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file contains the definitions of the enumerations and flags
11/// associated with NVVM Intrinsics, along with some helper functions.
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_IR_NVVMINTRINSICUTILS_H
16#define LLVM_IR_NVVMINTRINSICUTILS_H
17
18#include <stdint.h>
19
20#include "llvm/ADT/APFloat.h"
21#include "llvm/ADT/APInt.h"
22#include "llvm/IR/Constants.h"
23#include "llvm/IR/Intrinsics.h"
24#include "llvm/IR/IntrinsicsNVPTX.h"
26
27namespace llvm {
28namespace nvvm {
29
// Reduction ops supported with TMA copy from shared to global memory for
// the "cp.reduce.async.bulk.tensor.*" family of PTX instructions.
// Each enumerator carries an explicit value and the enum is stored in a
// uint8_t.
enum class TMAReductionOp : uint8_t {
  ADD = 0,
  MIN = 1,
  MAX = 2,
  INC = 3,
  DEC = 4,
  AND = 5,
  OR = 6,
  XOR = 7,
};
43
// Enum to represent the cta_group::1 and cta_group::2 variants in the
// TMA/TCGEN05 family of PTX instructions. CG_NONE is the default, used when
// no cta_group modifier is present.
enum class CTAGroupKind : uint8_t {
  CG_NONE = 0, // default with no cta_group modifier
  CG_1 = 1,    // cta_group::1 modifier
  CG_2 = 2,    // cta_group::2 modifier
};
52
// Kinds accepted by printTcgen05MMAKind; each enumerator has an explicit
// value and the enum is stored in a uint8_t.
enum class Tcgen05MMAKind : uint8_t {
  F16 = 0,
  TF32 = 1,
  F8F6F4 = 2,
  I8 = 3,
};
54
// NOTE(review): the enum header and the enumerators for values 0 and 1 were
// missing from the extracted listing. The name is taken from
// printTcgen05CollectorUsageOp; DISCARD/LASTUSE are reconstructed and must be
// confirmed against the upstream header. FILL and USE are as extracted.
enum class Tcgen05CollectorUsageOp : uint8_t {
  DISCARD = 0,
  LASTUSE = 1,
  FILL = 2,
  USE = 3,
};
61
62void printTcgen05MMAKind(raw_ostream &OS, const Constant *ImmArgVal);
63
64void printTcgen05CollectorUsageOp(raw_ostream &OS, const Constant *ImmArgVal);
65
67 switch (IntrinsicID) {
68 case Intrinsic::nvvm_f2i_rm_ftz:
69 case Intrinsic::nvvm_f2i_rn_ftz:
70 case Intrinsic::nvvm_f2i_rp_ftz:
71 case Intrinsic::nvvm_f2i_rz_ftz:
72
73 case Intrinsic::nvvm_f2ui_rm_ftz:
74 case Intrinsic::nvvm_f2ui_rn_ftz:
75 case Intrinsic::nvvm_f2ui_rp_ftz:
76 case Intrinsic::nvvm_f2ui_rz_ftz:
77
78 case Intrinsic::nvvm_f2ll_rm_ftz:
79 case Intrinsic::nvvm_f2ll_rn_ftz:
80 case Intrinsic::nvvm_f2ll_rp_ftz:
81 case Intrinsic::nvvm_f2ll_rz_ftz:
82
83 case Intrinsic::nvvm_f2ull_rm_ftz:
84 case Intrinsic::nvvm_f2ull_rn_ftz:
85 case Intrinsic::nvvm_f2ull_rp_ftz:
86 case Intrinsic::nvvm_f2ull_rz_ftz:
87 return true;
88
89 case Intrinsic::nvvm_f2i_rm:
90 case Intrinsic::nvvm_f2i_rn:
91 case Intrinsic::nvvm_f2i_rp:
92 case Intrinsic::nvvm_f2i_rz:
93
94 case Intrinsic::nvvm_f2ui_rm:
95 case Intrinsic::nvvm_f2ui_rn:
96 case Intrinsic::nvvm_f2ui_rp:
97 case Intrinsic::nvvm_f2ui_rz:
98
99 case Intrinsic::nvvm_d2i_rm:
100 case Intrinsic::nvvm_d2i_rn:
101 case Intrinsic::nvvm_d2i_rp:
102 case Intrinsic::nvvm_d2i_rz:
103
104 case Intrinsic::nvvm_d2ui_rm:
105 case Intrinsic::nvvm_d2ui_rn:
106 case Intrinsic::nvvm_d2ui_rp:
107 case Intrinsic::nvvm_d2ui_rz:
108
109 case Intrinsic::nvvm_f2ll_rm:
110 case Intrinsic::nvvm_f2ll_rn:
111 case Intrinsic::nvvm_f2ll_rp:
112 case Intrinsic::nvvm_f2ll_rz:
113
114 case Intrinsic::nvvm_f2ull_rm:
115 case Intrinsic::nvvm_f2ull_rn:
116 case Intrinsic::nvvm_f2ull_rp:
117 case Intrinsic::nvvm_f2ull_rz:
118
119 case Intrinsic::nvvm_d2ll_rm:
120 case Intrinsic::nvvm_d2ll_rn:
121 case Intrinsic::nvvm_d2ll_rp:
122 case Intrinsic::nvvm_d2ll_rz:
123
124 case Intrinsic::nvvm_d2ull_rm:
125 case Intrinsic::nvvm_d2ull_rn:
126 case Intrinsic::nvvm_d2ull_rp:
127 case Intrinsic::nvvm_d2ull_rz:
128 return false;
129 }
130 llvm_unreachable("Checking FTZ flag for invalid f2i/d2i intrinsic");
131}
132
134 switch (IntrinsicID) {
135 // f2i
136 case Intrinsic::nvvm_f2i_rm:
137 case Intrinsic::nvvm_f2i_rm_ftz:
138 case Intrinsic::nvvm_f2i_rn:
139 case Intrinsic::nvvm_f2i_rn_ftz:
140 case Intrinsic::nvvm_f2i_rp:
141 case Intrinsic::nvvm_f2i_rp_ftz:
142 case Intrinsic::nvvm_f2i_rz:
143 case Intrinsic::nvvm_f2i_rz_ftz:
144 // d2i
145 case Intrinsic::nvvm_d2i_rm:
146 case Intrinsic::nvvm_d2i_rn:
147 case Intrinsic::nvvm_d2i_rp:
148 case Intrinsic::nvvm_d2i_rz:
149 // f2ll
150 case Intrinsic::nvvm_f2ll_rm:
151 case Intrinsic::nvvm_f2ll_rm_ftz:
152 case Intrinsic::nvvm_f2ll_rn:
153 case Intrinsic::nvvm_f2ll_rn_ftz:
154 case Intrinsic::nvvm_f2ll_rp:
155 case Intrinsic::nvvm_f2ll_rp_ftz:
156 case Intrinsic::nvvm_f2ll_rz:
157 case Intrinsic::nvvm_f2ll_rz_ftz:
158 // d2ll
159 case Intrinsic::nvvm_d2ll_rm:
160 case Intrinsic::nvvm_d2ll_rn:
161 case Intrinsic::nvvm_d2ll_rp:
162 case Intrinsic::nvvm_d2ll_rz:
163 return true;
164
165 // f2ui
166 case Intrinsic::nvvm_f2ui_rm:
167 case Intrinsic::nvvm_f2ui_rm_ftz:
168 case Intrinsic::nvvm_f2ui_rn:
169 case Intrinsic::nvvm_f2ui_rn_ftz:
170 case Intrinsic::nvvm_f2ui_rp:
171 case Intrinsic::nvvm_f2ui_rp_ftz:
172 case Intrinsic::nvvm_f2ui_rz:
173 case Intrinsic::nvvm_f2ui_rz_ftz:
174 // d2ui
175 case Intrinsic::nvvm_d2ui_rm:
176 case Intrinsic::nvvm_d2ui_rn:
177 case Intrinsic::nvvm_d2ui_rp:
178 case Intrinsic::nvvm_d2ui_rz:
179 // f2ull
180 case Intrinsic::nvvm_f2ull_rm:
181 case Intrinsic::nvvm_f2ull_rm_ftz:
182 case Intrinsic::nvvm_f2ull_rn:
183 case Intrinsic::nvvm_f2ull_rn_ftz:
184 case Intrinsic::nvvm_f2ull_rp:
185 case Intrinsic::nvvm_f2ull_rp_ftz:
186 case Intrinsic::nvvm_f2ull_rz:
187 case Intrinsic::nvvm_f2ull_rz_ftz:
188 // d2ull
189 case Intrinsic::nvvm_d2ull_rm:
190 case Intrinsic::nvvm_d2ull_rn:
191 case Intrinsic::nvvm_d2ull_rp:
192 case Intrinsic::nvvm_d2ull_rz:
193 return false;
194 }
196 "Checking invalid f2i/d2i intrinsic for signed int conversion");
197}
198
200 switch (IntrinsicID) {
201 // f2i
202 case Intrinsic::nvvm_f2i_rm:
203 case Intrinsic::nvvm_f2i_rn:
204 case Intrinsic::nvvm_f2i_rp:
205 case Intrinsic::nvvm_f2i_rz:
206 case Intrinsic::nvvm_f2i_rm_ftz:
207 case Intrinsic::nvvm_f2i_rn_ftz:
208 case Intrinsic::nvvm_f2i_rp_ftz:
209 case Intrinsic::nvvm_f2i_rz_ftz:
210 // f2ui
211 case Intrinsic::nvvm_f2ui_rm:
212 case Intrinsic::nvvm_f2ui_rn:
213 case Intrinsic::nvvm_f2ui_rp:
214 case Intrinsic::nvvm_f2ui_rz:
215 case Intrinsic::nvvm_f2ui_rm_ftz:
216 case Intrinsic::nvvm_f2ui_rn_ftz:
217 case Intrinsic::nvvm_f2ui_rp_ftz:
218 case Intrinsic::nvvm_f2ui_rz_ftz:
219 return true;
220 // d2i
221 case Intrinsic::nvvm_d2i_rm:
222 case Intrinsic::nvvm_d2i_rn:
223 case Intrinsic::nvvm_d2i_rp:
224 case Intrinsic::nvvm_d2i_rz:
225 // d2ui
226 case Intrinsic::nvvm_d2ui_rm:
227 case Intrinsic::nvvm_d2ui_rn:
228 case Intrinsic::nvvm_d2ui_rp:
229 case Intrinsic::nvvm_d2ui_rz:
230 // f2ll
231 case Intrinsic::nvvm_f2ll_rm:
232 case Intrinsic::nvvm_f2ll_rn:
233 case Intrinsic::nvvm_f2ll_rp:
234 case Intrinsic::nvvm_f2ll_rz:
235 case Intrinsic::nvvm_f2ll_rm_ftz:
236 case Intrinsic::nvvm_f2ll_rn_ftz:
237 case Intrinsic::nvvm_f2ll_rp_ftz:
238 case Intrinsic::nvvm_f2ll_rz_ftz:
239 // f2ull
240 case Intrinsic::nvvm_f2ull_rm:
241 case Intrinsic::nvvm_f2ull_rn:
242 case Intrinsic::nvvm_f2ull_rp:
243 case Intrinsic::nvvm_f2ull_rz:
244 case Intrinsic::nvvm_f2ull_rm_ftz:
245 case Intrinsic::nvvm_f2ull_rn_ftz:
246 case Intrinsic::nvvm_f2ull_rp_ftz:
247 case Intrinsic::nvvm_f2ull_rz_ftz:
248 // d2ll
249 case Intrinsic::nvvm_d2ll_rm:
250 case Intrinsic::nvvm_d2ll_rn:
251 case Intrinsic::nvvm_d2ll_rp:
252 case Intrinsic::nvvm_d2ll_rz:
253 // d2ull
254 case Intrinsic::nvvm_d2ull_rm:
255 case Intrinsic::nvvm_d2ull_rn:
256 case Intrinsic::nvvm_d2ull_rp:
257 case Intrinsic::nvvm_d2ull_rz:
258 return false;
259 }
260 llvm_unreachable("Checking NaN result for invalid f2i/d2i intrinsic");
261}
262
265 switch (IntrinsicID) {
266 // RM:
267 case Intrinsic::nvvm_f2i_rm:
268 case Intrinsic::nvvm_f2ui_rm:
269 case Intrinsic::nvvm_f2i_rm_ftz:
270 case Intrinsic::nvvm_f2ui_rm_ftz:
271 case Intrinsic::nvvm_d2i_rm:
272 case Intrinsic::nvvm_d2ui_rm:
273
274 case Intrinsic::nvvm_f2ll_rm:
275 case Intrinsic::nvvm_f2ull_rm:
276 case Intrinsic::nvvm_f2ll_rm_ftz:
277 case Intrinsic::nvvm_f2ull_rm_ftz:
278 case Intrinsic::nvvm_d2ll_rm:
279 case Intrinsic::nvvm_d2ull_rm:
281
282 // RN:
283 case Intrinsic::nvvm_f2i_rn:
284 case Intrinsic::nvvm_f2ui_rn:
285 case Intrinsic::nvvm_f2i_rn_ftz:
286 case Intrinsic::nvvm_f2ui_rn_ftz:
287 case Intrinsic::nvvm_d2i_rn:
288 case Intrinsic::nvvm_d2ui_rn:
289
290 case Intrinsic::nvvm_f2ll_rn:
291 case Intrinsic::nvvm_f2ull_rn:
292 case Intrinsic::nvvm_f2ll_rn_ftz:
293 case Intrinsic::nvvm_f2ull_rn_ftz:
294 case Intrinsic::nvvm_d2ll_rn:
295 case Intrinsic::nvvm_d2ull_rn:
297
298 // RP:
299 case Intrinsic::nvvm_f2i_rp:
300 case Intrinsic::nvvm_f2ui_rp:
301 case Intrinsic::nvvm_f2i_rp_ftz:
302 case Intrinsic::nvvm_f2ui_rp_ftz:
303 case Intrinsic::nvvm_d2i_rp:
304 case Intrinsic::nvvm_d2ui_rp:
305
306 case Intrinsic::nvvm_f2ll_rp:
307 case Intrinsic::nvvm_f2ull_rp:
308 case Intrinsic::nvvm_f2ll_rp_ftz:
309 case Intrinsic::nvvm_f2ull_rp_ftz:
310 case Intrinsic::nvvm_d2ll_rp:
311 case Intrinsic::nvvm_d2ull_rp:
313
314 // RZ:
315 case Intrinsic::nvvm_f2i_rz:
316 case Intrinsic::nvvm_f2ui_rz:
317 case Intrinsic::nvvm_f2i_rz_ftz:
318 case Intrinsic::nvvm_f2ui_rz_ftz:
319 case Intrinsic::nvvm_d2i_rz:
320 case Intrinsic::nvvm_d2ui_rz:
321
322 case Intrinsic::nvvm_f2ll_rz:
323 case Intrinsic::nvvm_f2ull_rz:
324 case Intrinsic::nvvm_f2ll_rz_ftz:
325 case Intrinsic::nvvm_f2ull_rz_ftz:
326 case Intrinsic::nvvm_d2ll_rz:
327 case Intrinsic::nvvm_d2ull_rz:
329 }
330 llvm_unreachable("Checking rounding mode for invalid f2i/d2i intrinsic");
331}
332
333inline bool FMinFMaxShouldFTZ(Intrinsic::ID IntrinsicID) {
334 switch (IntrinsicID) {
335 case Intrinsic::nvvm_fmax_ftz_f:
336 case Intrinsic::nvvm_fmax_ftz_nan_f:
337 case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
338 case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
339
340 case Intrinsic::nvvm_fmin_ftz_f:
341 case Intrinsic::nvvm_fmin_ftz_nan_f:
342 case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
343 case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
344 return true;
345
346 case Intrinsic::nvvm_fmax_d:
347 case Intrinsic::nvvm_fmax_f:
348 case Intrinsic::nvvm_fmax_nan_f:
349 case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
350 case Intrinsic::nvvm_fmax_xorsign_abs_f:
351
352 case Intrinsic::nvvm_fmin_d:
353 case Intrinsic::nvvm_fmin_f:
354 case Intrinsic::nvvm_fmin_nan_f:
355 case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
356 case Intrinsic::nvvm_fmin_xorsign_abs_f:
357 return false;
358 }
359 llvm_unreachable("Checking FTZ flag for invalid fmin/fmax intrinsic");
360}
361
362inline bool FMinFMaxPropagatesNaNs(Intrinsic::ID IntrinsicID) {
363 switch (IntrinsicID) {
364 case Intrinsic::nvvm_fmax_ftz_nan_f:
365 case Intrinsic::nvvm_fmax_nan_f:
366 case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
367 case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
368
369 case Intrinsic::nvvm_fmin_ftz_nan_f:
370 case Intrinsic::nvvm_fmin_nan_f:
371 case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
372 case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
373 return true;
374
375 case Intrinsic::nvvm_fmax_d:
376 case Intrinsic::nvvm_fmax_f:
377 case Intrinsic::nvvm_fmax_ftz_f:
378 case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
379 case Intrinsic::nvvm_fmax_xorsign_abs_f:
380
381 case Intrinsic::nvvm_fmin_d:
382 case Intrinsic::nvvm_fmin_f:
383 case Intrinsic::nvvm_fmin_ftz_f:
384 case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
385 case Intrinsic::nvvm_fmin_xorsign_abs_f:
386 return false;
387 }
388 llvm_unreachable("Checking NaN flag for invalid fmin/fmax intrinsic");
389}
390
391inline bool FMinFMaxIsXorSignAbs(Intrinsic::ID IntrinsicID) {
392 switch (IntrinsicID) {
393 case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
394 case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
395 case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
396 case Intrinsic::nvvm_fmax_xorsign_abs_f:
397
398 case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
399 case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
400 case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
401 case Intrinsic::nvvm_fmin_xorsign_abs_f:
402 return true;
403
404 case Intrinsic::nvvm_fmax_d:
405 case Intrinsic::nvvm_fmax_f:
406 case Intrinsic::nvvm_fmax_ftz_f:
407 case Intrinsic::nvvm_fmax_ftz_nan_f:
408 case Intrinsic::nvvm_fmax_nan_f:
409
410 case Intrinsic::nvvm_fmin_d:
411 case Intrinsic::nvvm_fmin_f:
412 case Intrinsic::nvvm_fmin_ftz_f:
413 case Intrinsic::nvvm_fmin_ftz_nan_f:
414 case Intrinsic::nvvm_fmin_nan_f:
415 return false;
416 }
417 llvm_unreachable("Checking XorSignAbs flag for invalid fmin/fmax intrinsic");
418}
419
421 switch (IntrinsicID) {
422 case Intrinsic::nvvm_ceil_ftz_f:
423 case Intrinsic::nvvm_fabs_ftz:
424 case Intrinsic::nvvm_floor_ftz_f:
425 case Intrinsic::nvvm_round_ftz_f:
426 case Intrinsic::nvvm_saturate_ftz_f:
427 case Intrinsic::nvvm_sqrt_rn_ftz_f:
428 return true;
429 case Intrinsic::nvvm_ceil_f:
430 case Intrinsic::nvvm_ceil_d:
431 case Intrinsic::nvvm_fabs:
432 case Intrinsic::nvvm_floor_f:
433 case Intrinsic::nvvm_floor_d:
434 case Intrinsic::nvvm_round_f:
435 case Intrinsic::nvvm_round_d:
436 case Intrinsic::nvvm_saturate_d:
437 case Intrinsic::nvvm_saturate_f:
438 case Intrinsic::nvvm_sqrt_f:
439 case Intrinsic::nvvm_sqrt_rn_d:
440 case Intrinsic::nvvm_sqrt_rn_f:
441 return false;
442 }
443 llvm_unreachable("Checking FTZ flag for invalid unary intrinsic");
444}
445
446inline bool RCPShouldFTZ(Intrinsic::ID IntrinsicID) {
447 switch (IntrinsicID) {
448 case Intrinsic::nvvm_rcp_rm_ftz_f:
449 case Intrinsic::nvvm_rcp_rn_ftz_f:
450 case Intrinsic::nvvm_rcp_rp_ftz_f:
451 case Intrinsic::nvvm_rcp_rz_ftz_f:
452 return true;
453 case Intrinsic::nvvm_rcp_rm_d:
454 case Intrinsic::nvvm_rcp_rm_f:
455 case Intrinsic::nvvm_rcp_rn_d:
456 case Intrinsic::nvvm_rcp_rn_f:
457 case Intrinsic::nvvm_rcp_rp_d:
458 case Intrinsic::nvvm_rcp_rp_f:
459 case Intrinsic::nvvm_rcp_rz_d:
460 case Intrinsic::nvvm_rcp_rz_f:
461 return false;
462 }
463 llvm_unreachable("Checking FTZ flag for invalid rcp intrinsic");
464}
465
467 switch (IntrinsicID) {
468 case Intrinsic::nvvm_rcp_rm_f:
469 case Intrinsic::nvvm_rcp_rm_d:
470 case Intrinsic::nvvm_rcp_rm_ftz_f:
472
473 case Intrinsic::nvvm_rcp_rn_f:
474 case Intrinsic::nvvm_rcp_rn_d:
475 case Intrinsic::nvvm_rcp_rn_ftz_f:
477
478 case Intrinsic::nvvm_rcp_rp_f:
479 case Intrinsic::nvvm_rcp_rp_d:
480 case Intrinsic::nvvm_rcp_rp_ftz_f:
482
483 case Intrinsic::nvvm_rcp_rz_f:
484 case Intrinsic::nvvm_rcp_rz_d:
485 case Intrinsic::nvvm_rcp_rz_ftz_f:
487 }
488 llvm_unreachable("Checking rounding mode for invalid rcp intrinsic");
489}
490
491inline DenormalMode GetNVVMDenormMode(bool ShouldFTZ) {
492 if (ShouldFTZ)
494 return DenormalMode::getIEEE();
495}
496
497inline bool FAddShouldFTZ(Intrinsic::ID IntrinsicID) {
498 switch (IntrinsicID) {
499 case Intrinsic::nvvm_add_rm_ftz_f:
500 case Intrinsic::nvvm_add_rn_ftz_f:
501 case Intrinsic::nvvm_add_rp_ftz_f:
502 case Intrinsic::nvvm_add_rz_ftz_f:
503 return true;
504
505 case Intrinsic::nvvm_add_rm_f:
506 case Intrinsic::nvvm_add_rn_f:
507 case Intrinsic::nvvm_add_rp_f:
508 case Intrinsic::nvvm_add_rz_f:
509 case Intrinsic::nvvm_add_rm_d:
510 case Intrinsic::nvvm_add_rn_d:
511 case Intrinsic::nvvm_add_rp_d:
512 case Intrinsic::nvvm_add_rz_d:
513 return false;
514 }
515 llvm_unreachable("Checking FTZ flag for invalid NVVM add intrinsic");
516}
517
519 switch (IntrinsicID) {
520 case Intrinsic::nvvm_add_rm_f:
521 case Intrinsic::nvvm_add_rm_d:
522 case Intrinsic::nvvm_add_rm_ftz_f:
524 case Intrinsic::nvvm_add_rn_f:
525 case Intrinsic::nvvm_add_rn_d:
526 case Intrinsic::nvvm_add_rn_ftz_f:
528 case Intrinsic::nvvm_add_rp_f:
529 case Intrinsic::nvvm_add_rp_d:
530 case Intrinsic::nvvm_add_rp_ftz_f:
532 case Intrinsic::nvvm_add_rz_f:
533 case Intrinsic::nvvm_add_rz_d:
534 case Intrinsic::nvvm_add_rz_ftz_f:
536 }
537 llvm_unreachable("Invalid FP instrinsic rounding mode for NVVM add");
538}
539
540inline bool FMulShouldFTZ(Intrinsic::ID IntrinsicID) {
541 switch (IntrinsicID) {
542 case Intrinsic::nvvm_mul_rm_ftz_f:
543 case Intrinsic::nvvm_mul_rn_ftz_f:
544 case Intrinsic::nvvm_mul_rp_ftz_f:
545 case Intrinsic::nvvm_mul_rz_ftz_f:
546 return true;
547
548 case Intrinsic::nvvm_mul_rm_f:
549 case Intrinsic::nvvm_mul_rn_f:
550 case Intrinsic::nvvm_mul_rp_f:
551 case Intrinsic::nvvm_mul_rz_f:
552 case Intrinsic::nvvm_mul_rm_d:
553 case Intrinsic::nvvm_mul_rn_d:
554 case Intrinsic::nvvm_mul_rp_d:
555 case Intrinsic::nvvm_mul_rz_d:
556 return false;
557 }
558 llvm_unreachable("Checking FTZ flag for invalid NVVM mul intrinsic");
559}
560
562 switch (IntrinsicID) {
563 case Intrinsic::nvvm_mul_rm_f:
564 case Intrinsic::nvvm_mul_rm_d:
565 case Intrinsic::nvvm_mul_rm_ftz_f:
567 case Intrinsic::nvvm_mul_rn_f:
568 case Intrinsic::nvvm_mul_rn_d:
569 case Intrinsic::nvvm_mul_rn_ftz_f:
571 case Intrinsic::nvvm_mul_rp_f:
572 case Intrinsic::nvvm_mul_rp_d:
573 case Intrinsic::nvvm_mul_rp_ftz_f:
575 case Intrinsic::nvvm_mul_rz_f:
576 case Intrinsic::nvvm_mul_rz_d:
577 case Intrinsic::nvvm_mul_rz_ftz_f:
579 }
580 llvm_unreachable("Invalid FP instrinsic rounding mode for NVVM mul");
581}
582
583inline bool FDivShouldFTZ(Intrinsic::ID IntrinsicID) {
584 switch (IntrinsicID) {
585 case Intrinsic::nvvm_div_rm_ftz_f:
586 case Intrinsic::nvvm_div_rn_ftz_f:
587 case Intrinsic::nvvm_div_rp_ftz_f:
588 case Intrinsic::nvvm_div_rz_ftz_f:
589 return true;
590
591 case Intrinsic::nvvm_div_rm_f:
592 case Intrinsic::nvvm_div_rn_f:
593 case Intrinsic::nvvm_div_rp_f:
594 case Intrinsic::nvvm_div_rz_f:
595 case Intrinsic::nvvm_div_rm_d:
596 case Intrinsic::nvvm_div_rn_d:
597 case Intrinsic::nvvm_div_rp_d:
598 case Intrinsic::nvvm_div_rz_d:
599 return false;
600 }
601 llvm_unreachable("Checking FTZ flag for invalid NVVM div intrinsic");
602}
603
605 switch (IntrinsicID) {
606 case Intrinsic::nvvm_div_rm_f:
607 case Intrinsic::nvvm_div_rm_d:
608 case Intrinsic::nvvm_div_rm_ftz_f:
610 case Intrinsic::nvvm_div_rn_f:
611 case Intrinsic::nvvm_div_rn_d:
612 case Intrinsic::nvvm_div_rn_ftz_f:
614 case Intrinsic::nvvm_div_rp_f:
615 case Intrinsic::nvvm_div_rp_d:
616 case Intrinsic::nvvm_div_rp_ftz_f:
618 case Intrinsic::nvvm_div_rz_f:
619 case Intrinsic::nvvm_div_rz_d:
620 case Intrinsic::nvvm_div_rz_ftz_f:
622 }
623 llvm_unreachable("Invalid FP instrinsic rounding mode for NVVM div");
624}
625
626inline bool FMAShouldFTZ(Intrinsic::ID IntrinsicID) {
627 switch (IntrinsicID) {
628 case Intrinsic::nvvm_fma_rm_ftz_f:
629 case Intrinsic::nvvm_fma_rn_ftz_f:
630 case Intrinsic::nvvm_fma_rp_ftz_f:
631 case Intrinsic::nvvm_fma_rz_ftz_f:
632 return true;
633
634 case Intrinsic::nvvm_fma_rm_f:
635 case Intrinsic::nvvm_fma_rn_f:
636 case Intrinsic::nvvm_fma_rp_f:
637 case Intrinsic::nvvm_fma_rz_f:
638 case Intrinsic::nvvm_fma_rm_d:
639 case Intrinsic::nvvm_fma_rn_d:
640 case Intrinsic::nvvm_fma_rp_d:
641 case Intrinsic::nvvm_fma_rz_d:
642 return false;
643 }
644 llvm_unreachable("Checking FTZ flag for invalid NVVM fma intrinsic");
645}
646
648 switch (IntrinsicID) {
649 case Intrinsic::nvvm_fma_rm_f:
650 case Intrinsic::nvvm_fma_rm_d:
651 case Intrinsic::nvvm_fma_rm_ftz_f:
653 case Intrinsic::nvvm_fma_rn_f:
654 case Intrinsic::nvvm_fma_rn_d:
655 case Intrinsic::nvvm_fma_rn_ftz_f:
657 case Intrinsic::nvvm_fma_rp_f:
658 case Intrinsic::nvvm_fma_rp_d:
659 case Intrinsic::nvvm_fma_rp_ftz_f:
661 case Intrinsic::nvvm_fma_rz_f:
662 case Intrinsic::nvvm_fma_rz_d:
663 case Intrinsic::nvvm_fma_rz_ftz_f:
665 }
666 llvm_unreachable("Invalid FP instrinsic rounding mode for NVVM fma");
667}
668
669} // namespace nvvm
670} // namespace llvm
671#endif // LLVM_IR_NVVMINTRINSICUTILS_H
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
llvm::RoundingMode roundingMode
IEEE-754R 4.3: Rounding-direction attributes.
Definition APFloat.h:342
static constexpr roundingMode rmTowardNegative
Definition APFloat.h:347
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static constexpr roundingMode rmTowardPositive
Definition APFloat.h:346
This is an important base class in LLVM.
Definition Constant.h:43
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
APFloat::roundingMode GetFMARoundingMode(Intrinsic::ID IntrinsicID)
DenormalMode GetNVVMDenormMode(bool ShouldFTZ)
bool FPToIntegerIntrinsicNaNZero(Intrinsic::ID IntrinsicID)
APFloat::roundingMode GetFDivRoundingMode(Intrinsic::ID IntrinsicID)
bool FPToIntegerIntrinsicResultIsSigned(Intrinsic::ID IntrinsicID)
APFloat::roundingMode GetFPToIntegerRoundingMode(Intrinsic::ID IntrinsicID)
bool RCPShouldFTZ(Intrinsic::ID IntrinsicID)
bool FPToIntegerIntrinsicShouldFTZ(Intrinsic::ID IntrinsicID)
bool FDivShouldFTZ(Intrinsic::ID IntrinsicID)
bool FAddShouldFTZ(Intrinsic::ID IntrinsicID)
bool FMinFMaxIsXorSignAbs(Intrinsic::ID IntrinsicID)
APFloat::roundingMode GetFMulRoundingMode(Intrinsic::ID IntrinsicID)
void printTcgen05MMAKind(raw_ostream &OS, const Constant *ImmArgVal)
void printTcgen05CollectorUsageOp(raw_ostream &OS, const Constant *ImmArgVal)
bool UnaryMathIntrinsicShouldFTZ(Intrinsic::ID IntrinsicID)
bool FMinFMaxShouldFTZ(Intrinsic::ID IntrinsicID)
APFloat::roundingMode GetFAddRoundingMode(Intrinsic::ID IntrinsicID)
bool FMAShouldFTZ(Intrinsic::ID IntrinsicID)
bool FMulShouldFTZ(Intrinsic::ID IntrinsicID)
APFloat::roundingMode GetRCPRoundingMode(Intrinsic::ID IntrinsicID)
bool FMinFMaxPropagatesNaNs(Intrinsic::ID IntrinsicID)
This is an optimization pass for GlobalISel generic memory operations.
Represent subnormal handling kind for floating point instruction inputs and outputs.
static constexpr DenormalMode getPreserveSign()
static constexpr DenormalMode getIEEE()