LLVM 22.0.0git
NVVMIntrinsicUtils.h
Go to the documentation of this file.
1//===--- NVVMIntrinsicUtils.h -----------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file contains the definitions of the enumerations and flags
11/// associated with NVVM Intrinsics, along with some helper functions.
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_IR_NVVMINTRINSICUTILS_H
16#define LLVM_IR_NVVMINTRINSICUTILS_H
17
18#include <stdint.h>
19
20#include "llvm/ADT/APFloat.h"
21#include "llvm/ADT/APInt.h"
22#include "llvm/IR/Constants.h"
23#include "llvm/IR/Intrinsics.h"
24#include "llvm/IR/IntrinsicsNVPTX.h"
26
27namespace llvm {
28namespace nvvm {
29
/// Reduction operations supported by TMA copies from shared to global
/// memory, i.e. by the "cp.reduce.async.bulk.tensor.*" family of PTX
/// instructions. Every enumerator has an explicit 8-bit value.
enum class TMAReductionOp : uint8_t {
  ADD = 0,
  MIN = 1,
  MAX = 2,
  INC = 3,
  DEC = 4,
  AND = 5,
  OR = 6,
  XOR = 7,
};
43
/// Selects between the cta_group::1 and cta_group::2 variants of the
/// TMA/TCGEN05 family of PTX instructions.
enum class CTAGroupKind : uint8_t {
  /// Default: no cta_group modifier.
  CG_NONE = 0,
  /// cta_group::1 modifier.
  CG_1 = 1,
  /// cta_group::2 modifier.
  CG_2 = 2,
};
52
/// Tcgen05 MMA kinds, each with an explicit 8-bit value.
enum class Tcgen05MMAKind : uint8_t {
  F16 = 0,
  TF32 = 1,
  F8F6F4 = 2,
  I8 = 3,
};
54
58 FILL = 2,
59 USE = 3,
60};
61
63 U8 = 0,
64 U16 = 1,
65 U32 = 2,
66 S32 = 3,
67 U64 = 4,
68 S64 = 5,
69 F16 = 6,
70 F32 = 7,
72 F64 = 9,
73 BF16 = 10,
74 TF32 = 11,
76 B4x16 = 13,
79};
80
86
94
101
106
// Printer entry points for NVVM immediate-argument operands. Only the
// declarations are visible here; the definitions live outside this header.
// Each takes the output stream OS and the immediate operand ImmArgVal.
// NOTE(review): presumably each writes a textual form of ImmArgVal to OS --
// confirm against the definitions.
107void printTcgen05MMAKind(raw_ostream &OS, const Constant *ImmArgVal);
108
109void printTcgen05CollectorUsageOp(raw_ostream &OS, const Constant *ImmArgVal);
110
111void printTensormapElemType(raw_ostream &OS, const Constant *ImmArgVal);
112void printTensormapInterleaveLayout(raw_ostream &OS, const Constant *ImmArgVal);
113void printTensormapSwizzleMode(raw_ostream &OS, const Constant *ImmArgVal);
114void printTensormapSwizzleAtomicity(raw_ostream &OS, const Constant *ImmArgVal);
115void printTensormapFillMode(raw_ostream &OS, const Constant *ImmArgVal);
116
118 switch (IntrinsicID) {
119 case Intrinsic::nvvm_f2i_rm_ftz:
120 case Intrinsic::nvvm_f2i_rn_ftz:
121 case Intrinsic::nvvm_f2i_rp_ftz:
122 case Intrinsic::nvvm_f2i_rz_ftz:
123
124 case Intrinsic::nvvm_f2ui_rm_ftz:
125 case Intrinsic::nvvm_f2ui_rn_ftz:
126 case Intrinsic::nvvm_f2ui_rp_ftz:
127 case Intrinsic::nvvm_f2ui_rz_ftz:
128
129 case Intrinsic::nvvm_f2ll_rm_ftz:
130 case Intrinsic::nvvm_f2ll_rn_ftz:
131 case Intrinsic::nvvm_f2ll_rp_ftz:
132 case Intrinsic::nvvm_f2ll_rz_ftz:
133
134 case Intrinsic::nvvm_f2ull_rm_ftz:
135 case Intrinsic::nvvm_f2ull_rn_ftz:
136 case Intrinsic::nvvm_f2ull_rp_ftz:
137 case Intrinsic::nvvm_f2ull_rz_ftz:
138 return true;
139
140 case Intrinsic::nvvm_f2i_rm:
141 case Intrinsic::nvvm_f2i_rn:
142 case Intrinsic::nvvm_f2i_rp:
143 case Intrinsic::nvvm_f2i_rz:
144
145 case Intrinsic::nvvm_f2ui_rm:
146 case Intrinsic::nvvm_f2ui_rn:
147 case Intrinsic::nvvm_f2ui_rp:
148 case Intrinsic::nvvm_f2ui_rz:
149
150 case Intrinsic::nvvm_d2i_rm:
151 case Intrinsic::nvvm_d2i_rn:
152 case Intrinsic::nvvm_d2i_rp:
153 case Intrinsic::nvvm_d2i_rz:
154
155 case Intrinsic::nvvm_d2ui_rm:
156 case Intrinsic::nvvm_d2ui_rn:
157 case Intrinsic::nvvm_d2ui_rp:
158 case Intrinsic::nvvm_d2ui_rz:
159
160 case Intrinsic::nvvm_f2ll_rm:
161 case Intrinsic::nvvm_f2ll_rn:
162 case Intrinsic::nvvm_f2ll_rp:
163 case Intrinsic::nvvm_f2ll_rz:
164
165 case Intrinsic::nvvm_f2ull_rm:
166 case Intrinsic::nvvm_f2ull_rn:
167 case Intrinsic::nvvm_f2ull_rp:
168 case Intrinsic::nvvm_f2ull_rz:
169
170 case Intrinsic::nvvm_d2ll_rm:
171 case Intrinsic::nvvm_d2ll_rn:
172 case Intrinsic::nvvm_d2ll_rp:
173 case Intrinsic::nvvm_d2ll_rz:
174
175 case Intrinsic::nvvm_d2ull_rm:
176 case Intrinsic::nvvm_d2ull_rn:
177 case Intrinsic::nvvm_d2ull_rp:
178 case Intrinsic::nvvm_d2ull_rz:
179 return false;
180 }
181 llvm_unreachable("Checking FTZ flag for invalid f2i/d2i intrinsic");
182}
183
185 switch (IntrinsicID) {
186 // f2i
187 case Intrinsic::nvvm_f2i_rm:
188 case Intrinsic::nvvm_f2i_rm_ftz:
189 case Intrinsic::nvvm_f2i_rn:
190 case Intrinsic::nvvm_f2i_rn_ftz:
191 case Intrinsic::nvvm_f2i_rp:
192 case Intrinsic::nvvm_f2i_rp_ftz:
193 case Intrinsic::nvvm_f2i_rz:
194 case Intrinsic::nvvm_f2i_rz_ftz:
195 // d2i
196 case Intrinsic::nvvm_d2i_rm:
197 case Intrinsic::nvvm_d2i_rn:
198 case Intrinsic::nvvm_d2i_rp:
199 case Intrinsic::nvvm_d2i_rz:
200 // f2ll
201 case Intrinsic::nvvm_f2ll_rm:
202 case Intrinsic::nvvm_f2ll_rm_ftz:
203 case Intrinsic::nvvm_f2ll_rn:
204 case Intrinsic::nvvm_f2ll_rn_ftz:
205 case Intrinsic::nvvm_f2ll_rp:
206 case Intrinsic::nvvm_f2ll_rp_ftz:
207 case Intrinsic::nvvm_f2ll_rz:
208 case Intrinsic::nvvm_f2ll_rz_ftz:
209 // d2ll
210 case Intrinsic::nvvm_d2ll_rm:
211 case Intrinsic::nvvm_d2ll_rn:
212 case Intrinsic::nvvm_d2ll_rp:
213 case Intrinsic::nvvm_d2ll_rz:
214 return true;
215
216 // f2ui
217 case Intrinsic::nvvm_f2ui_rm:
218 case Intrinsic::nvvm_f2ui_rm_ftz:
219 case Intrinsic::nvvm_f2ui_rn:
220 case Intrinsic::nvvm_f2ui_rn_ftz:
221 case Intrinsic::nvvm_f2ui_rp:
222 case Intrinsic::nvvm_f2ui_rp_ftz:
223 case Intrinsic::nvvm_f2ui_rz:
224 case Intrinsic::nvvm_f2ui_rz_ftz:
225 // d2ui
226 case Intrinsic::nvvm_d2ui_rm:
227 case Intrinsic::nvvm_d2ui_rn:
228 case Intrinsic::nvvm_d2ui_rp:
229 case Intrinsic::nvvm_d2ui_rz:
230 // f2ull
231 case Intrinsic::nvvm_f2ull_rm:
232 case Intrinsic::nvvm_f2ull_rm_ftz:
233 case Intrinsic::nvvm_f2ull_rn:
234 case Intrinsic::nvvm_f2ull_rn_ftz:
235 case Intrinsic::nvvm_f2ull_rp:
236 case Intrinsic::nvvm_f2ull_rp_ftz:
237 case Intrinsic::nvvm_f2ull_rz:
238 case Intrinsic::nvvm_f2ull_rz_ftz:
239 // d2ull
240 case Intrinsic::nvvm_d2ull_rm:
241 case Intrinsic::nvvm_d2ull_rn:
242 case Intrinsic::nvvm_d2ull_rp:
243 case Intrinsic::nvvm_d2ull_rz:
244 return false;
245 }
247 "Checking invalid f2i/d2i intrinsic for signed int conversion");
248}
249
251 switch (IntrinsicID) {
252 // f2i
253 case Intrinsic::nvvm_f2i_rm:
254 case Intrinsic::nvvm_f2i_rn:
255 case Intrinsic::nvvm_f2i_rp:
256 case Intrinsic::nvvm_f2i_rz:
257 case Intrinsic::nvvm_f2i_rm_ftz:
258 case Intrinsic::nvvm_f2i_rn_ftz:
259 case Intrinsic::nvvm_f2i_rp_ftz:
260 case Intrinsic::nvvm_f2i_rz_ftz:
261 // f2ui
262 case Intrinsic::nvvm_f2ui_rm:
263 case Intrinsic::nvvm_f2ui_rn:
264 case Intrinsic::nvvm_f2ui_rp:
265 case Intrinsic::nvvm_f2ui_rz:
266 case Intrinsic::nvvm_f2ui_rm_ftz:
267 case Intrinsic::nvvm_f2ui_rn_ftz:
268 case Intrinsic::nvvm_f2ui_rp_ftz:
269 case Intrinsic::nvvm_f2ui_rz_ftz:
270 return true;
271 // d2i
272 case Intrinsic::nvvm_d2i_rm:
273 case Intrinsic::nvvm_d2i_rn:
274 case Intrinsic::nvvm_d2i_rp:
275 case Intrinsic::nvvm_d2i_rz:
276 // d2ui
277 case Intrinsic::nvvm_d2ui_rm:
278 case Intrinsic::nvvm_d2ui_rn:
279 case Intrinsic::nvvm_d2ui_rp:
280 case Intrinsic::nvvm_d2ui_rz:
281 // f2ll
282 case Intrinsic::nvvm_f2ll_rm:
283 case Intrinsic::nvvm_f2ll_rn:
284 case Intrinsic::nvvm_f2ll_rp:
285 case Intrinsic::nvvm_f2ll_rz:
286 case Intrinsic::nvvm_f2ll_rm_ftz:
287 case Intrinsic::nvvm_f2ll_rn_ftz:
288 case Intrinsic::nvvm_f2ll_rp_ftz:
289 case Intrinsic::nvvm_f2ll_rz_ftz:
290 // f2ull
291 case Intrinsic::nvvm_f2ull_rm:
292 case Intrinsic::nvvm_f2ull_rn:
293 case Intrinsic::nvvm_f2ull_rp:
294 case Intrinsic::nvvm_f2ull_rz:
295 case Intrinsic::nvvm_f2ull_rm_ftz:
296 case Intrinsic::nvvm_f2ull_rn_ftz:
297 case Intrinsic::nvvm_f2ull_rp_ftz:
298 case Intrinsic::nvvm_f2ull_rz_ftz:
299 // d2ll
300 case Intrinsic::nvvm_d2ll_rm:
301 case Intrinsic::nvvm_d2ll_rn:
302 case Intrinsic::nvvm_d2ll_rp:
303 case Intrinsic::nvvm_d2ll_rz:
304 // d2ull
305 case Intrinsic::nvvm_d2ull_rm:
306 case Intrinsic::nvvm_d2ull_rn:
307 case Intrinsic::nvvm_d2ull_rp:
308 case Intrinsic::nvvm_d2ull_rz:
309 return false;
310 }
311 llvm_unreachable("Checking NaN result for invalid f2i/d2i intrinsic");
312}
313
316 switch (IntrinsicID) {
317 // RM:
318 case Intrinsic::nvvm_f2i_rm:
319 case Intrinsic::nvvm_f2ui_rm:
320 case Intrinsic::nvvm_f2i_rm_ftz:
321 case Intrinsic::nvvm_f2ui_rm_ftz:
322 case Intrinsic::nvvm_d2i_rm:
323 case Intrinsic::nvvm_d2ui_rm:
324
325 case Intrinsic::nvvm_f2ll_rm:
326 case Intrinsic::nvvm_f2ull_rm:
327 case Intrinsic::nvvm_f2ll_rm_ftz:
328 case Intrinsic::nvvm_f2ull_rm_ftz:
329 case Intrinsic::nvvm_d2ll_rm:
330 case Intrinsic::nvvm_d2ull_rm:
332
333 // RN:
334 case Intrinsic::nvvm_f2i_rn:
335 case Intrinsic::nvvm_f2ui_rn:
336 case Intrinsic::nvvm_f2i_rn_ftz:
337 case Intrinsic::nvvm_f2ui_rn_ftz:
338 case Intrinsic::nvvm_d2i_rn:
339 case Intrinsic::nvvm_d2ui_rn:
340
341 case Intrinsic::nvvm_f2ll_rn:
342 case Intrinsic::nvvm_f2ull_rn:
343 case Intrinsic::nvvm_f2ll_rn_ftz:
344 case Intrinsic::nvvm_f2ull_rn_ftz:
345 case Intrinsic::nvvm_d2ll_rn:
346 case Intrinsic::nvvm_d2ull_rn:
348
349 // RP:
350 case Intrinsic::nvvm_f2i_rp:
351 case Intrinsic::nvvm_f2ui_rp:
352 case Intrinsic::nvvm_f2i_rp_ftz:
353 case Intrinsic::nvvm_f2ui_rp_ftz:
354 case Intrinsic::nvvm_d2i_rp:
355 case Intrinsic::nvvm_d2ui_rp:
356
357 case Intrinsic::nvvm_f2ll_rp:
358 case Intrinsic::nvvm_f2ull_rp:
359 case Intrinsic::nvvm_f2ll_rp_ftz:
360 case Intrinsic::nvvm_f2ull_rp_ftz:
361 case Intrinsic::nvvm_d2ll_rp:
362 case Intrinsic::nvvm_d2ull_rp:
364
365 // RZ:
366 case Intrinsic::nvvm_f2i_rz:
367 case Intrinsic::nvvm_f2ui_rz:
368 case Intrinsic::nvvm_f2i_rz_ftz:
369 case Intrinsic::nvvm_f2ui_rz_ftz:
370 case Intrinsic::nvvm_d2i_rz:
371 case Intrinsic::nvvm_d2ui_rz:
372
373 case Intrinsic::nvvm_f2ll_rz:
374 case Intrinsic::nvvm_f2ull_rz:
375 case Intrinsic::nvvm_f2ll_rz_ftz:
376 case Intrinsic::nvvm_f2ull_rz_ftz:
377 case Intrinsic::nvvm_d2ll_rz:
378 case Intrinsic::nvvm_d2ull_rz:
380 }
381 llvm_unreachable("Checking rounding mode for invalid f2i/d2i intrinsic");
382}
383
384inline bool FMinFMaxShouldFTZ(Intrinsic::ID IntrinsicID) {
385 switch (IntrinsicID) {
386 case Intrinsic::nvvm_fmax_ftz_f:
387 case Intrinsic::nvvm_fmax_ftz_nan_f:
388 case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
389 case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
390
391 case Intrinsic::nvvm_fmin_ftz_f:
392 case Intrinsic::nvvm_fmin_ftz_nan_f:
393 case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
394 case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
395 return true;
396
397 case Intrinsic::nvvm_fmax_d:
398 case Intrinsic::nvvm_fmax_f:
399 case Intrinsic::nvvm_fmax_nan_f:
400 case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
401 case Intrinsic::nvvm_fmax_xorsign_abs_f:
402
403 case Intrinsic::nvvm_fmin_d:
404 case Intrinsic::nvvm_fmin_f:
405 case Intrinsic::nvvm_fmin_nan_f:
406 case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
407 case Intrinsic::nvvm_fmin_xorsign_abs_f:
408 return false;
409 }
410 llvm_unreachable("Checking FTZ flag for invalid fmin/fmax intrinsic");
411}
412
413inline bool FMinFMaxPropagatesNaNs(Intrinsic::ID IntrinsicID) {
414 switch (IntrinsicID) {
415 case Intrinsic::nvvm_fmax_ftz_nan_f:
416 case Intrinsic::nvvm_fmax_nan_f:
417 case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
418 case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
419
420 case Intrinsic::nvvm_fmin_ftz_nan_f:
421 case Intrinsic::nvvm_fmin_nan_f:
422 case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
423 case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
424 return true;
425
426 case Intrinsic::nvvm_fmax_d:
427 case Intrinsic::nvvm_fmax_f:
428 case Intrinsic::nvvm_fmax_ftz_f:
429 case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
430 case Intrinsic::nvvm_fmax_xorsign_abs_f:
431
432 case Intrinsic::nvvm_fmin_d:
433 case Intrinsic::nvvm_fmin_f:
434 case Intrinsic::nvvm_fmin_ftz_f:
435 case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
436 case Intrinsic::nvvm_fmin_xorsign_abs_f:
437 return false;
438 }
439 llvm_unreachable("Checking NaN flag for invalid fmin/fmax intrinsic");
440}
441
442inline bool FMinFMaxIsXorSignAbs(Intrinsic::ID IntrinsicID) {
443 switch (IntrinsicID) {
444 case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
445 case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
446 case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
447 case Intrinsic::nvvm_fmax_xorsign_abs_f:
448
449 case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
450 case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
451 case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
452 case Intrinsic::nvvm_fmin_xorsign_abs_f:
453 return true;
454
455 case Intrinsic::nvvm_fmax_d:
456 case Intrinsic::nvvm_fmax_f:
457 case Intrinsic::nvvm_fmax_ftz_f:
458 case Intrinsic::nvvm_fmax_ftz_nan_f:
459 case Intrinsic::nvvm_fmax_nan_f:
460
461 case Intrinsic::nvvm_fmin_d:
462 case Intrinsic::nvvm_fmin_f:
463 case Intrinsic::nvvm_fmin_ftz_f:
464 case Intrinsic::nvvm_fmin_ftz_nan_f:
465 case Intrinsic::nvvm_fmin_nan_f:
466 return false;
467 }
468 llvm_unreachable("Checking XorSignAbs flag for invalid fmin/fmax intrinsic");
469}
470
472 switch (IntrinsicID) {
473 case Intrinsic::nvvm_ceil_ftz_f:
474 case Intrinsic::nvvm_fabs_ftz:
475 case Intrinsic::nvvm_floor_ftz_f:
476 case Intrinsic::nvvm_round_ftz_f:
477 case Intrinsic::nvvm_saturate_ftz_f:
478 case Intrinsic::nvvm_sqrt_rn_ftz_f:
479 return true;
480 case Intrinsic::nvvm_ceil_f:
481 case Intrinsic::nvvm_ceil_d:
482 case Intrinsic::nvvm_fabs:
483 case Intrinsic::nvvm_floor_f:
484 case Intrinsic::nvvm_floor_d:
485 case Intrinsic::nvvm_round_f:
486 case Intrinsic::nvvm_round_d:
487 case Intrinsic::nvvm_saturate_d:
488 case Intrinsic::nvvm_saturate_f:
489 case Intrinsic::nvvm_sqrt_f:
490 case Intrinsic::nvvm_sqrt_rn_d:
491 case Intrinsic::nvvm_sqrt_rn_f:
492 return false;
493 }
494 llvm_unreachable("Checking FTZ flag for invalid unary intrinsic");
495}
496
497inline bool RCPShouldFTZ(Intrinsic::ID IntrinsicID) {
498 switch (IntrinsicID) {
499 case Intrinsic::nvvm_rcp_rm_ftz_f:
500 case Intrinsic::nvvm_rcp_rn_ftz_f:
501 case Intrinsic::nvvm_rcp_rp_ftz_f:
502 case Intrinsic::nvvm_rcp_rz_ftz_f:
503 return true;
504 case Intrinsic::nvvm_rcp_rm_d:
505 case Intrinsic::nvvm_rcp_rm_f:
506 case Intrinsic::nvvm_rcp_rn_d:
507 case Intrinsic::nvvm_rcp_rn_f:
508 case Intrinsic::nvvm_rcp_rp_d:
509 case Intrinsic::nvvm_rcp_rp_f:
510 case Intrinsic::nvvm_rcp_rz_d:
511 case Intrinsic::nvvm_rcp_rz_f:
512 return false;
513 }
514 llvm_unreachable("Checking FTZ flag for invalid rcp intrinsic");
515}
516
518 switch (IntrinsicID) {
519 case Intrinsic::nvvm_rcp_rm_f:
520 case Intrinsic::nvvm_rcp_rm_d:
521 case Intrinsic::nvvm_rcp_rm_ftz_f:
523
524 case Intrinsic::nvvm_rcp_rn_f:
525 case Intrinsic::nvvm_rcp_rn_d:
526 case Intrinsic::nvvm_rcp_rn_ftz_f:
528
529 case Intrinsic::nvvm_rcp_rp_f:
530 case Intrinsic::nvvm_rcp_rp_d:
531 case Intrinsic::nvvm_rcp_rp_ftz_f:
533
534 case Intrinsic::nvvm_rcp_rz_f:
535 case Intrinsic::nvvm_rcp_rz_d:
536 case Intrinsic::nvvm_rcp_rz_ftz_f:
538 }
539 llvm_unreachable("Checking rounding mode for invalid rcp intrinsic");
540}
541
542inline DenormalMode GetNVVMDenormMode(bool ShouldFTZ) {
543 if (ShouldFTZ)
545 return DenormalMode::getIEEE();
546}
547
548inline bool FAddShouldFTZ(Intrinsic::ID IntrinsicID) {
549 switch (IntrinsicID) {
550 case Intrinsic::nvvm_add_rm_ftz_f:
551 case Intrinsic::nvvm_add_rn_ftz_f:
552 case Intrinsic::nvvm_add_rp_ftz_f:
553 case Intrinsic::nvvm_add_rz_ftz_f:
554 return true;
555
556 case Intrinsic::nvvm_add_rm_f:
557 case Intrinsic::nvvm_add_rn_f:
558 case Intrinsic::nvvm_add_rp_f:
559 case Intrinsic::nvvm_add_rz_f:
560 case Intrinsic::nvvm_add_rm_d:
561 case Intrinsic::nvvm_add_rn_d:
562 case Intrinsic::nvvm_add_rp_d:
563 case Intrinsic::nvvm_add_rz_d:
564 return false;
565 }
566 llvm_unreachable("Checking FTZ flag for invalid NVVM add intrinsic");
567}
568
570 switch (IntrinsicID) {
571 case Intrinsic::nvvm_add_rm_f:
572 case Intrinsic::nvvm_add_rm_d:
573 case Intrinsic::nvvm_add_rm_ftz_f:
575 case Intrinsic::nvvm_add_rn_f:
576 case Intrinsic::nvvm_add_rn_d:
577 case Intrinsic::nvvm_add_rn_ftz_f:
579 case Intrinsic::nvvm_add_rp_f:
580 case Intrinsic::nvvm_add_rp_d:
581 case Intrinsic::nvvm_add_rp_ftz_f:
583 case Intrinsic::nvvm_add_rz_f:
584 case Intrinsic::nvvm_add_rz_d:
585 case Intrinsic::nvvm_add_rz_ftz_f:
587 }
588 llvm_unreachable("Invalid FP instrinsic rounding mode for NVVM add");
589}
590
591inline bool FMulShouldFTZ(Intrinsic::ID IntrinsicID) {
592 switch (IntrinsicID) {
593 case Intrinsic::nvvm_mul_rm_ftz_f:
594 case Intrinsic::nvvm_mul_rn_ftz_f:
595 case Intrinsic::nvvm_mul_rp_ftz_f:
596 case Intrinsic::nvvm_mul_rz_ftz_f:
597 return true;
598
599 case Intrinsic::nvvm_mul_rm_f:
600 case Intrinsic::nvvm_mul_rn_f:
601 case Intrinsic::nvvm_mul_rp_f:
602 case Intrinsic::nvvm_mul_rz_f:
603 case Intrinsic::nvvm_mul_rm_d:
604 case Intrinsic::nvvm_mul_rn_d:
605 case Intrinsic::nvvm_mul_rp_d:
606 case Intrinsic::nvvm_mul_rz_d:
607 return false;
608 }
609 llvm_unreachable("Checking FTZ flag for invalid NVVM mul intrinsic");
610}
611
613 switch (IntrinsicID) {
614 case Intrinsic::nvvm_mul_rm_f:
615 case Intrinsic::nvvm_mul_rm_d:
616 case Intrinsic::nvvm_mul_rm_ftz_f:
618 case Intrinsic::nvvm_mul_rn_f:
619 case Intrinsic::nvvm_mul_rn_d:
620 case Intrinsic::nvvm_mul_rn_ftz_f:
622 case Intrinsic::nvvm_mul_rp_f:
623 case Intrinsic::nvvm_mul_rp_d:
624 case Intrinsic::nvvm_mul_rp_ftz_f:
626 case Intrinsic::nvvm_mul_rz_f:
627 case Intrinsic::nvvm_mul_rz_d:
628 case Intrinsic::nvvm_mul_rz_ftz_f:
630 }
631 llvm_unreachable("Invalid FP instrinsic rounding mode for NVVM mul");
632}
633
634inline bool FDivShouldFTZ(Intrinsic::ID IntrinsicID) {
635 switch (IntrinsicID) {
636 case Intrinsic::nvvm_div_rm_ftz_f:
637 case Intrinsic::nvvm_div_rn_ftz_f:
638 case Intrinsic::nvvm_div_rp_ftz_f:
639 case Intrinsic::nvvm_div_rz_ftz_f:
640 return true;
641
642 case Intrinsic::nvvm_div_rm_f:
643 case Intrinsic::nvvm_div_rn_f:
644 case Intrinsic::nvvm_div_rp_f:
645 case Intrinsic::nvvm_div_rz_f:
646 case Intrinsic::nvvm_div_rm_d:
647 case Intrinsic::nvvm_div_rn_d:
648 case Intrinsic::nvvm_div_rp_d:
649 case Intrinsic::nvvm_div_rz_d:
650 return false;
651 }
652 llvm_unreachable("Checking FTZ flag for invalid NVVM div intrinsic");
653}
654
656 switch (IntrinsicID) {
657 case Intrinsic::nvvm_div_rm_f:
658 case Intrinsic::nvvm_div_rm_d:
659 case Intrinsic::nvvm_div_rm_ftz_f:
661 case Intrinsic::nvvm_div_rn_f:
662 case Intrinsic::nvvm_div_rn_d:
663 case Intrinsic::nvvm_div_rn_ftz_f:
665 case Intrinsic::nvvm_div_rp_f:
666 case Intrinsic::nvvm_div_rp_d:
667 case Intrinsic::nvvm_div_rp_ftz_f:
669 case Intrinsic::nvvm_div_rz_f:
670 case Intrinsic::nvvm_div_rz_d:
671 case Intrinsic::nvvm_div_rz_ftz_f:
673 }
674 llvm_unreachable("Invalid FP instrinsic rounding mode for NVVM div");
675}
676
677inline bool FMAShouldFTZ(Intrinsic::ID IntrinsicID) {
678 switch (IntrinsicID) {
679 case Intrinsic::nvvm_fma_rm_ftz_f:
680 case Intrinsic::nvvm_fma_rn_ftz_f:
681 case Intrinsic::nvvm_fma_rp_ftz_f:
682 case Intrinsic::nvvm_fma_rz_ftz_f:
683 return true;
684
685 case Intrinsic::nvvm_fma_rm_f:
686 case Intrinsic::nvvm_fma_rn_f:
687 case Intrinsic::nvvm_fma_rp_f:
688 case Intrinsic::nvvm_fma_rz_f:
689 case Intrinsic::nvvm_fma_rm_d:
690 case Intrinsic::nvvm_fma_rn_d:
691 case Intrinsic::nvvm_fma_rp_d:
692 case Intrinsic::nvvm_fma_rz_d:
693 return false;
694 }
695 llvm_unreachable("Checking FTZ flag for invalid NVVM fma intrinsic");
696}
697
699 switch (IntrinsicID) {
700 case Intrinsic::nvvm_fma_rm_f:
701 case Intrinsic::nvvm_fma_rm_d:
702 case Intrinsic::nvvm_fma_rm_ftz_f:
704 case Intrinsic::nvvm_fma_rn_f:
705 case Intrinsic::nvvm_fma_rn_d:
706 case Intrinsic::nvvm_fma_rn_ftz_f:
708 case Intrinsic::nvvm_fma_rp_f:
709 case Intrinsic::nvvm_fma_rp_d:
710 case Intrinsic::nvvm_fma_rp_ftz_f:
712 case Intrinsic::nvvm_fma_rz_f:
713 case Intrinsic::nvvm_fma_rz_d:
714 case Intrinsic::nvvm_fma_rz_ftz_f:
716 }
717 llvm_unreachable("Invalid FP instrinsic rounding mode for NVVM fma");
718}
719
720} // namespace nvvm
721} // namespace llvm
722#endif // LLVM_IR_NVVMINTRINSICUTILS_H
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
llvm::RoundingMode roundingMode
IEEE-754R 4.3: Rounding-direction attributes.
Definition APFloat.h:342
static constexpr roundingMode rmTowardNegative
Definition APFloat.h:347
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static constexpr roundingMode rmTowardPositive
Definition APFloat.h:346
This is an important base class in LLVM.
Definition Constant.h:43
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
APFloat::roundingMode GetFMARoundingMode(Intrinsic::ID IntrinsicID)
void printTensormapSwizzleMode(raw_ostream &OS, const Constant *ImmArgVal)
DenormalMode GetNVVMDenormMode(bool ShouldFTZ)
bool FPToIntegerIntrinsicNaNZero(Intrinsic::ID IntrinsicID)
void printTensormapInterleaveLayout(raw_ostream &OS, const Constant *ImmArgVal)
APFloat::roundingMode GetFDivRoundingMode(Intrinsic::ID IntrinsicID)
void printTensormapSwizzleAtomicity(raw_ostream &OS, const Constant *ImmArgVal)
bool FPToIntegerIntrinsicResultIsSigned(Intrinsic::ID IntrinsicID)
APFloat::roundingMode GetFPToIntegerRoundingMode(Intrinsic::ID IntrinsicID)
bool RCPShouldFTZ(Intrinsic::ID IntrinsicID)
bool FPToIntegerIntrinsicShouldFTZ(Intrinsic::ID IntrinsicID)
bool FDivShouldFTZ(Intrinsic::ID IntrinsicID)
bool FAddShouldFTZ(Intrinsic::ID IntrinsicID)
bool FMinFMaxIsXorSignAbs(Intrinsic::ID IntrinsicID)
APFloat::roundingMode GetFMulRoundingMode(Intrinsic::ID IntrinsicID)
void printTcgen05MMAKind(raw_ostream &OS, const Constant *ImmArgVal)
void printTcgen05CollectorUsageOp(raw_ostream &OS, const Constant *ImmArgVal)
bool UnaryMathIntrinsicShouldFTZ(Intrinsic::ID IntrinsicID)
bool FMinFMaxShouldFTZ(Intrinsic::ID IntrinsicID)
APFloat::roundingMode GetFAddRoundingMode(Intrinsic::ID IntrinsicID)
bool FMAShouldFTZ(Intrinsic::ID IntrinsicID)
bool FMulShouldFTZ(Intrinsic::ID IntrinsicID)
void printTensormapFillMode(raw_ostream &OS, const Constant *ImmArgVal)
APFloat::roundingMode GetRCPRoundingMode(Intrinsic::ID IntrinsicID)
bool FMinFMaxPropagatesNaNs(Intrinsic::ID IntrinsicID)
void printTensormapElemType(raw_ostream &OS, const Constant *ImmArgVal)
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
Represent subnormal handling kind for floating point instruction inputs and outputs.
static constexpr DenormalMode getPreserveSign()
static constexpr DenormalMode getIEEE()