LLVM 23.0.0git
APFloat.cpp
Go to the documentation of this file.
1//===-- APFloat.cpp - Implement APFloat class -----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a class to represent arbitrary precision floating
10// point values and provide a variety of arithmetic operations on them.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/ADT/APFloat.h"
15#include "llvm/ADT/APSInt.h"
16#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/FoldingSet.h"
19#include "llvm/ADT/Hashing.h"
20#include "llvm/ADT/STLExtras.h"
22#include "llvm/ADT/StringRef.h"
24#include "llvm/Config/llvm-config.h"
25#include "llvm/Support/Debug.h"
26#include "llvm/Support/Error.h"
29#include <cstring>
30#include <limits.h>
31
32/// Shared headers from LLVM libc
33/// Make sure to add ${LLVM_SOURCE_DIR}/../libc to include directories.
34///
35/// Notes: So far it looks like APFloat does not check errnos or floating-point
36/// exceptions after calling the math functions, so we will configure LLVM libc
37/// math functions to skip setting errnos and floating-point exceptions
38/// explicitly. We also put them in a separate namespace so that the symbols
39/// do not clash with other libc math builds just in case.
40#define LIBC_NAMESPACE __llvm_libc_apfloat
41#define LIBC_MATH (LIBC_MATH_NO_ERRNO | LIBC_MATH_NO_EXCEPT)
42
43#include "shared/math.h"
44
/// Expands to a statement that evaluates METHOD_CALL on `U.IEEE` when
/// getSemantics() uses the IEEEFloat layout, or on `U.Double` when it uses the
/// DoubleAPFloat layout, and returns the result from the enclosing function.
/// Any other layout is reported through llvm_unreachable.
#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL)                             \
  do {                                                                         \
    if (usesLayout<IEEEFloat>(getSemantics())) {                               \
      return U.IEEE.METHOD_CALL;                                               \
    } else if (usesLayout<DoubleAPFloat>(getSemantics())) {                    \
      return U.Double.METHOD_CALL;                                             \
    } else {                                                                   \
      llvm_unreachable("Unexpected semantics");                                \
    }                                                                          \
  } while (false)
53
54using namespace llvm;
55
/// Folds two fcCategory values into the single integer key
/// `4 * _lhs + _rhs`, so that one switch statement can enumerate every
/// (lhs, rhs) category pairing of a two-operand APFloat operation.
///
/// TODO: If clang source code is ever allowed to use constexpr in its own
/// codebase, change this into a static inline function.
#define PackCategoriesIntoKey(_lhs, _rhs) (4 * (_lhs) + (_rhs))
63
64/* Assumed in hexadecimal significand parsing, and conversion to
65 hexadecimal strings. */
66static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
67
68namespace llvm {
69
70constexpr fltSemantics APFloatBase::semIEEEhalf = {15, -14, 11, 16};
71constexpr fltSemantics APFloatBase::semBFloat = {127, -126, 8, 16};
72constexpr fltSemantics APFloatBase::semIEEEsingle = {127, -126, 24, 32};
73constexpr fltSemantics APFloatBase::semIEEEdouble = {1023, -1022, 53, 64};
74constexpr fltSemantics APFloatBase::semIEEEquad = {16383, -16382, 113, 128};
75constexpr fltSemantics APFloatBase::semFloat8E5M2 = {15, -14, 3, 8};
76constexpr fltSemantics APFloatBase::semFloat8E5M2FNUZ = {
78constexpr fltSemantics APFloatBase::semFloat8E4M3 = {7, -6, 4, 8};
79constexpr fltSemantics APFloatBase::semFloat8E4M3FN = {
81constexpr fltSemantics APFloatBase::semFloat8E4M3FNUZ = {
83constexpr fltSemantics APFloatBase::semFloat8E4M3B11FNUZ = {
85constexpr fltSemantics APFloatBase::semFloat8E3M4 = {3, -2, 5, 8};
86constexpr fltSemantics APFloatBase::semFloatTF32 = {127, -126, 11, 19};
87constexpr fltSemantics APFloatBase::semFloat8E8M0FNU = {
88 127,
89 -127,
90 1,
91 8,
94 false,
95 false,
96 false};
97
98constexpr fltSemantics APFloatBase::semFloat6E3M2FN = {
100constexpr fltSemantics APFloatBase::semFloat6E2M3FN = {
102constexpr fltSemantics APFloatBase::semFloat4E2M1FN = {
104constexpr fltSemantics APFloatBase::semX87DoubleExtended = {16383, -16382, 64,
105 80};
106constexpr fltSemantics APFloatBase::semBogus = {0, 0, 0, 0};
107constexpr fltSemantics APFloatBase::semPPCDoubleDouble = {-1, 0, 0, 128};
108constexpr fltSemantics APFloatBase::semPPCDoubleDoubleLegacy = {
109 1023, -1022 + 53, 53 + 53, 128};
110
112 switch (S) {
113 case S_IEEEhalf:
114 return IEEEhalf();
115 case S_BFloat:
116 return BFloat();
117 case S_IEEEsingle:
118 return IEEEsingle();
119 case S_IEEEdouble:
120 return IEEEdouble();
121 case S_IEEEquad:
122 return IEEEquad();
124 return PPCDoubleDouble();
126 return PPCDoubleDoubleLegacy();
127 case S_Float8E5M2:
128 return Float8E5M2();
129 case S_Float8E5M2FNUZ:
130 return Float8E5M2FNUZ();
131 case S_Float8E4M3:
132 return Float8E4M3();
133 case S_Float8E4M3FN:
134 return Float8E4M3FN();
135 case S_Float8E4M3FNUZ:
136 return Float8E4M3FNUZ();
138 return Float8E4M3B11FNUZ();
139 case S_Float8E3M4:
140 return Float8E3M4();
141 case S_FloatTF32:
142 return FloatTF32();
143 case S_Float8E8M0FNU:
144 return Float8E8M0FNU();
145 case S_Float6E3M2FN:
146 return Float6E3M2FN();
147 case S_Float6E2M3FN:
148 return Float6E2M3FN();
149 case S_Float4E2M1FN:
150 return Float4E2M1FN();
152 return x87DoubleExtended();
153 }
154 llvm_unreachable("Unrecognised floating semantics");
155}
156
159 if (&Sem == &llvm::APFloat::IEEEhalf())
160 return S_IEEEhalf;
161 else if (&Sem == &llvm::APFloat::BFloat())
162 return S_BFloat;
163 else if (&Sem == &llvm::APFloat::IEEEsingle())
164 return S_IEEEsingle;
165 else if (&Sem == &llvm::APFloat::IEEEdouble())
166 return S_IEEEdouble;
167 else if (&Sem == &llvm::APFloat::IEEEquad())
168 return S_IEEEquad;
169 else if (&Sem == &llvm::APFloat::PPCDoubleDouble())
170 return S_PPCDoubleDouble;
171 else if (&Sem == &llvm::APFloat::PPCDoubleDoubleLegacy())
173 else if (&Sem == &llvm::APFloat::Float8E5M2())
174 return S_Float8E5M2;
175 else if (&Sem == &llvm::APFloat::Float8E5M2FNUZ())
176 return S_Float8E5M2FNUZ;
177 else if (&Sem == &llvm::APFloat::Float8E4M3())
178 return S_Float8E4M3;
179 else if (&Sem == &llvm::APFloat::Float8E4M3FN())
180 return S_Float8E4M3FN;
181 else if (&Sem == &llvm::APFloat::Float8E4M3FNUZ())
182 return S_Float8E4M3FNUZ;
183 else if (&Sem == &llvm::APFloat::Float8E4M3B11FNUZ())
184 return S_Float8E4M3B11FNUZ;
185 else if (&Sem == &llvm::APFloat::Float8E3M4())
186 return S_Float8E3M4;
187 else if (&Sem == &llvm::APFloat::FloatTF32())
188 return S_FloatTF32;
189 else if (&Sem == &llvm::APFloat::Float8E8M0FNU())
190 return S_Float8E8M0FNU;
191 else if (&Sem == &llvm::APFloat::Float6E3M2FN())
192 return S_Float6E3M2FN;
193 else if (&Sem == &llvm::APFloat::Float6E2M3FN())
194 return S_Float6E2M3FN;
195 else if (&Sem == &llvm::APFloat::Float4E2M1FN())
196 return S_Float4E2M1FN;
197 else if (&Sem == &llvm::APFloat::x87DoubleExtended())
198 return S_x87DoubleExtended;
199 else
200 llvm_unreachable("Unknown floating semantics");
201}
202
204 const fltSemantics &B) {
205 return A.maxExponent <= B.maxExponent && A.minExponent >= B.minExponent &&
206 A.precision <= B.precision;
207}
208
209/* A tight upper bound on number of parts required to hold the value
210 pow(5, power) is
211
212 power * 815 / (351 * integerPartWidth) + 1
213
214 However, whilst the result may require only this many parts,
215 because we are multiplying two values to get it, the
216 multiplication may require an extra part with the excess part
217 being zero (consider the trivial case of 1 * 1, tcFullMultiply
218 requires two parts to hold the single-part result). So we add an
219 extra one to guarantee enough space whilst multiplying. */
220const unsigned int maxExponent = 16383;
221const unsigned int maxPrecision = 113;
223const unsigned int maxPowerOfFiveParts =
224 2 +
226
227unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
228 return semantics.precision;
229}
232 return semantics.maxExponent;
233}
236 return semantics.minExponent;
237}
238unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
239 return semantics.sizeInBits;
240}
242 bool isSigned) {
243 // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need
244 // at least one more bit than the MaxExponent to hold the max FP value.
245 unsigned int MinBitWidth = semanticsMaxExponent(semantics) + 1;
246 // Extra sign bit needed.
247 if (isSigned)
248 ++MinBitWidth;
249 return MinBitWidth;
250}
251
253 return semantics.hasZero;
254}
255
257 return semantics.hasSignedRepr;
258}
259
263
267
269 // Keep in sync with Type::isIEEELikeFPTy
270 return SemanticsToEnum(semantics) <= S_IEEEquad;
271}
272
274 return semantics.hasSignBitInMSB;
275}
276
278 const fltSemantics &Dst) {
279 // Exponent range must be larger.
280 if (Src.maxExponent >= Dst.maxExponent || Src.minExponent <= Dst.minExponent)
281 return false;
282
283 // If the mantissa is long enough, the result value could still be denormal
284 // with a larger exponent range.
285 //
286 // FIXME: This condition is probably not accurate but also shouldn't be a
287 // practical concern with existing types.
288 return Dst.precision >= Src.precision;
289}
290
292 return Sem.sizeInBits;
293}
294
295static constexpr APFloatBase::ExponentType
296exponentZero(const fltSemantics &semantics) {
297 return semantics.minExponent - 1;
298}
299
300static constexpr APFloatBase::ExponentType
301exponentInf(const fltSemantics &semantics) {
302 return semantics.maxExponent + 1;
303}
304
305static constexpr APFloatBase::ExponentType
306exponentNaN(const fltSemantics &semantics) {
309 return exponentZero(semantics);
310 if (semantics.hasSignedRepr)
311 return semantics.maxExponent;
312 }
313 return semantics.maxExponent + 1;
314}
315
316/* A bunch of private, handy routines. */
317
318static inline Error createError(const Twine &Err) {
320}
321
322static constexpr inline unsigned int partCountForBits(unsigned int bits) {
323 return std::max(1u, (bits + APFloatBase::integerPartWidth - 1) /
325}
326
/// Converts the character code \p c to a decimal digit value by subtracting
/// '0'. The digits '0'..'9' yield 0U-9U; the subtraction is unsigned, so any
/// other input produces a value >= 10U, which is not a digit.
static inline unsigned int decDigitValue(unsigned int c) {
  const unsigned int zeroCode = '0';
  return c - zeroCode;
}
333
334/* Return the value of a decimal exponent of the form
335 [+-]ddddddd.
336
337 If the exponent overflows, returns a large exponent with the
338 appropriate sign. */
341 const unsigned int overlargeExponent = 24000; /* FIXME. */
342 StringRef::iterator p = begin;
343
344 // Treat no exponent as 0 to match binutils
345 if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end))
346 return 0;
347
348 bool isNegative = *p == '-';
349 if (*p == '-' || *p == '+') {
350 p++;
351 if (p == end)
352 return createError("Exponent has no digits");
353 }
354
355 unsigned absExponent = decDigitValue(*p++);
356 if (absExponent >= 10U)
357 return createError("Invalid character in exponent");
358
359 for (; p != end; ++p) {
360 unsigned value = decDigitValue(*p);
361 if (value >= 10U)
362 return createError("Invalid character in exponent");
363
364 absExponent = absExponent * 10U + value;
365 if (absExponent >= overlargeExponent) {
366 absExponent = overlargeExponent;
367 break;
368 }
369 }
370
371 if (isNegative)
372 return -(int) absExponent;
373 else
374 return (int) absExponent;
375}
376
377/* This is ugly and needs cleaning up, but I don't immediately see
378 how whilst remaining safe. */
381 int exponentAdjustment) {
382 int exponent = 0;
383
384 if (p == end)
385 return createError("Exponent has no digits");
386
387 bool negative = *p == '-';
388 if (*p == '-' || *p == '+') {
389 p++;
390 if (p == end)
391 return createError("Exponent has no digits");
392 }
393
394 int unsignedExponent = 0;
395 bool overflow = false;
396 for (; p != end; ++p) {
397 unsigned int value;
398
399 value = decDigitValue(*p);
400 if (value >= 10U)
401 return createError("Invalid character in exponent");
402
403 unsignedExponent = unsignedExponent * 10 + value;
404 if (unsignedExponent > 32767) {
405 overflow = true;
406 break;
407 }
408 }
409
410 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
411 overflow = true;
412
413 if (!overflow) {
414 exponent = unsignedExponent;
415 if (negative)
416 exponent = -exponent;
417 exponent += exponentAdjustment;
418 if (exponent > 32767 || exponent < -32768)
419 overflow = true;
420 }
421
422 if (overflow)
423 exponent = negative ? -32768: 32767;
424
425 return exponent;
426}
427
430 StringRef::iterator *dot) {
431 StringRef::iterator p = begin;
432 *dot = end;
433 while (p != end && *p == '0')
434 p++;
435
436 if (p != end && *p == '.') {
437 *dot = p++;
438
439 if (end - begin == 1)
440 return createError("Significand has no digits");
441
442 while (p != end && *p == '0')
443 p++;
444 }
445
446 return p;
447}
448
449/* Given a normal decimal floating point number of the form
450
451 dddd.dddd[eE][+-]ddd
452
453 where the decimal point and exponent are optional, fill out the
454 structure D. Exponent is appropriate if the significand is
455 treated as an integer, and normalizedExponent if the significand
456 is taken to have the decimal point after a single leading
457 non-zero digit.
458
459 If the value is zero, V->firstSigDigit points to a non-digit, and
460 the return exponent is zero.
461*/
463 const char *firstSigDigit;
464 const char *lastSigDigit;
467};
468
471 StringRef::iterator dot = end;
472
473 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
474 if (!PtrOrErr)
475 return PtrOrErr.takeError();
476 StringRef::iterator p = *PtrOrErr;
477
478 D->firstSigDigit = p;
479 D->exponent = 0;
480 D->normalizedExponent = 0;
481
482 for (; p != end; ++p) {
483 if (*p == '.') {
484 if (dot != end)
485 return createError("String contains multiple dots");
486 dot = p++;
487 if (p == end)
488 break;
489 }
490 if (decDigitValue(*p) >= 10U)
491 break;
492 }
493
494 if (p != end) {
495 if (*p != 'e' && *p != 'E')
496 return createError("Invalid character in significand");
497 if (p == begin)
498 return createError("Significand has no digits");
499 if (dot != end && p - begin == 1)
500 return createError("Significand has no digits");
501
502 /* p points to the first non-digit in the string */
503 auto ExpOrErr = readExponent(p + 1, end);
504 if (!ExpOrErr)
505 return ExpOrErr.takeError();
506 D->exponent = *ExpOrErr;
507
508 /* Implied decimal point? */
509 if (dot == end)
510 dot = p;
511 }
512
513 /* If number is all zeroes accept any exponent. */
514 if (p != D->firstSigDigit) {
515 /* Drop insignificant trailing zeroes. */
516 if (p != begin) {
517 do
518 do
519 p--;
520 while (p != begin && *p == '0');
521 while (p != begin && *p == '.');
522 }
523
524 /* Adjust the exponents for any decimal point. */
525 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
526 D->normalizedExponent = (D->exponent +
527 static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
528 - (dot > D->firstSigDigit && dot < p)));
529 }
530
531 D->lastSigDigit = p;
532 return Error::success();
533}
534
535/* Return the trailing fraction of a hexadecimal number.
536 DIGITVALUE is the first hex digit of the fraction, P points to
537 the next digit. */
540 unsigned int digitValue) {
541 /* If the first trailing digit isn't 0 or 8 we can work out the
542 fraction immediately. */
543 if (digitValue > 8)
544 return lfMoreThanHalf;
545 else if (digitValue < 8 && digitValue > 0)
546 return lfLessThanHalf;
547
548 // Otherwise we need to find the first non-zero digit.
549 while (p != end && (*p == '0' || *p == '.'))
550 p++;
551
552 if (p == end)
553 return createError("Invalid trailing hexadecimal fraction!");
554
555 unsigned hexDigit = hexDigitValue(*p);
556
557 /* If we ran off the end it is exactly zero or one-half, otherwise
558 a little more. */
559 if (hexDigit == UINT_MAX)
560 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
561 else
562 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
563}
564
565/* Return the fraction lost were a bignum truncated losing the least
566 significant BITS bits. */
567static lostFraction
569 unsigned int partCount,
570 unsigned int bits)
571{
572 unsigned lsb = APInt::tcLSB(parts, partCount);
573
574 /* Note this is guaranteed true if bits == 0, or LSB == UINT_MAX. */
575 if (bits <= lsb)
576 return lfExactlyZero;
577 if (bits == lsb + 1)
578 return lfExactlyHalf;
579 if (bits <= partCount * APFloatBase::integerPartWidth &&
580 APInt::tcExtractBit(parts, bits - 1))
581 return lfMoreThanHalf;
582
583 return lfLessThanHalf;
584}
585
586/* Shift DST right BITS bits noting lost fraction. */
587static lostFraction
588shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
589{
590 lostFraction lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
591
592 APInt::tcShiftRight(dst, parts, bits);
593
594 return lost_fraction;
595}
596
597/* Combine the effect of two lost fractions. */
598static lostFraction
600 lostFraction lessSignificant)
601{
602 if (lessSignificant != lfExactlyZero) {
603 if (moreSignificant == lfExactlyZero)
604 moreSignificant = lfLessThanHalf;
605 else if (moreSignificant == lfExactlyHalf)
606 moreSignificant = lfMoreThanHalf;
607 }
608
609 return moreSignificant;
610}
611
/// Bounds the error, in half-ulps, of the product of two floating point
/// numbers that differ from the values they approximate by at most \p HUerr1
/// and \p HUerr2 half-ulps respectively. The true error is strictly less than
/// the returned value. \p inexactMultiply says whether the multiplication
/// itself was inexact.
///
/// See "How to Read Floating Point Numbers Accurately" by William D
/// Clinger.
static unsigned int HUerrBound(bool inexactMultiply, unsigned int HUerr1,
                               unsigned int HUerr2) {
  assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));

  const unsigned int inputError = HUerr1 + HUerr2;
  const unsigned int inexact = inexactMultiply ? 1U : 0U;

  // With exact inputs the only error is the multiplication itself
  // (<= inexactMultiply half-ulps).
  return inputError == 0 ? 2 * inexact : inexact + 2 * inputError;
}
629
630/* The number of ulps from the boundary (zero, or half if ISNEAREST)
631 when the least significant BITS are truncated. BITS cannot be
632 zero. */
634ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
635 bool isNearest) {
636 assert(bits != 0);
637
638 bits--;
639 unsigned count = bits / APFloatBase::integerPartWidth;
640 unsigned partBits = bits % APFloatBase::integerPartWidth + 1;
641
643 parts[count] & (~(APFloatBase::integerPart)0 >>
644 (APFloatBase::integerPartWidth - partBits));
645
647 if (isNearest)
648 boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
649 else
650 boundary = 0;
651
652 if (count == 0) {
653 if (part - boundary <= boundary - part)
654 return part - boundary;
655 else
656 return boundary - part;
657 }
658
659 if (part == boundary) {
660 while (--count)
661 if (parts[count])
662 return ~(APFloatBase::integerPart) 0; /* A lot. */
663
664 return parts[0];
665 } else if (part == boundary - 1) {
666 while (--count)
667 if (~parts[count])
668 return ~(APFloatBase::integerPart) 0; /* A lot. */
669
670 return -parts[0];
671 }
672
673 return ~(APFloatBase::integerPart) 0; /* A lot. */
674}
675
676/* Place pow(5, power) in DST, and return the number of parts used.
677 DST must be at least one part larger than size of the answer. */
678static unsigned int
679powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
680 static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
682 pow5s[0] = 78125 * 5;
683
684 unsigned int partsCount = 1;
685 APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
686 assert(power <= maxExponent);
687
688 p1 = dst;
689 p2 = scratch;
690
691 *p1 = firstEightPowers[power & 7];
692 power >>= 3;
693
694 unsigned result = 1;
695 pow5 = pow5s;
696
697 for (unsigned int n = 0; power; power >>= 1, n++) {
698 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
699 if (n != 0) {
700 APInt::tcFullMultiply(pow5, pow5 - partsCount, pow5 - partsCount,
701 partsCount, partsCount);
702 partsCount *= 2;
703 if (pow5[partsCount - 1] == 0)
704 partsCount--;
705 }
706
707 if (power & 1) {
709
710 APInt::tcFullMultiply(p2, p1, pow5, result, partsCount);
711 result += partsCount;
712 if (p2[result - 1] == 0)
713 result--;
714
715 /* Now result is in p1 with partsCount parts and p2 is scratch
716 space. */
717 tmp = p1;
718 p1 = p2;
719 p2 = tmp;
720 }
721
722 pow5 += partsCount;
723 }
724
725 if (p1 != dst)
726 APInt::tcAssign(dst, p1, result);
727
728 return result;
729}
730
731/* Zero at the end to avoid modular arithmetic when adding one; used
732 when rounding up during hexadecimal output. */
733static const char hexDigitsLower[] = "0123456789abcdef0";
734static const char hexDigitsUpper[] = "0123456789ABCDEF0";
735static const char infinityL[] = "infinity";
736static const char infinityU[] = "INFINITY";
737static const char NaNL[] = "nan";
738static const char NaNU[] = "NAN";
739
740/* Write out an integerPart in hexadecimal, starting with the most
741 significant nibble. Write out exactly COUNT hexdigits, return
742 COUNT. */
743static unsigned int
744partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
745 const char *hexDigitChars)
746{
747 unsigned int result = count;
748
750
751 part >>= (APFloatBase::integerPartWidth - 4 * count);
752 while (count--) {
753 dst[count] = hexDigitChars[part & 0xf];
754 part >>= 4;
755 }
756
757 return result;
758}
759
/// Writes \p n as an unsigned decimal number starting at \p dst and returns a
/// pointer just past the last character written. No terminator is appended.
static char *writeUnsignedDecimal(char *dst, unsigned int n) {
  char digits[40];
  unsigned int length = 0;

  // Generate the digits least significant first; zero still yields one '0'.
  do {
    digits[length++] = static_cast<char>('0' + n % 10);
    n /= 10;
  } while (n != 0);

  // Copy them out in reverse so the most significant digit comes first.
  while (length != 0)
    *dst++ = digits[--length];

  return dst;
}
775
776/* Write out a signed decimal integer. */
777static char *writeSignedDecimal(char *dst, int value) {
778 if (value < 0) {
779 *dst++ = '-';
780 dst = writeUnsignedDecimal(dst, -(unsigned) value);
781 } else {
782 dst = writeUnsignedDecimal(dst, value);
783 }
784
785 return dst;
786}
787
788// Compute the ULP of the input using a definition from:
789// Jean-Michel Muller. On the definition of ulp(x). [Research Report] RR-5504,
790// LIP RR-2005-09, INRIA, LIP. 2005, pp.16. inria-00070503
791static APFloat harrisonUlp(const APFloat &X) {
792 const fltSemantics &Sem = X.getSemantics();
793 switch (X.getCategory()) {
794 case APFloat::fcNaN:
795 return APFloat::getQNaN(Sem);
797 return APFloat::getInf(Sem);
798 case APFloat::fcZero:
799 return APFloat::getSmallest(Sem);
801 break;
802 }
803 if (X.isDenormal() || X.isSmallestNormalized())
804 return APFloat::getSmallest(Sem);
805 int Exp = ilogb(X);
806 if (X.getExactLog2() != INT_MIN)
807 Exp -= 1;
808 return scalbn(APFloat::getOne(Sem), Exp - (Sem.precision - 1),
810}
811
812namespace detail {
813/* Constructors. */
814void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
815 semantics = ourSemantics;
816 unsigned count = partCount();
817 if (count > 1)
818 significand.parts = new integerPart[count];
819}
820
821void IEEEFloat::freeSignificand() {
822 if (needsCleanup())
823 delete [] significand.parts;
824}
825
826void IEEEFloat::assign(const IEEEFloat &rhs) {
827 assert(semantics == rhs.semantics);
828
829 sign = rhs.sign;
830 category = rhs.category;
831 exponent = rhs.exponent;
832 if (isFiniteNonZero() || category == fcNaN)
833 copySignificand(rhs);
834}
835
836void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
837 assert(isFiniteNonZero() || category == fcNaN);
838 assert(rhs.partCount() >= partCount());
839
840 APInt::tcAssign(significandParts(), rhs.significandParts(),
841 partCount());
842}
843
844/* Make this number a NaN, with an arbitrary but deterministic value
845 for the significand. If double or longer, this is a signalling NaN,
846 which may not be ideal. If float, this is QNaN(0). */
847void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
848 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
849 llvm_unreachable("This floating point format does not support NaN");
850
851 if (Negative && !semantics->hasSignedRepr)
853 "This floating point format does not support signed values");
854
855 category = fcNaN;
856 sign = Negative;
857 exponent = exponentNaN();
858
859 integerPart *significand = significandParts();
860 unsigned numParts = partCount();
861
862 APInt fill_storage;
863 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
864 // NaN-only types do not distinguish signalling and quiet NaN, so
865 // clear the SNaN request and treat every NaN as non-signalling.
866 SNaN = false;
867 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
868 sign = true;
869 fill_storage = APInt::getZero(semantics->precision - 1);
870 } else {
871 fill_storage = APInt::getAllOnes(semantics->precision - 1);
872 }
873 fill = &fill_storage;
874 }
875
876 // Set the significand bits to the fill.
877 if (!fill || fill->getNumWords() < numParts)
878 APInt::tcSet(significand, 0, numParts);
879 if (fill) {
880 APInt::tcAssign(significand, fill->getRawData(),
881 std::min(fill->getNumWords(), numParts));
882
883 // Zero out the excess bits of the significand.
884 unsigned bitsToPreserve = semantics->precision - 1;
885 unsigned part = bitsToPreserve / 64;
886 bitsToPreserve %= 64;
887 significand[part] &= ((1ULL << bitsToPreserve) - 1);
888 for (part++; part != numParts; ++part)
889 significand[part] = 0;
890 }
891
892 unsigned QNaNBit =
893 (semantics->precision >= 2) ? (semantics->precision - 2) : 0;
894
895 if (SNaN) {
896 // We always have to clear the QNaN bit to make it an SNaN.
897 APInt::tcClearBit(significand, QNaNBit);
898
899 // If there are no bits set in the payload, we have to set
900 // *something* to make it a NaN instead of an infinity;
901 // conventionally, this is the next bit down from the QNaN bit.
902 if (APInt::tcIsZero(significand, numParts))
903 APInt::tcSetBit(significand, QNaNBit - 1);
904 } else if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
905 // The only NaN is a quiet NaN, and it has no bits set in the significand.
906 // Do nothing.
907 } else {
908 // We always have to set the QNaN bit to make it a QNaN.
909 APInt::tcSetBit(significand, QNaNBit);
910 }
911
912 // For x87 extended precision, we want to make a NaN, not a
913 // pseudo-NaN. Maybe we should expose the ability to make
914 // pseudo-NaNs?
915 if (semantics == &APFloatBase::semX87DoubleExtended)
916 APInt::tcSetBit(significand, QNaNBit + 1);
917}
918
920 if (this != &rhs) {
921 if (semantics != rhs.semantics) {
922 freeSignificand();
923 initialize(rhs.semantics);
924 }
925 assign(rhs);
926 }
927
928 return *this;
929}
930
932 freeSignificand();
933
934 semantics = rhs.semantics;
935 significand = rhs.significand;
936 exponent = rhs.exponent;
937 category = rhs.category;
938 sign = rhs.sign;
939
940 rhs.semantics = &APFloatBase::semBogus;
941 return *this;
942}
943
945 return isFiniteNonZero() && (exponent == semantics->minExponent) &&
946 (APInt::tcExtractBit(significandParts(),
947 semantics->precision - 1) == 0);
948}
949
951 // The smallest number by magnitude in our format will be the smallest
952 // denormal, i.e. the floating point number with exponent being minimum
953 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
954 return isFiniteNonZero() && exponent == semantics->minExponent &&
955 significandMSB() == 0;
956}
957
959 return getCategory() == fcNormal && exponent == semantics->minExponent &&
960 isSignificandAllZerosExceptMSB();
961}
962
963unsigned int IEEEFloat::getNumHighBits() const {
964 const unsigned int PartCount = partCountForBits(semantics->precision);
965 const unsigned int Bits = PartCount * integerPartWidth;
966
967 // Compute how many bits are used in the final word.
968 // When precision is just 1, it represents the 'Pth'
969 // Precision bit and not the actual significand bit.
970 const unsigned int NumHighBits = (semantics->precision > 1)
971 ? (Bits - semantics->precision + 1)
972 : (Bits - semantics->precision);
973 return NumHighBits;
974}
975
976bool IEEEFloat::isSignificandAllOnes() const {
977 // Test if the significand excluding the integral bit is all ones. This allows
978 // us to test for binade boundaries.
979 const integerPart *Parts = significandParts();
980 const unsigned PartCount = partCountForBits(semantics->precision);
981 for (unsigned i = 0; i < PartCount - 1; i++)
982 if (~Parts[i])
983 return false;
984
985 // Set the unused high bits to all ones when we compare.
986 const unsigned NumHighBits = getNumHighBits();
987 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
988 "Can not have more high bits to fill than integerPartWidth");
989 const integerPart HighBitFill =
990 ~integerPart(0) << (integerPartWidth - NumHighBits);
991 if ((semantics->precision <= 1) || (~(Parts[PartCount - 1] | HighBitFill)))
992 return false;
993
994 return true;
995}
996
997bool IEEEFloat::isSignificandAllOnesExceptLSB() const {
998 // Test if the significand excluding the integral bit is all ones except for
999 // the least significant bit.
1000 const integerPart *Parts = significandParts();
1001
1002 if (Parts[0] & 1)
1003 return false;
1004
1005 const unsigned PartCount = partCountForBits(semantics->precision);
1006 for (unsigned i = 0; i < PartCount - 1; i++) {
1007 if (~Parts[i] & ~unsigned{!i})
1008 return false;
1009 }
1010
1011 // Set the unused high bits to all ones when we compare.
1012 const unsigned NumHighBits = getNumHighBits();
1013 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
1014 "Can not have more high bits to fill than integerPartWidth");
1015 const integerPart HighBitFill = ~integerPart(0)
1016 << (integerPartWidth - NumHighBits);
1017 if (~(Parts[PartCount - 1] | HighBitFill | 0x1))
1018 return false;
1019
1020 return true;
1021}
1022
1023bool IEEEFloat::isSignificandAllZeros() const {
1024 // Test if the significand excluding the integral bit is all zeros. This
1025 // allows us to test for binade boundaries.
1026 const integerPart *Parts = significandParts();
1027 const unsigned PartCount = partCountForBits(semantics->precision);
1028
1029 for (unsigned i = 0; i < PartCount - 1; i++)
1030 if (Parts[i])
1031 return false;
1032
1033 // Compute how many bits are used in the final word.
1034 const unsigned NumHighBits = getNumHighBits();
1035 assert(NumHighBits < integerPartWidth && "Can not have more high bits to "
1036 "clear than integerPartWidth");
1037 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
1038
1039 if ((semantics->precision > 1) && (Parts[PartCount - 1] & HighBitMask))
1040 return false;
1041
1042 return true;
1043}
1044
1045bool IEEEFloat::isSignificandAllZerosExceptMSB() const {
1046 const integerPart *Parts = significandParts();
1047 const unsigned PartCount = partCountForBits(semantics->precision);
1048
1049 for (unsigned i = 0; i < PartCount - 1; i++) {
1050 if (Parts[i])
1051 return false;
1052 }
1053
1054 const unsigned NumHighBits = getNumHighBits();
1055 const integerPart MSBMask = integerPart(1)
1056 << (integerPartWidth - NumHighBits);
1057 return ((semantics->precision <= 1) || (Parts[PartCount - 1] == MSBMask));
1058}
1059
1061 bool IsMaxExp = isFiniteNonZero() && exponent == semantics->maxExponent;
1062 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1063 semantics->nanEncoding == fltNanEncoding::AllOnes) {
1064 // The largest number by magnitude in our format will be the floating point
1065 // number with maximum exponent and with significand that is all ones except
1066 // the LSB.
1067 return (IsMaxExp && APFloat::hasSignificand(*semantics))
1068 ? isSignificandAllOnesExceptLSB()
1069 : IsMaxExp;
1070 } else {
1071 // The largest number by magnitude in our format will be the floating point
1072 // number with maximum exponent and with significand that is all ones.
1073 return IsMaxExp && isSignificandAllOnes();
1074 }
1075}
1076
1078 // This could be made more efficient; I'm going for obviously correct.
1079 if (!isFinite()) return false;
1080 IEEEFloat truncated = *this;
1081 truncated.roundToIntegral(rmTowardZero);
1082 return compare(truncated) == cmpEqual;
1083}
1084
1085bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {
1086 if (this == &rhs)
1087 return true;
1088 if (semantics != rhs.semantics ||
1089 category != rhs.category ||
1090 sign != rhs.sign)
1091 return false;
1092 if (category==fcZero || category==fcInfinity)
1093 return true;
1094
1095 if (isFiniteNonZero() && exponent != rhs.exponent)
1096 return false;
1097
1098 return std::equal(significandParts(), significandParts() + partCount(),
1099 rhs.significandParts());
1100}
1101
1103 initialize(&ourSemantics);
1104 sign = 0;
1105 category = fcNormal;
1106 zeroSignificand();
1107 exponent = ourSemantics.precision - 1;
1108 significandParts()[0] = value;
1110}
1111
1113 initialize(&ourSemantics);
1114 // The Float8E8MOFNU format does not have a representation
1115 // for zero. So, use the closest representation instead.
1116 // Moreover, the all-zero encoding represents a valid
1117 // normal value (which is the smallestNormalized here).
1118 // Hence, we call makeSmallestNormalized (where category is
1119 // 'fcNormal') instead of makeZero (where category is 'fcZero').
1120 ourSemantics.hasZero ? makeZero(false) : makeSmallestNormalized(false);
1121}
1122
1123// Delegate to the previous constructor, because the copy constructor may later
1124// inspect category, which must not be garbage.
1126 : IEEEFloat(ourSemantics) {}
1127
1129 initialize(rhs.semantics);
1130 assign(rhs);
1131}
1132
1133IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&APFloatBase::semBogus) {
1134 *this = std::move(rhs);
1135}
1136
1137IEEEFloat::~IEEEFloat() { freeSignificand(); }
1138
1139unsigned int IEEEFloat::partCount() const {
1140 return partCountForBits(semantics->precision + 1);
1141}
1142
1143const APFloat::integerPart *IEEEFloat::significandParts() const {
1144 return const_cast<IEEEFloat *>(this)->significandParts();
1145}
1146
1147APFloat::integerPart *IEEEFloat::significandParts() {
1148 if (partCount() > 1)
1149 return significand.parts;
1150 else
1151 return &significand.part;
1152}
1153
1154void IEEEFloat::zeroSignificand() {
1155 APInt::tcSet(significandParts(), 0, partCount());
1156}
1157
1158/* Increment an fcNormal floating point number's significand. */
1159void IEEEFloat::incrementSignificand() {
1160 [[maybe_unused]] integerPart carry =
1161 APInt::tcIncrement(significandParts(), partCount());
1162
1163 /* Our callers should never cause us to overflow. */
1164 assert(carry == 0);
1165}
1166
1167/* Add the significand of the RHS. Returns the carry flag. */
1168APFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
1169 integerPart *parts = significandParts();
1170
1171 assert(semantics == rhs.semantics);
1172 assert(exponent == rhs.exponent);
1173
1174 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
1175}
1176
1177/* Subtract the significand of the RHS with a borrow flag. Returns
1178 the borrow flag. */
1179APFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
1180 integerPart borrow) {
1181 integerPart *parts = significandParts();
1182
1183 assert(semantics == rhs.semantics);
1184 assert(exponent == rhs.exponent);
1185
1186 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
1187 partCount());
1188}
1189
1190/* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
1191 on to the full-precision result of the multiplication. Returns the
1192 lost fraction. */
1193lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
1194 IEEEFloat addend,
1195 bool ignoreAddend) {
1196 integerPart scratch[4];
1197 bool ignored;
1198
1199 assert(semantics == rhs.semantics);
1200
1201 unsigned precision = semantics->precision;
1202
1203 // Allocate space for twice as many bits as the original significand, plus one
1204 // extra bit for the addition to overflow into.
1205 unsigned newPartsCount = partCountForBits(precision * 2 + 1);
1206
1207 // FIXME: Replace with SmallVector<4>.
1208 integerPart *fullSignificand =
1209 newPartsCount > 4 ? new integerPart[newPartsCount] : scratch;
1210
1211 integerPart *lhsSignificand = significandParts();
1212 unsigned partsCount = partCount();
1213
1214 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
1215 rhs.significandParts(), partsCount, partsCount);
1216
1217 lostFraction lost_fraction = lfExactlyZero;
1218 // One, not zero, based MSB.
1219 unsigned omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1220 exponent += rhs.exponent;
1221
1222 // Assume the operands involved in the multiplication are single-precision
1223 // FP, and the two multiplicants are:
1224 // *this = a23 . a22 ... a0 * 2^e1
1225 // rhs = b23 . b22 ... b0 * 2^e2
1226 // the result of multiplication is:
1227 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
1228 // Note that there are three significant bits at the left-hand side of the
1229 // radix point: two for the multiplication, and an overflow bit for the
1230 // addition (that will always be zero at this point). Move the radix point
1231 // toward left by two bits, and adjust exponent accordingly.
1232 exponent += 2;
1233
1234 if (!ignoreAddend && addend.isNonZero()) {
1235 // The intermediate result of the multiplication has "2 * precision"
1236 // signicant bit; adjust the addend to be consistent with mul result.
1237 //
1238 Significand savedSignificand = significand;
1239 const fltSemantics *savedSemantics = semantics;
1240
1241 // Normalize our MSB to one below the top bit to allow for overflow.
1242 unsigned extendedPrecision = 2 * precision + 1;
1243 if (omsb != extendedPrecision - 1) {
1244 assert(extendedPrecision > omsb);
1245 APInt::tcShiftLeft(fullSignificand, newPartsCount,
1246 (extendedPrecision - 1) - omsb);
1247 exponent -= (extendedPrecision - 1) - omsb;
1248 }
1249
1250 /* Create new semantics. */
1251 fltSemantics extendedSemantics = *semantics;
1252 extendedSemantics.precision = extendedPrecision;
1253
1254 if (newPartsCount == 1)
1255 significand.part = fullSignificand[0];
1256 else
1257 significand.parts = fullSignificand;
1258 semantics = &extendedSemantics;
1259
1260 // Make a copy so we can convert it to the extended semantics.
1261 // Note that we cannot convert the addend directly, as the extendedSemantics
1262 // is a local variable (which we take a reference to).
1263 IEEEFloat extendedAddend(addend);
1264 [[maybe_unused]] opStatus status = extendedAddend.convert(
1265 extendedSemantics, APFloat::rmTowardZero, &ignored);
1266 assert(status == APFloat::opOK);
1267
1268 // Shift the significand of the addend right by one bit. This guarantees
1269 // that the high bit of the significand is zero (same as fullSignificand),
1270 // so the addition will overflow (if it does overflow at all) into the top bit.
1271 lost_fraction = extendedAddend.shiftSignificandRight(1);
1272 assert(lost_fraction == lfExactlyZero &&
1273 "Lost precision while shifting addend for fused-multiply-add.");
1274
1275 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
1276
1277 /* Restore our state. */
1278 if (newPartsCount == 1)
1279 fullSignificand[0] = significand.part;
1280 significand = savedSignificand;
1281 semantics = savedSemantics;
1282
1283 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1284 }
1285
1286 // Convert the result having "2 * precision" significant-bits back to the one
1287 // having "precision" significant-bits. First, move the radix point from
1288 // poision "2*precision - 1" to "precision - 1". The exponent need to be
1289 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
1290 exponent -= precision + 1;
1291
1292 // In case MSB resides at the left-hand side of radix point, shift the
1293 // mantissa right by some amount to make sure the MSB reside right before
1294 // the radix point (i.e. "MSB . rest-significant-bits").
1295 //
1296 // Note that the result is not normalized when "omsb < precision". So, the
1297 // caller needs to call IEEEFloat::normalize() if normalized value is
1298 // expected.
1299 if (omsb > precision) {
1300 unsigned int bits, significantParts;
1301 lostFraction lf;
1302
1303 bits = omsb - precision;
1304 significantParts = partCountForBits(omsb);
1305 lf = shiftRight(fullSignificand, significantParts, bits);
1306 lost_fraction = combineLostFractions(lf, lost_fraction);
1307 exponent += bits;
1308 }
1309
1310 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
1311
1312 if (newPartsCount > 4)
1313 delete [] fullSignificand;
1314
1315 return lost_fraction;
1316}
1317
1318lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) {
1319 // When the given semantics has zero, the addend here is a zero.
1320 // i.e . it belongs to the 'fcZero' category.
1321 // But when the semantics does not support zero, we need to
1322 // explicitly convey that this addend should be ignored
1323 // for multiplication.
1324 return multiplySignificand(rhs, IEEEFloat(*semantics), !semantics->hasZero);
1325}
1326
1327/* Multiply the significands of LHS and RHS to DST. */
1328lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
1329 integerPart scratch[4];
1330
1331 assert(semantics == rhs.semantics);
1332
1333 integerPart *lhsSignificand = significandParts();
1334 const integerPart *rhsSignificand = rhs.significandParts();
1335 unsigned partsCount = partCount();
1336
1337 integerPart *dividend =
1338 partsCount > 2 ? new integerPart[partsCount * 2] : scratch;
1339 integerPart *divisor = dividend + partsCount;
1340
1341 /* Copy the dividend and divisor as they will be modified in-place. */
1342 for (unsigned i = 0; i < partsCount; i++) {
1343 dividend[i] = lhsSignificand[i];
1344 divisor[i] = rhsSignificand[i];
1345 lhsSignificand[i] = 0;
1346 }
1347
1348 exponent -= rhs.exponent;
1349
1350 unsigned int precision = semantics->precision;
1351
1352 /* Normalize the divisor. */
1353 unsigned bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1354 if (bit) {
1355 exponent += bit;
1356 APInt::tcShiftLeft(divisor, partsCount, bit);
1357 }
1358
1359 /* Normalize the dividend. */
1360 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1361 if (bit) {
1362 exponent -= bit;
1363 APInt::tcShiftLeft(dividend, partsCount, bit);
1364 }
1365
1366 /* Ensure the dividend >= divisor initially for the loop below.
1367 Incidentally, this means that the division loop below is
1368 guaranteed to set the integer bit to one. */
1369 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1370 exponent--;
1371 APInt::tcShiftLeft(dividend, partsCount, 1);
1372 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1373 }
1374
1375 /* Long division. */
1376 for (bit = precision; bit; bit -= 1) {
1377 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1378 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1379 APInt::tcSetBit(lhsSignificand, bit - 1);
1380 }
1381
1382 APInt::tcShiftLeft(dividend, partsCount, 1);
1383 }
1384
1385 /* Figure out the lost fraction. */
1386 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1387
1388 lostFraction lost_fraction;
1389 if (cmp > 0)
1390 lost_fraction = lfMoreThanHalf;
1391 else if (cmp == 0)
1392 lost_fraction = lfExactlyHalf;
1393 else if (APInt::tcIsZero(dividend, partsCount))
1394 lost_fraction = lfExactlyZero;
1395 else
1396 lost_fraction = lfLessThanHalf;
1397
1398 if (partsCount > 2)
1399 delete [] dividend;
1400
1401 return lost_fraction;
1402}
1403
1404unsigned int IEEEFloat::significandMSB() const {
1405 return APInt::tcMSB(significandParts(), partCount());
1406}
1407
1408unsigned int IEEEFloat::significandLSB() const {
1409 return APInt::tcLSB(significandParts(), partCount());
1410}
1411
1412/* Note that a zero result is NOT normalized to fcZero. */
1413lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
1414 /* Our exponent should not overflow. */
1415 assert((ExponentType) (exponent + bits) >= exponent);
1416
1417 exponent += bits;
1418
1419 return shiftRight(significandParts(), partCount(), bits);
1420}
1421
1422/* Shift the significand left BITS bits, subtract BITS from its exponent. */
1423void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
1424 assert(bits < semantics->precision ||
1425 (semantics->precision == 1 && bits <= 1));
1426
1427 if (bits) {
1428 unsigned int partsCount = partCount();
1429
1430 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1431 exponent -= bits;
1432
1433 assert(!APInt::tcIsZero(significandParts(), partsCount));
1434 }
1435}
1436
1438 assert(semantics == rhs.semantics);
1440 assert(rhs.isFiniteNonZero());
1441
1442 int compare = exponent - rhs.exponent;
1443
1444 /* If exponents are equal, do an unsigned bignum comparison of the
1445 significands. */
1446 if (compare == 0)
1447 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1448 partCount());
1449
1450 if (compare > 0)
1451 return cmpGreaterThan;
1452 else if (compare < 0)
1453 return cmpLessThan;
1454 else
1455 return cmpEqual;
1456}
1457
1458/* Set the least significant BITS bits of a bignum, clear the
1459 rest. */
1460static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts,
1461 unsigned bits) {
1462 unsigned i = 0;
1463 while (bits > APInt::APINT_BITS_PER_WORD) {
1464 dst[i++] = ~(APInt::WordType)0;
1466 }
1467
1468 if (bits)
1469 dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits);
1470
1471 while (i < parts)
1472 dst[i++] = 0;
1473}
1474
1475/* Handle overflow. Sign is preserved. We either become infinity or
1476 the largest finite number. */
1477APFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
1479 /* Infinity? */
1480 if (rounding_mode == rmNearestTiesToEven ||
1481 rounding_mode == rmNearestTiesToAway ||
1482 (rounding_mode == rmTowardPositive && !sign) ||
1483 (rounding_mode == rmTowardNegative && sign)) {
1485 makeNaN(false, sign);
1486 else
1487 category = fcInfinity;
1488 return static_cast<opStatus>(opOverflow | opInexact);
1489 }
1490 }
1491
1492 /* Otherwise we become the largest finite number. */
1493 category = fcNormal;
1494 exponent = semantics->maxExponent;
1495 tcSetLeastSignificantBits(significandParts(), partCount(),
1496 semantics->precision);
1497 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1498 semantics->nanEncoding == fltNanEncoding::AllOnes)
1499 APInt::tcClearBit(significandParts(), 0);
1500
1501 return opInexact;
1502}
1503
1504/* Returns TRUE if, when truncating the current number, with BIT the
1505 new LSB, with the given lost fraction and rounding mode, the result
1506 would need to be rounded away from zero (i.e., by increasing the
1507 signficand). This routine must work for fcZero of both signs, and
1508 fcNormal numbers. */
1509bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
1510 lostFraction lost_fraction,
1511 unsigned int bit) const {
1512 /* NaNs and infinities should not have lost fractions. */
1513 assert(isFiniteNonZero() || category == fcZero);
1514
1515 /* Current callers never pass this so we don't handle it. */
1516 assert(lost_fraction != lfExactlyZero);
1517
1518 switch (rounding_mode) {
1520 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1521
1523 if (lost_fraction == lfMoreThanHalf)
1524 return true;
1525
1526 /* Our zeroes don't have a significand to test. */
1527 if (lost_fraction == lfExactlyHalf && category != fcZero)
1528 return APInt::tcExtractBit(significandParts(), bit);
1529
1530 return false;
1531
1532 case rmTowardZero:
1533 return false;
1534
1535 case rmTowardPositive:
1536 return !sign;
1537
1538 case rmTowardNegative:
1539 return sign;
1540
1541 default:
1542 break;
1543 }
1544 llvm_unreachable("Invalid rounding mode found");
1545}
1546
1547APFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
1548 lostFraction lost_fraction) {
1549 if (!isFiniteNonZero())
1550 return opOK;
1551
1552 /* Before rounding normalize the exponent of fcNormal numbers. */
1553 /* One, not zero, based MSB. */
1554 unsigned omsb = significandMSB() + 1;
1555
1556 // Only skip this `if` if the value is exactly zero.
1557 if (omsb || lost_fraction != lfExactlyZero) {
1558 /* OMSB is numbered from 1. We want to place it in the integer
1559 bit numbered PRECISION if possible, with a compensating change in
1560 the exponent. */
1561 int exponentChange = omsb - semantics->precision;
1562
1563 /* If the resulting exponent is too high, overflow according to
1564 the rounding mode. */
1565 if (exponent + exponentChange > semantics->maxExponent)
1566 return handleOverflow(rounding_mode);
1567
1568 /* Subnormal numbers have exponent minExponent, and their MSB
1569 is forced based on that. */
1570 if (exponent + exponentChange < semantics->minExponent)
1571 exponentChange = semantics->minExponent - exponent;
1572
1573 /* Shifting left is easy as we don't lose precision. */
1574 if (exponentChange < 0) {
1575 assert(lost_fraction == lfExactlyZero);
1576
1577 shiftSignificandLeft(-exponentChange);
1578
1579 return opOK;
1580 }
1581
1582 if (exponentChange > 0) {
1583 lostFraction lf;
1584
1585 /* Shift right and capture any new lost fraction. */
1586 lf = shiftSignificandRight(exponentChange);
1587
1588 lost_fraction = combineLostFractions(lf, lost_fraction);
1589
1590 /* Keep OMSB up-to-date. */
1591 if (omsb > (unsigned) exponentChange)
1592 omsb -= exponentChange;
1593 else
1594 omsb = 0;
1595 }
1596 }
1597
1598 // The all-ones values is an overflow if NaN is all ones. If NaN is
1599 // represented by negative zero, then it is a valid finite value.
1600 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1601 semantics->nanEncoding == fltNanEncoding::AllOnes &&
1602 exponent == semantics->maxExponent && isSignificandAllOnes())
1603 return handleOverflow(rounding_mode);
1604
1605 /* Now round the number according to rounding_mode given the lost
1606 fraction. */
1607
1608 /* As specified in IEEE 754, since we do not trap we do not report
1609 underflow for exact results. */
1610 if (lost_fraction == lfExactlyZero) {
1611 /* Canonicalize zeroes. */
1612 if (omsb == 0) {
1613 category = fcZero;
1614 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
1615 sign = false;
1616 if (!semantics->hasZero)
1618 }
1619
1620 return opOK;
1621 }
1622
1623 /* Increment the significand if we're rounding away from zero. */
1624 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1625 if (omsb == 0)
1626 exponent = semantics->minExponent;
1627
1628 incrementSignificand();
1629 omsb = significandMSB() + 1;
1630
1631 /* Did the significand increment overflow? */
1632 if (omsb == (unsigned) semantics->precision + 1) {
1633 /* Renormalize by incrementing the exponent and shifting our
1634 significand right one. However if we already have the
1635 maximum exponent we overflow to infinity. */
1636 if (exponent == semantics->maxExponent)
1637 // Invoke overflow handling with a rounding mode that will guarantee
1638 // that the result gets turned into the correct infinity representation.
1639 // This is needed instead of just setting the category to infinity to
1640 // account for 8-bit floating point types that have no inf, only NaN.
1641 return handleOverflow(sign ? rmTowardNegative : rmTowardPositive);
1642
1643 shiftSignificandRight(1);
1644
1645 return opInexact;
1646 }
1647
1648 // The all-ones values is an overflow if NaN is all ones. If NaN is
1649 // represented by negative zero, then it is a valid finite value.
1650 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1651 semantics->nanEncoding == fltNanEncoding::AllOnes &&
1652 exponent == semantics->maxExponent && isSignificandAllOnes())
1653 return handleOverflow(rounding_mode);
1654 }
1655
1656 /* The normal case - we were and are not denormal, and any
1657 significand increment above didn't overflow. */
1658 if (omsb == semantics->precision)
1659 return opInexact;
1660
1661 /* We have a non-zero denormal. */
1662 assert(omsb < semantics->precision);
1663
1664 /* Canonicalize zeroes. */
1665 if (omsb == 0) {
1666 category = fcZero;
1667 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
1668 sign = false;
1669 // This condition handles the case where the semantics
1670 // does not have zero but uses the all-zero encoding
1671 // to represent the smallest normal value.
1672 if (!semantics->hasZero)
1674 }
1675
1676 /* The fcZero case is a denormal that underflowed to zero. */
1677 return (opStatus) (opUnderflow | opInexact);
1678}
1679
1680APFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
1681 bool subtract) {
1682 switch (PackCategoriesIntoKey(category, rhs.category)) {
1683 default:
1684 llvm_unreachable(nullptr);
1685
1689 assign(rhs);
1690 [[fallthrough]];
1695 if (isSignaling()) {
1696 makeQuiet();
1697 return opInvalidOp;
1698 }
1699 return rhs.isSignaling() ? opInvalidOp : opOK;
1700
1704 return opOK;
1705
1708 category = fcInfinity;
1709 sign = rhs.sign ^ subtract;
1710 return opOK;
1711
1713 assign(rhs);
1714 sign = rhs.sign ^ subtract;
1715 return opOK;
1716
1718 /* Sign depends on rounding mode; handled by caller. */
1719 return opOK;
1720
1722 /* Differently signed infinities can only be validly
1723 subtracted. */
1724 if (((sign ^ rhs.sign)!=0) != subtract) {
1725 makeNaN();
1726 return opInvalidOp;
1727 }
1728
1729 return opOK;
1730
1732 return opDivByZero;
1733 }
1734}
1735
1736/* Add or subtract two normal numbers. */
1737lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
1738 bool subtract) {
1739 [[maybe_unused]] integerPart carry = 0;
1740 lostFraction lost_fraction;
1741
1742 /* Determine if the operation on the absolute values is effectively
1743 an addition or subtraction. */
1744 subtract ^= static_cast<bool>(sign ^ rhs.sign);
1745
1746 /* Are we bigger exponent-wise than the RHS? */
1747 int bits = exponent - rhs.exponent;
1748
1749 /* Subtraction is more subtle than one might naively expect. */
1750 if (subtract) {
1751 if ((bits < 0) && !semantics->hasSignedRepr)
1753 "This floating point format does not support signed values");
1754
1755 IEEEFloat temp_rhs(rhs);
1756 bool lost_fraction_is_from_rhs = false;
1757
1758 if (bits == 0)
1759 lost_fraction = lfExactlyZero;
1760 else if (bits > 0) {
1761 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1762 lost_fraction_is_from_rhs = true;
1763 shiftSignificandLeft(1);
1764 } else {
1765 lost_fraction = shiftSignificandRight(-bits - 1);
1766 temp_rhs.shiftSignificandLeft(1);
1767 }
1768
1769 // Should we reverse the subtraction.
1770 cmpResult cmp_result = compareAbsoluteValue(temp_rhs);
1771 if (cmp_result == cmpLessThan) {
1772 bool borrow =
1773 lost_fraction != lfExactlyZero && !lost_fraction_is_from_rhs;
1774 if (borrow) {
1775 // The lost fraction is being subtracted, borrow from the significand
1776 // and invert `lost_fraction`.
1777 if (lost_fraction == lfLessThanHalf)
1778 lost_fraction = lfMoreThanHalf;
1779 else if (lost_fraction == lfMoreThanHalf)
1780 lost_fraction = lfLessThanHalf;
1781 }
1782 carry = temp_rhs.subtractSignificand(*this, borrow);
1783 copySignificand(temp_rhs);
1784 sign = !sign;
1785 } else if (cmp_result == cmpGreaterThan) {
1786 bool borrow = lost_fraction != lfExactlyZero && lost_fraction_is_from_rhs;
1787 if (borrow) {
1788 // The lost fraction is being subtracted, borrow from the significand
1789 // and invert `lost_fraction`.
1790 if (lost_fraction == lfLessThanHalf)
1791 lost_fraction = lfMoreThanHalf;
1792 else if (lost_fraction == lfMoreThanHalf)
1793 lost_fraction = lfLessThanHalf;
1794 }
1795 carry = subtractSignificand(temp_rhs, borrow);
1796 } else { // cmpEqual
1797 zeroSignificand();
1798 if (lost_fraction != lfExactlyZero && lost_fraction_is_from_rhs) {
1799 // rhs is slightly larger due to the lost fraction, flip the sign.
1800 sign = !sign;
1801 }
1802 }
1803
1804 /* The code above is intended to ensure that no borrow is
1805 necessary. */
1806 assert(!carry);
1807 } else {
1808 if (bits > 0) {
1809 IEEEFloat temp_rhs(rhs);
1810
1811 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1812 carry = addSignificand(temp_rhs);
1813 } else {
1814 lost_fraction = shiftSignificandRight(-bits);
1815 carry = addSignificand(rhs);
1816 }
1817
1818 /* We have a guard bit; generating a carry cannot happen. */
1819 assert(!carry);
1820 }
1821
1822 return lost_fraction;
1823}
1824
1825APFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
1826 switch (PackCategoriesIntoKey(category, rhs.category)) {
1827 default:
1828 llvm_unreachable(nullptr);
1829
1833 assign(rhs);
1834 sign = false;
1835 [[fallthrough]];
1840 sign ^= rhs.sign; // restore the original sign
1841 if (isSignaling()) {
1842 makeQuiet();
1843 return opInvalidOp;
1844 }
1845 return rhs.isSignaling() ? opInvalidOp : opOK;
1846
1850 category = fcInfinity;
1851 return opOK;
1852
1856 category = fcZero;
1857 return opOK;
1858
1861 makeNaN();
1862 return opInvalidOp;
1863
1865 return opOK;
1866 }
1867}
1868
1869APFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
1870 switch (PackCategoriesIntoKey(category, rhs.category)) {
1871 default:
1872 llvm_unreachable(nullptr);
1873
1877 assign(rhs);
1878 sign = false;
1879 [[fallthrough]];
1884 sign ^= rhs.sign; // restore the original sign
1885 if (isSignaling()) {
1886 makeQuiet();
1887 return opInvalidOp;
1888 }
1889 return rhs.isSignaling() ? opInvalidOp : opOK;
1890
1895 return opOK;
1896
1898 category = fcZero;
1899 return opOK;
1900
1902 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
1903 makeNaN(false, sign);
1904 else
1905 category = fcInfinity;
1906 return opDivByZero;
1907
1910 makeNaN();
1911 return opInvalidOp;
1912
1914 return opOK;
1915 }
1916}
1917
1918APFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
1919 switch (PackCategoriesIntoKey(category, rhs.category)) {
1920 default:
1921 llvm_unreachable(nullptr);
1922
1926 assign(rhs);
1927 [[fallthrough]];
1932 if (isSignaling()) {
1933 makeQuiet();
1934 return opInvalidOp;
1935 }
1936 return rhs.isSignaling() ? opInvalidOp : opOK;
1937
1941 return opOK;
1942
1948 makeNaN();
1949 return opInvalidOp;
1950
1952 return opOK;
1953 }
1954}
1955
1956APFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) {
1957 switch (PackCategoriesIntoKey(category, rhs.category)) {
1958 default:
1959 llvm_unreachable(nullptr);
1960
1964 assign(rhs);
1965 [[fallthrough]];
1970 if (isSignaling()) {
1971 makeQuiet();
1972 return opInvalidOp;
1973 }
1974 return rhs.isSignaling() ? opInvalidOp : opOK;
1975
1979 return opOK;
1980
1986 makeNaN();
1987 return opInvalidOp;
1988
1990 return opDivByZero; // fake status, indicating this is not a special case
1991 }
1992}
1993
1994/* Change sign. */
1996 // With NaN-as-negative-zero, neither NaN or negative zero can change
1997 // their signs.
1998 if (semantics->nanEncoding == fltNanEncoding::NegativeZero &&
1999 (isZero() || isNaN()))
2000 return;
2001 /* Look mummy, this one's easy. */
2002 sign = !sign;
2003}
2004
2005/* Normalized addition or subtraction. */
2006APFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
2007 roundingMode rounding_mode,
2008 bool subtract) {
2009 opStatus fs = addOrSubtractSpecials(rhs, subtract);
2010
2011 /* This return code means it was not a simple case. */
2012 if (fs == opDivByZero) {
2013 lostFraction lost_fraction;
2014
2015 lost_fraction = addOrSubtractSignificand(rhs, subtract);
2016 fs = normalize(rounding_mode, lost_fraction);
2017
2018 /* Can only be zero if we lost no fraction. */
2019 assert(category != fcZero || lost_fraction == lfExactlyZero);
2020 }
2021
2022 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2023 positive zero unless rounding to minus infinity, except that
2024 adding two like-signed zeroes gives that zero. */
2025 if (category == fcZero) {
2026 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
2027 sign = (rounding_mode == rmTowardNegative);
2028 // NaN-in-negative-zero means zeros need to be normalized to +0.
2029 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2030 sign = false;
2031 }
2032
2033 return fs;
2034}
2035
2036/* Normalized addition. */
2038 roundingMode rounding_mode) {
2039 return addOrSubtract(rhs, rounding_mode, false);
2040}
2041
2042/* Normalized subtraction. */
2044 roundingMode rounding_mode) {
2045 return addOrSubtract(rhs, rounding_mode, true);
2046}
2047
2048/* Normalized multiply. */
2050 roundingMode rounding_mode) {
2051 sign ^= rhs.sign;
2052 opStatus fs = multiplySpecials(rhs);
2053
2054 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2055 sign = false;
2056 if (isFiniteNonZero()) {
2057 lostFraction lost_fraction = multiplySignificand(rhs);
2058 fs = normalize(rounding_mode, lost_fraction);
2059 if (lost_fraction != lfExactlyZero)
2060 fs = (opStatus) (fs | opInexact);
2061 }
2062
2063 return fs;
2064}
2065
2066/* Normalized divide. */
2068 roundingMode rounding_mode) {
2069 sign ^= rhs.sign;
2070 opStatus fs = divideSpecials(rhs);
2071
2072 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2073 sign = false;
2074 if (isFiniteNonZero()) {
2075 lostFraction lost_fraction = divideSignificand(rhs);
2076 fs = normalize(rounding_mode, lost_fraction);
2077 if (lost_fraction != lfExactlyZero)
2078 fs = (opStatus) (fs | opInexact);
2079 }
2080
2081 return fs;
2082}
2083
2084/* Normalized remainder. */
2086 unsigned int origSign = sign;
2087
2088 // First handle the special cases.
2089 opStatus fs = remainderSpecials(rhs);
2090 if (fs != opDivByZero)
2091 return fs;
2092
2093 fs = opOK;
2094
2095 // Make sure the current value is less than twice the denom. If the addition
2096 // did not succeed (an overflow has happened), then the finite value we
2097 // currently possess must already be less than twice the denom (as we are
2098 // using the same semantics).
2099 IEEEFloat P2 = rhs;
2100 if (P2.add(rhs, rmNearestTiesToEven) == opOK) {
2101 fs = mod(P2);
2102 assert(fs == opOK);
2103 }
2104
2105 // Lets work with absolute numbers.
2106 IEEEFloat P = rhs;
2107 P.sign = false;
2108 sign = false;
2109
2110 //
2111 // To calculate the remainder we use the following scheme.
2112 //
2113 // The remainder is defined as follows:
2114 //
2115 // remainder = numer - rquot * denom = x - r * p
2116 //
2117 // Where r is the result of: x/p, rounded toward the nearest integral value
2118 // (with halfway cases rounded toward the even number).
2119 //
2120 // Currently, (after x mod 2p):
2121 // r is the number of 2p's present inside x, which is inherently, an even
2122 // number of p's.
2123 //
2124 // We may split the remaining calculation into 4 options:
2125 // - if x < 0.5p then we round to the nearest number which is 0, and are done.
2126 // - if x == 0.5p then we round to the nearest even number which is 0, and we
2127 // are done as well.
2128 // - if 0.5p < x < p then we round to nearest number which is 1, and we have
2129 // to subtract 1p at least once.
2130 // - if x >= p then we must subtract p at least once, as x must be a
2131 // remainder.
2132 //
2133 // By now, either we are done, or we added 1 to r, which is therefore now odd.
2134 //
2135 // We can now split the remaining calculation to the following 3 options:
2136 // - if x < 0.5p then we round to the nearest number which is 0, and are done.
2137 // - if x == 0.5p then we round to the nearest even number. As r is odd, we
2138 // must round up to the next even number. so we must subtract p once more.
2139 // - if x > 0.5p (and inherently x < p) then we must round r up to the next
2140 // integral, and subtract p once more.
2141 //
2142
2143 // Extend the semantics to prevent an overflow/underflow or inexact result.
2144 bool losesInfo;
2145 fltSemantics extendedSemantics = *semantics;
2146 extendedSemantics.maxExponent++;
2147 extendedSemantics.minExponent--;
2148 extendedSemantics.precision += 2;
2149
2150 IEEEFloat VEx = *this;
2151 fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2152 assert(fs == opOK && !losesInfo);
2153 IEEEFloat PEx = P;
2154 fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2155 assert(fs == opOK && !losesInfo);
2156
2157 // It is simpler to work with 2x instead of 0.5p, and we do not need to lose
2158 // any fraction.
2159 fs = VEx.add(VEx, rmNearestTiesToEven);
2160 assert(fs == opOK);
2161
2162 if (VEx.compare(PEx) == cmpGreaterThan) {
2164 assert(fs == opOK);
2165
2166 // Make VEx = this.add(this), but because we have different semantics, we do
2167 // not want to `convert` again, so we just subtract PEx twice (which equals
2168 // to the desired value).
2169 fs = VEx.subtract(PEx, rmNearestTiesToEven);
2170 assert(fs == opOK);
2171 fs = VEx.subtract(PEx, rmNearestTiesToEven);
2172 assert(fs == opOK);
2173
2174 cmpResult result = VEx.compare(PEx);
2175 if (result == cmpGreaterThan || result == cmpEqual) {
2177 assert(fs == opOK);
2178 }
2179 }
2180
2181 if (isZero()) {
2182 sign = origSign; // IEEE754 requires this
2183 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2184 // But some 8-bit floats only have positive 0.
2185 sign = false;
2186 } else {
2187 sign ^= origSign;
2188 }
2189 return fs;
2190}
2191
2192/* Normalized llvm frem (C fmod). */
2194 opStatus fs = modSpecials(rhs);
2195 unsigned int origSign = sign;
2196
2197 while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
2199 int Exp = ilogb(*this) - ilogb(rhs);
2200 IEEEFloat V = scalbn(rhs, Exp, rmNearestTiesToEven);
2201 // V can overflow to NaN with fltNonfiniteBehavior::NanOnly, so explicitly
2202 // check for it.
2203 if (V.isNaN() || compareAbsoluteValue(V) == cmpLessThan)
2204 V = scalbn(rhs, Exp - 1, rmNearestTiesToEven);
2205 V.sign = sign;
2206
2208
2209 // When the semantics supports zero, this loop's
2210 // exit-condition is handled by the 'isFiniteNonZero'
2211 // category check above. However, when the semantics
2212 // does not have 'fcZero' and we have reached the
2213 // minimum possible value, (and any further subtract
2214 // will underflow to the same value) explicitly
2215 // provide an exit-path here.
2216 if (!semantics->hasZero && this->isSmallest())
2217 break;
2218
2219 assert(fs==opOK);
2220 }
2221 if (isZero()) {
2222 sign = origSign; // fmod requires this
2223 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2224 sign = false;
2225 }
2226 return fs;
2227}
2228
2229/* Normalized fused-multiply-add. */
2231 const IEEEFloat &addend,
2232 roundingMode rounding_mode) {
2233 opStatus fs;
2234
2235 /* Post-multiplication sign, before addition. */
2236 sign ^= multiplicand.sign;
2237
2238 /* If and only if all arguments are normal do we need to do an
2239 extended-precision calculation. */
2240 if (isFiniteNonZero() &&
2241 multiplicand.isFiniteNonZero() &&
2242 addend.isFinite()) {
2243 lostFraction lost_fraction;
2244
2245 lost_fraction = multiplySignificand(multiplicand, addend);
2246 fs = normalize(rounding_mode, lost_fraction);
2247 if (lost_fraction != lfExactlyZero)
2248 fs = (opStatus) (fs | opInexact);
2249
2250 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2251 positive zero unless rounding to minus infinity, except that
2252 adding two like-signed zeroes gives that zero. */
2253 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) {
2254 sign = (rounding_mode == rmTowardNegative);
2255 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2256 sign = false;
2257 }
2258 } else {
2259 fs = multiplySpecials(multiplicand);
2260
2261 /* FS can only be opOK or opInvalidOp. There is no more work
2262 to do in the latter case. The IEEE-754R standard says it is
2263 implementation-defined in this case whether, if ADDEND is a
2264 quiet NaN, we raise invalid op; this implementation does so.
2265
2266 If we need to do the addition we can do so with normal
2267 precision. */
2268 if (fs == opOK)
2269 fs = addOrSubtract(addend, rounding_mode, false);
2270 }
2271
2272 return fs;
2273}
2274
2275/* Rounding-mode correct round to integral value. */
2277 if (isInfinity())
2278 // [IEEE Std 754-2008 6.1]:
2279 // The behavior of infinity in floating-point arithmetic is derived from the
2280 // limiting cases of real arithmetic with operands of arbitrarily
2281 // large magnitude, when such a limit exists.
2282 // ...
2283 // Operations on infinite operands are usually exact and therefore signal no
2284 // exceptions ...
2285 return opOK;
2286
2287 if (isNaN()) {
2288 if (isSignaling()) {
2289 // [IEEE Std 754-2008 6.2]:
2290 // Under default exception handling, any operation signaling an invalid
2291 // operation exception and for which a floating-point result is to be
2292 // delivered shall deliver a quiet NaN.
2293 makeQuiet();
2294 // [IEEE Std 754-2008 6.2]:
2295 // Signaling NaNs shall be reserved operands that, under default exception
2296 // handling, signal the invalid operation exception(see 7.2) for every
2297 // general-computational and signaling-computational operation except for
2298 // the conversions described in 5.12.
2299 return opInvalidOp;
2300 } else {
2301 // [IEEE Std 754-2008 6.2]:
2302 // For an operation with quiet NaN inputs, other than maximum and minimum
2303 // operations, if a floating-point result is to be delivered the result
2304 // shall be a quiet NaN which should be one of the input NaNs.
2305 // ...
2306 // Every general-computational and quiet-computational operation involving
2307 // one or more input NaNs, none of them signaling, shall signal no
2308 // exception, except fusedMultiplyAdd might signal the invalid operation
2309 // exception(see 7.2).
2310 return opOK;
2311 }
2312 }
2313
2314 if (isZero()) {
2315 // [IEEE Std 754-2008 6.3]:
2316 // ... the sign of the result of conversions, the quantize operation, the
2317 // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is
2318 // the sign of the first or only operand.
2319 return opOK;
2320 }
2321
2322 // If the exponent is large enough, we know that this value is already
2323 // integral, and the arithmetic below would potentially cause it to saturate
2324 // to +/-Inf. Bail out early instead.
2325 if (exponent + 1 >= (int)APFloat::semanticsPrecision(*semantics))
2326 return opOK;
2327
2328 // The algorithm here is quite simple: we add 2^(p-1), where p is the
2329 // precision of our format, and then subtract it back off again. The choice
2330 // of rounding modes for the addition/subtraction determines the rounding mode
2331 // for our integral rounding as well.
2332 // NOTE: When the input value is negative, we do subtraction followed by
2333 // addition instead.
2334 APInt IntegerConstant(NextPowerOf2(APFloat::semanticsPrecision(*semantics)),
2335 1);
2336 IntegerConstant <<= APFloat::semanticsPrecision(*semantics) - 1;
2337 IEEEFloat MagicConstant(*semantics);
2338 opStatus fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
2340 assert(fs == opOK);
2341 MagicConstant.sign = sign;
2342
2343 // Preserve the input sign so that we can handle the case of zero result
2344 // correctly.
2345 bool inputSign = isNegative();
2346
2347 fs = add(MagicConstant, rounding_mode);
2348
2349 // Current value and 'MagicConstant' are both integers, so the result of the
2350 // subtraction is always exact according to Sterbenz' lemma.
2351 subtract(MagicConstant, rounding_mode);
2352
2353 // Restore the input sign.
2354 if (inputSign != isNegative())
2355 changeSign();
2356
2357 return fs;
2358}
2359
2360/* Comparison requires normalized numbers. */
2362 assert(semantics == rhs.semantics);
2363
2364 switch (PackCategoriesIntoKey(category, rhs.category)) {
2365 default:
2366 llvm_unreachable(nullptr);
2367
2375 return cmpUnordered;
2376
2380 if (sign)
2381 return cmpLessThan;
2382 else
2383 return cmpGreaterThan;
2384
2388 if (rhs.sign)
2389 return cmpGreaterThan;
2390 else
2391 return cmpLessThan;
2392
2394 if (sign == rhs.sign)
2395 return cmpEqual;
2396 else if (sign)
2397 return cmpLessThan;
2398 else
2399 return cmpGreaterThan;
2400
2402 return cmpEqual;
2403
2405 break;
2406 }
2407
2408 cmpResult result;
2409 /* Two normal numbers. Do they have the same sign? */
2410 if (sign != rhs.sign) {
2411 if (sign)
2412 result = cmpLessThan;
2413 else
2414 result = cmpGreaterThan;
2415 } else {
2416 /* Compare absolute values; invert result if negative. */
2417 result = compareAbsoluteValue(rhs);
2418
2419 if (sign) {
2420 if (result == cmpLessThan)
2421 result = cmpGreaterThan;
2422 else if (result == cmpGreaterThan)
2423 result = cmpLessThan;
2424 }
2425 }
2426
2427 return result;
2428}
2429
2430/// IEEEFloat::convert - convert a value of one floating point type to another.
2431/// The return value corresponds to the IEEE754 exceptions. *losesInfo
2432/// records whether the transformation lost information, i.e. whether
2433/// converting the result back to the original type will produce the
2434/// original value (this is almost the same as return value==fsOK, but there
2435/// are edge cases where this is not so).
2436
2438 roundingMode rounding_mode,
2439 bool *losesInfo) {
2440 opStatus fs;
2441 const fltSemantics &fromSemantics = *semantics;
2442 bool is_signaling = isSignaling();
2443
2445 unsigned newPartCount = partCountForBits(toSemantics.precision + 1);
2446 unsigned oldPartCount = partCount();
2447 int shift = toSemantics.precision - fromSemantics.precision;
2448
2449 bool X86SpecialNan = false;
2450 if (&fromSemantics == &APFloatBase::semX87DoubleExtended &&
2451 &toSemantics != &APFloatBase::semX87DoubleExtended && category == fcNaN &&
2452 (!(*significandParts() & 0x8000000000000000ULL) ||
2453 !(*significandParts() & 0x4000000000000000ULL))) {
2454 // x86 has some unusual NaNs which cannot be represented in any other
2455 // format; note them here.
2456 X86SpecialNan = true;
2457 }
2458
2459 // If this is a truncation of a denormal number, and the target semantics
2460 // has larger exponent range than the source semantics (this can happen
2461 // when truncating from PowerPC double-double to double format), the
2462 // right shift could lose result mantissa bits. Adjust exponent instead
2463 // of performing excessive shift.
2464 // Also do a similar trick in case shifting denormal would produce zero
2465 // significand as this case isn't handled correctly by normalize.
2466 if (shift < 0 && isFiniteNonZero()) {
2467 int omsb = significandMSB() + 1;
2468 int exponentChange = omsb - fromSemantics.precision;
2469 if (exponent + exponentChange < toSemantics.minExponent)
2470 exponentChange = toSemantics.minExponent - exponent;
2471 exponentChange = std::max(exponentChange, shift);
2472 if (exponentChange < 0) {
2473 shift -= exponentChange;
2474 exponent += exponentChange;
2475 } else if (omsb <= -shift) {
2476 exponentChange = omsb + shift - 1; // leave at least one bit set
2477 shift -= exponentChange;
2478 exponent += exponentChange;
2479 }
2480 }
2481
2482 // If this is a truncation, perform the shift before we narrow the storage.
2483 if (shift < 0 && (isFiniteNonZero() ||
2484 (category == fcNaN && semantics->nonFiniteBehavior !=
2486 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
2487
2488 // Fix the storage so it can hold to new value.
2489 if (newPartCount > oldPartCount) {
2490 // The new type requires more storage; make it available.
2491 integerPart *newParts;
2492 newParts = new integerPart[newPartCount];
2493 APInt::tcSet(newParts, 0, newPartCount);
2494 if (isFiniteNonZero() || category==fcNaN)
2495 APInt::tcAssign(newParts, significandParts(), oldPartCount);
2496 freeSignificand();
2497 significand.parts = newParts;
2498 } else if (newPartCount == 1 && oldPartCount != 1) {
2499 // Switch to built-in storage for a single part.
2500 integerPart newPart = 0;
2501 if (isFiniteNonZero() || category==fcNaN)
2502 newPart = significandParts()[0];
2503 freeSignificand();
2504 significand.part = newPart;
2505 }
2506
2507 // Now that we have the right storage, switch the semantics.
2508 semantics = &toSemantics;
2509
2510 // If this is an extension, perform the shift now that the storage is
2511 // available.
2512 if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
2513 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2514
2515 if (isFiniteNonZero()) {
2516 fs = normalize(rounding_mode, lostFraction);
2517 *losesInfo = (fs != opOK);
2518 } else if (category == fcNaN) {
2519 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
2520 *losesInfo =
2522 makeNaN(false, sign);
2523 return is_signaling ? opInvalidOp : opOK;
2524 }
2525
2526 // If NaN is negative zero, we need to create a new NaN to avoid converting
2527 // NaN to -Inf.
2528 if (fromSemantics.nanEncoding == fltNanEncoding::NegativeZero &&
2529 semantics->nanEncoding != fltNanEncoding::NegativeZero)
2530 makeNaN(false, false);
2531
2532 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2533
2534 // For x87 extended precision, we want to make a NaN, not a special NaN if
2535 // the input wasn't special either.
2536 if (!X86SpecialNan && semantics == &APFloatBase::semX87DoubleExtended)
2537 APInt::tcSetBit(significandParts(), semantics->precision - 1);
2538
2539 // Convert of sNaN creates qNaN and raises an exception (invalid op).
2540 // This also guarantees that a sNaN does not become Inf on a truncation
2541 // that loses all payload bits.
2542 if (is_signaling) {
2543 makeQuiet();
2544 fs = opInvalidOp;
2545 } else {
2546 fs = opOK;
2547 }
2548 } else if (category == fcInfinity &&
2549 semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
2550 makeNaN(false, sign);
2551 *losesInfo = true;
2552 fs = opInexact;
2553 } else if (category == fcZero &&
2554 semantics->nanEncoding == fltNanEncoding::NegativeZero) {
2555 // Negative zero loses info, but positive zero doesn't.
2556 *losesInfo =
2557 fromSemantics.nanEncoding != fltNanEncoding::NegativeZero && sign;
2558 fs = *losesInfo ? opInexact : opOK;
2559 // NaN is negative zero means -0 -> +0, which can lose information
2560 sign = false;
2561 } else {
2562 *losesInfo = false;
2563 fs = opOK;
2564 }
2565
2566 if (category == fcZero && !semantics->hasZero)
2568 return fs;
2569}
2570
2571/* Convert a floating point number to an integer according to the
2572 rounding mode. If the rounded integer value is out of range this
2573 returns an invalid operation exception and the contents of the
2574 destination parts are unspecified. If the rounded value is in
2575 range but the floating point number is not the exact integer, the C
2576 standard doesn't require an inexact exception to be raised. IEEE
2577 854 does require it so we do that.
2578
2579 Note that for conversions to integer type the C standard requires
2580 round-to-zero to always be used. */
2581APFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
2582 MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
2583 roundingMode rounding_mode, bool *isExact) const {
2584 *isExact = false;
2585
2586 /* Handle the three special cases first. */
2587 if (category == fcInfinity || category == fcNaN)
2588 return opInvalidOp;
2589
2590 unsigned dstPartsCount = partCountForBits(width);
2591 assert(dstPartsCount <= parts.size() && "Integer too big");
2592
2593 if (category == fcZero) {
2594 APInt::tcSet(parts.data(), 0, dstPartsCount);
2595 // Negative zero can't be represented as an int.
2596 *isExact = !sign;
2597 return opOK;
2598 }
2599
2600 const integerPart *src = significandParts();
2601
2602 unsigned truncatedBits;
2603 /* Step 1: place our absolute value, with any fraction truncated, in
2604 the destination. */
2605 if (exponent < 0) {
2606 /* Our absolute value is less than one; truncate everything. */
2607 APInt::tcSet(parts.data(), 0, dstPartsCount);
2608 /* For exponent -1 the integer bit represents .5, look at that.
2609 For smaller exponents leftmost truncated bit is 0. */
2610 truncatedBits = semantics->precision -1U - exponent;
2611 } else {
2612 /* We want the most significant (exponent + 1) bits; the rest are
2613 truncated. */
2614 unsigned int bits = exponent + 1U;
2615
2616 /* Hopelessly large in magnitude? */
2617 if (bits > width)
2618 return opInvalidOp;
2619
2620 if (bits < semantics->precision) {
2621 /* We truncate (semantics->precision - bits) bits. */
2622 truncatedBits = semantics->precision - bits;
2623 APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
2624 } else {
2625 /* We want at least as many bits as are available. */
2626 APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
2627 0);
2628 APInt::tcShiftLeft(parts.data(), dstPartsCount,
2629 bits - semantics->precision);
2630 truncatedBits = 0;
2631 }
2632 }
2633
2634 /* Step 2: work out any lost fraction, and increment the absolute
2635 value if we would round away from zero. */
2636 lostFraction lost_fraction;
2637 if (truncatedBits) {
2638 lost_fraction = lostFractionThroughTruncation(src, partCount(),
2639 truncatedBits);
2640 if (lost_fraction != lfExactlyZero &&
2641 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2642 if (APInt::tcIncrement(parts.data(), dstPartsCount))
2643 return opInvalidOp; /* Overflow. */
2644 }
2645 } else {
2646 lost_fraction = lfExactlyZero;
2647 }
2648
2649 /* Step 3: check if we fit in the destination. */
2650 unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;
2651
2652 if (sign) {
2653 if (!isSigned) {
2654 /* Negative numbers cannot be represented as unsigned. */
2655 if (omsb != 0)
2656 return opInvalidOp;
2657 } else {
2658 /* It takes omsb bits to represent the unsigned integer value.
2659 We lose a bit for the sign, but care is needed as the
2660 maximally negative integer is a special case. */
2661 if (omsb == width &&
2662 APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
2663 return opInvalidOp;
2664
2665 /* This case can happen because of rounding. */
2666 if (omsb > width)
2667 return opInvalidOp;
2668 }
2669
2670 APInt::tcNegate (parts.data(), dstPartsCount);
2671 } else {
2672 if (omsb >= width + !isSigned)
2673 return opInvalidOp;
2674 }
2675
2676 if (lost_fraction == lfExactlyZero) {
2677 *isExact = true;
2678 return opOK;
2679 }
2680 return opInexact;
2681}
2682
2683/* Same as convertToSignExtendedInteger, except we provide
2684 deterministic values in case of an invalid operation exception,
2685 namely zero for NaNs and the minimal or maximal value respectively
2686 for underflow or overflow.
2687 The *isExact output tells whether the result is exact, in the sense
2688 that converting it back to the original floating point type produces
2689 the original value. This is almost equivalent to result==opOK,
2690 except for negative zeroes.
2691*/
2694 unsigned int width, bool isSigned,
2695 roundingMode rounding_mode, bool *isExact) const {
2696 opStatus fs = convertToSignExtendedInteger(parts, width, isSigned,
2697 rounding_mode, isExact);
2698
2699 if (fs == opInvalidOp) {
2700 unsigned int bits, dstPartsCount;
2701
2702 dstPartsCount = partCountForBits(width);
2703 assert(dstPartsCount <= parts.size() && "Integer too big");
2704
2705 if (category == fcNaN)
2706 bits = 0;
2707 else if (sign)
2708 bits = isSigned;
2709 else
2710 bits = width - isSigned;
2711
2712 tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
2713 if (sign && isSigned)
2714 APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
2715 }
2716
2717 return fs;
2718}
2719
2720/* Convert an unsigned integer SRC to a floating point number,
2721 rounding according to ROUNDING_MODE. The sign of the floating
2722 point number is not modified. */
2723APFloat::opStatus IEEEFloat::convertFromUnsignedParts(
2724 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
2725 category = fcNormal;
2726 unsigned omsb = APInt::tcMSB(src, srcCount) + 1;
2727 integerPart *dst = significandParts();
2728 unsigned dstCount = partCount();
2729 unsigned precision = semantics->precision;
2730
2731 /* We want the most significant PRECISION bits of SRC. There may not
2732 be that many; extract what we can. */
2733 lostFraction lost_fraction;
2734 if (precision <= omsb) {
2735 exponent = omsb - 1;
2736 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2737 omsb - precision);
2738 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2739 } else {
2740 exponent = precision - 1;
2741 lost_fraction = lfExactlyZero;
2742 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2743 }
2744
2745 return normalize(rounding_mode, lost_fraction);
2746}
2747
2749 roundingMode rounding_mode) {
2750 unsigned int partCount = Val.getNumWords();
2751 APInt api = Val;
2752
2753 sign = false;
2754 if (isSigned && api.isNegative()) {
2755 sign = true;
2756 api = -api;
2757 }
2758
2759 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2760}
2761
2763IEEEFloat::convertFromHexadecimalString(StringRef s,
2764 roundingMode rounding_mode) {
2765 lostFraction lost_fraction = lfExactlyZero;
2766
2767 category = fcNormal;
2768 zeroSignificand();
2769 exponent = 0;
2770
2771 integerPart *significand = significandParts();
2772 unsigned partsCount = partCount();
2773 unsigned bitPos = partsCount * integerPartWidth;
2774 bool computedTrailingFraction = false;
2775
2776 // Skip leading zeroes and any (hexa)decimal point.
2777 StringRef::iterator begin = s.begin();
2778 StringRef::iterator end = s.end();
2780 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2781 if (!PtrOrErr)
2782 return PtrOrErr.takeError();
2783 StringRef::iterator p = *PtrOrErr;
2784 StringRef::iterator firstSignificantDigit = p;
2785
2786 while (p != end) {
2787 integerPart hex_value;
2788
2789 if (*p == '.') {
2790 if (dot != end)
2791 return createError("String contains multiple dots");
2792 dot = p++;
2793 continue;
2794 }
2795
2796 hex_value = hexDigitValue(*p);
2797 if (hex_value == UINT_MAX)
2798 break;
2799
2800 p++;
2801
2802 // Store the number while we have space.
2803 if (bitPos) {
2804 bitPos -= 4;
2805 hex_value <<= bitPos % integerPartWidth;
2806 significand[bitPos / integerPartWidth] |= hex_value;
2807 } else if (!computedTrailingFraction) {
2808 auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value);
2809 if (!FractOrErr)
2810 return FractOrErr.takeError();
2811 lost_fraction = *FractOrErr;
2812 computedTrailingFraction = true;
2813 }
2814 }
2815
2816 /* Hex floats require an exponent but not a hexadecimal point. */
2817 if (p == end)
2818 return createError("Hex strings require an exponent");
2819 if (*p != 'p' && *p != 'P')
2820 return createError("Invalid character in significand");
2821 if (p == begin)
2822 return createError("Significand has no digits");
2823 if (dot != end && p - begin == 1)
2824 return createError("Significand has no digits");
2825
2826 /* Ignore the exponent if we are zero. */
2827 if (p != firstSignificantDigit) {
2828 int expAdjustment;
2829
2830 /* Implicit hexadecimal point? */
2831 if (dot == end)
2832 dot = p;
2833
2834 /* Calculate the exponent adjustment implicit in the number of
2835 significant digits. */
2836 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2837 if (expAdjustment < 0)
2838 expAdjustment++;
2839 expAdjustment = expAdjustment * 4 - 1;
2840
2841 /* Adjust for writing the significand starting at the most
2842 significant nibble. */
2843 expAdjustment += semantics->precision;
2844 expAdjustment -= partsCount * integerPartWidth;
2845
2846 /* Adjust for the given exponent. */
2847 auto ExpOrErr = totalExponent(p + 1, end, expAdjustment);
2848 if (!ExpOrErr)
2849 return ExpOrErr.takeError();
2850 exponent = *ExpOrErr;
2851 }
2852
2853 return normalize(rounding_mode, lost_fraction);
2854}
2855
2857IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2858 unsigned sigPartCount, int exp,
2859 roundingMode rounding_mode) {
2860 fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
2862
2863 bool isNearest = rounding_mode == rmNearestTiesToEven ||
2864 rounding_mode == rmNearestTiesToAway;
2865
2866 unsigned parts = partCountForBits(semantics->precision + 11);
2867
2868 /* Calculate pow(5, abs(exp)). */
2869 unsigned pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp : -exp);
2870
2871 for (;; parts *= 2) {
2872 unsigned int excessPrecision, truncatedBits;
2873
2874 calcSemantics.precision = parts * integerPartWidth - 1;
2875 excessPrecision = calcSemantics.precision - semantics->precision;
2876 truncatedBits = excessPrecision;
2877
2878 IEEEFloat decSig(calcSemantics, uninitialized);
2879 decSig.makeZero(sign);
2880 IEEEFloat pow5(calcSemantics);
2881
2882 opStatus sigStatus = decSig.convertFromUnsignedParts(
2883 decSigParts, sigPartCount, rmNearestTiesToEven);
2884 opStatus powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2886 /* Add exp, as 10^n = 5^n * 2^n. */
2887 decSig.exponent += exp;
2888
2889 lostFraction calcLostFraction;
2890 integerPart HUerr, HUdistance;
2891 unsigned int powHUerr;
2892
2893 if (exp >= 0) {
2894 /* multiplySignificand leaves the precision-th bit set to 1. */
2895 calcLostFraction = decSig.multiplySignificand(pow5);
2896 powHUerr = powStatus != opOK;
2897 } else {
2898 calcLostFraction = decSig.divideSignificand(pow5);
2899 /* Denormal numbers have less precision. */
2900 if (decSig.exponent < semantics->minExponent) {
2901 excessPrecision += (semantics->minExponent - decSig.exponent);
2902 truncatedBits = excessPrecision;
2903 excessPrecision = std::min(excessPrecision, calcSemantics.precision);
2904 }
2905 /* Extra half-ulp lost in reciprocal of exponent. */
2906 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2907 }
2908
2909 /* Both multiplySignificand and divideSignificand return the
2910 result with the integer bit set. */
2912 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2913
2914 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2915 powHUerr);
2916 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2917 excessPrecision, isNearest);
2918
2919 /* Are we guaranteed to round correctly if we truncate? */
2920 if (HUdistance >= HUerr) {
2921 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2922 calcSemantics.precision - excessPrecision,
2923 excessPrecision);
2924 /* Take the exponent of decSig. If we tcExtract-ed less bits
2925 above we must adjust our exponent to compensate for the
2926 implicit right shift. */
2927 exponent = (decSig.exponent + semantics->precision
2928 - (calcSemantics.precision - excessPrecision));
2929 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2930 decSig.partCount(),
2931 truncatedBits);
2932 return static_cast<opStatus>(normalize(rounding_mode, calcLostFraction) |
2933 ((sigStatus | powStatus) & opInexact));
2934 }
2935 }
2936}
2937
2938Expected<APFloat::opStatus>
2939IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
2940 decimalInfo D;
2941 opStatus fs;
2942
2943 /* Scan the text. */
2944 StringRef::iterator p = str.begin();
2945 if (Error Err = interpretDecimal(p, str.end(), &D))
2946 return std::move(Err);
2947
2948 /* Handle the quick cases. First the case of no significant digits,
2949 i.e. zero, and then exponents that are obviously too large or too
2950 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2951 definitely overflows if
2952
2953 (exp - 1) * L >= maxExponent
2954
2955 and definitely underflows to zero where
2956
2957 (exp + 1) * L <= minExponent - precision
2958
2959 With integer arithmetic the tightest bounds for L are
2960
2961 93/28 < L < 196/59 [ numerator <= 256 ]
2962 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2963 */
2964
2965 // Test if we have a zero number allowing for strings with no null terminators
2966 // and zero decimals with non-zero exponents.
2967 //
2968 // We computed firstSigDigit by ignoring all zeros and dots. Thus if
2969 // D->firstSigDigit equals str.end(), every digit must be a zero and there can
2970 // be at most one dot. On the other hand, if we have a zero with a non-zero
2971 // exponent, then we know that D.firstSigDigit will be non-numeric.
2972 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
2973 category = fcZero;
2974 fs = opOK;
2975 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2976 sign = false;
2977 if (!semantics->hasZero)
2979
2980 /* Check whether the normalized exponent is high enough to overflow
2981 max during the log-rebasing in the max-exponent check below. */
2982 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2983 fs = handleOverflow(rounding_mode);
2984
2985 /* If it wasn't, then it also wasn't high enough to overflow max
2986 during the log-rebasing in the min-exponent check. Check that it
2987 won't overflow min in either check, then perform the min-exponent
2988 check. */
2989 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2990 (D.normalizedExponent + 1) * 28738 <=
2991 8651 * (semantics->minExponent - (int) semantics->precision)) {
2992 /* Underflow to zero and round. */
2993 category = fcNormal;
2994 zeroSignificand();
2995 fs = normalize(rounding_mode, lfLessThanHalf);
2996
2997 /* We can finally safely perform the max-exponent check. */
2998 } else if ((D.normalizedExponent - 1) * 42039
2999 >= 12655 * semantics->maxExponent) {
3000 /* Overflow and round. */
3001 fs = handleOverflow(rounding_mode);
3002 } else {
3003 integerPart *decSignificand;
3004 unsigned int partCount;
3005
3006 /* A tight upper bound on number of bits required to hold an
3007 N-digit decimal integer is N * 196 / 59. Allocate enough space
3008 to hold the full significand, and an extra part required by
3009 tcMultiplyPart. */
3010 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
3011 partCount = partCountForBits(1 + 196 * partCount / 59);
3012 decSignificand = new integerPart[partCount + 1];
3013 partCount = 0;
3014
3015 /* Convert to binary efficiently - we do almost all multiplication
3016 in an integerPart. When this would overflow do we do a single
3017 bignum multiplication, and then revert again to multiplication
3018 in an integerPart. */
3019 do {
3020 integerPart decValue, val, multiplier;
3021
3022 val = 0;
3023 multiplier = 1;
3024
3025 do {
3026 if (*p == '.') {
3027 p++;
3028 if (p == str.end()) {
3029 break;
3030 }
3031 }
3032 decValue = decDigitValue(*p++);
3033 if (decValue >= 10U) {
3034 delete[] decSignificand;
3035 return createError("Invalid character in significand");
3036 }
3037 multiplier *= 10;
3038 val = val * 10 + decValue;
3039 /* The maximum number that can be multiplied by ten with any
3040 digit added without overflowing an integerPart. */
3041 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
3042
3043 /* Multiply out the current part. */
3044 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
3045 partCount, partCount + 1, false);
3046
3047 /* If we used another part (likely but not guaranteed), increase
3048 the count. */
3049 if (decSignificand[partCount])
3050 partCount++;
3051 } while (p <= D.lastSigDigit);
3052
3053 category = fcNormal;
3054 fs = roundSignificandWithExponent(decSignificand, partCount,
3055 D.exponent, rounding_mode);
3056
3057 delete [] decSignificand;
3058 }
3059
3060 return fs;
3061}
3062
3063bool IEEEFloat::convertFromStringSpecials(StringRef str) {
3064 const size_t MIN_NAME_SIZE = 3;
3065
3066 if (str.size() < MIN_NAME_SIZE)
3067 return false;
3068
3069 if (str == "inf" || str == "INFINITY" || str == "+Inf" || str == "+inf") {
3070 makeInf(false);
3071 return true;
3072 }
3073
3074 bool IsNegative = str.consume_front("-");
3075 if (IsNegative) {
3076 if (str.size() < MIN_NAME_SIZE)
3077 return false;
3078
3079 if (str == "inf" || str == "INFINITY" || str == "Inf") {
3080 makeInf(true);
3081 return true;
3082 }
3083 }
3084
3085 // If we have a 's' (or 'S') prefix, then this is a Signaling NaN.
3086 bool IsSignaling = str.consume_front_insensitive("s");
3087 if (IsSignaling) {
3088 if (str.size() < MIN_NAME_SIZE)
3089 return false;
3090 }
3091
3092 if (str.consume_front("nan") || str.consume_front("NaN")) {
3093 // A NaN without payload.
3094 if (str.empty()) {
3095 makeNaN(IsSignaling, IsNegative);
3096 return true;
3097 }
3098
3099 // Allow the payload to be inside parentheses.
3100 if (str.front() == '(') {
3101 // Parentheses should be balanced (and not empty).
3102 if (str.size() <= 2 || str.back() != ')')
3103 return false;
3104
3105 str = str.slice(1, str.size() - 1);
3106 }
3107
3108 // Determine the payload number's radix.
3109 unsigned Radix = 10;
3110 if (str[0] == '0') {
3111 if (str.size() > 1 && tolower(str[1]) == 'x') {
3112 str = str.drop_front(2);
3113 Radix = 16;
3114 } else {
3115 Radix = 8;
3116 }
3117 }
3118
3119 // Parse the payload and make the NaN.
3120 APInt Payload;
3121 if (!str.getAsInteger(Radix, Payload)) {
3122 makeNaN(IsSignaling, IsNegative, &Payload);
3123 return true;
3124 }
3125 }
3126
3127 return false;
3128}
3129
3130Expected<APFloat::opStatus>
3132 if (str.empty())
3133 return createError("Invalid string length");
3134
3135 // Handle special cases.
3136 if (convertFromStringSpecials(str))
3137 return opOK;
3138
3139 /* Handle a leading minus sign. */
3140 StringRef::iterator p = str.begin();
3141 size_t slen = str.size();
3142 sign = *p == '-' ? 1 : 0;
3143 if (sign && !semantics->hasSignedRepr)
3145 "This floating point format does not support signed values");
3146
3147 if (*p == '-' || *p == '+') {
3148 p++;
3149 slen--;
3150 if (!slen)
3151 return createError("String has no digits");
3152 }
3153
3154 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
3155 if (slen == 2)
3156 return createError("Invalid string");
3157 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
3158 rounding_mode);
3159 }
3160
3161 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
3162}
3163
3164/* Write out a hexadecimal representation of the floating point value
3165 to DST, which must be of sufficient size, in the C99 form
3166 [-]0xh.hhhhp[+-]d. Return the number of characters written,
3167 excluding the terminating NUL.
3168
3169 If UPPERCASE, the output is in upper case, otherwise in lower case.
3170
3171 HEXDIGITS digits appear altogether, rounding the value if
3172 necessary. If HEXDIGITS is 0, the minimal precision to display the
3173 number precisely is used instead. If nothing would appear after
3174 the decimal point it is suppressed.
3175
3176 The decimal exponent is always printed and has at least one digit.
3177 Zero values display an exponent of zero. Infinities and NaNs
3178 appear as "infinity" or "nan" respectively.
3179
3180 The above rules are as specified by C99. There is ambiguity about
3181 what the leading hexadecimal digit should be. This implementation
3182 uses whatever is necessary so that the exponent is displayed as
3183 stored. This implies the exponent will fall within the IEEE format
3184 range, and the leading hexadecimal digit will be 0 (for denormals),
3185 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
3186 any other digits zero).
3187*/
3188unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
3189 bool upperCase,
3190 roundingMode rounding_mode) const {
3191 char *p = dst;
3192 if (sign)
3193 *dst++ = '-';
3194
3195 switch (category) {
3196 case fcInfinity:
3197 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
3198 dst += sizeof infinityL - 1;
3199 break;
3200
3201 case fcNaN:
3202 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
3203 dst += sizeof NaNU - 1;
3204 break;
3205
3206 case fcZero:
3207 *dst++ = '0';
3208 *dst++ = upperCase ? 'X': 'x';
3209 *dst++ = '0';
3210 if (hexDigits > 1) {
3211 *dst++ = '.';
3212 memset (dst, '0', hexDigits - 1);
3213 dst += hexDigits - 1;
3214 }
3215 *dst++ = upperCase ? 'P': 'p';
3216 *dst++ = '0';
3217 break;
3218
3219 case fcNormal:
3220 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
3221 break;
3222 }
3223
3224 *dst = 0;
3225
3226 return static_cast<unsigned int>(dst - p);
3227}
3228
3229/* Does the hard work of outputting the correctly rounded hexadecimal
3230 form of a normal floating point number with the specified number of
3231 hexadecimal digits. If HEXDIGITS is zero the minimum number of
3232 digits necessary to print the value precisely is output. */
/* Writes "0x"/"0X", the hexadecimal digits (with a '.' after the first
   digit when more than one digit is produced), then 'p'/'P', and finally
   the exponent via writeSignedDecimal, whose result is returned.  No NUL
   is stored here; convertToHexString stores it at the returned pointer. */
3233char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
3234 bool upperCase,
3235 roundingMode rounding_mode) const {
3236 *dst++ = '0';
3237 *dst++ = upperCase ? 'X': 'x';
3238
/* Set below when truncating to hexDigits requires the emitted digits to
   be incremented. */
3239 bool roundUp = false;
3240 const char *hexDigitChars = upperCase ? hexDigitsUpper : hexDigitsLower;
3241
3242 const integerPart *significand = significandParts();
3243 unsigned partsCount = partCount();
3244
3245 /* +3 because the first digit only uses the single integer bit, so
3246 we have 3 virtual zero most-significant-bits. */
3247 unsigned valueBits = semantics->precision + 3;
/* Left shift applied to each part so that the top of the valueBits-wide
   value lands at the most significant bit of "part" in the loop below. */
3248 unsigned shift = integerPartWidth - valueBits % integerPartWidth;
3249
3250 /* The natural number of digits required ignoring trailing
3251 insignificant zeroes. */
3252 unsigned outputDigits = (valueBits - significandLSB() + 3) / 4;
3253
3254 /* hexDigits of zero means use the required number for the
3255 precision. Otherwise, see if we are truncating. If we are,
3256 find out if we need to round away from zero. */
3257 if (hexDigits) {
3258 if (hexDigits < outputDigits) {
3259 /* We are dropping non-zero bits, so need to check how to round.
3260 "bits" is the number of dropped bits. */
3261 unsigned int bits;
3262 lostFraction fraction;
3263
3264 bits = valueBits - hexDigits * 4;
3265 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
3266 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
3267 }
3268 outputDigits = hexDigits;
3269 }
3270
3271 /* Write the digits consecutively, and start writing in the location
3272 of the hexadecimal point. We move the most significant digit
3273 left and add the hexadecimal point later. */
/* The pre-increment leaves one slot free at p[-1]; it receives the leading
   digit near the end of the function. */
3274 char *p = ++dst;
3275
/* Number of integerParts needed to hold valueBits bits.  This can be one
   more than partsCount, which the loop treats as an all-zero part. */
3276 unsigned count = (valueBits + integerPartWidth - 1) / integerPartWidth;
3277
3278 while (outputDigits && count) {
3279 integerPart part;
3280
3281 /* Put the most significant integerPartWidth bits in "part". */
3282 if (--count == partsCount)
3283 part = 0; /* An imaginary higher zero part. */
3284 else
3285 part = significand[count] << shift;
3286
/* Bring in the bits of the next lower part that the shift exposed. */
3287 if (count && shift)
3288 part |= significand[count - 1] >> (integerPartWidth - shift);
3289
3290 /* Convert as much of "part" to hexdigits as we can. */
3291 unsigned int curDigits = integerPartWidth / 4;
3292
3293 curDigits = std::min(curDigits, outputDigits);
3294 dst += partAsHex (dst, part, curDigits, hexDigitChars);
3295 outputDigits -= curDigits;
3296 }
3297
3298 if (roundUp) {
3299 char *q = dst;
3300
3301 /* Note that hexDigitChars has a trailing '0'. */
/* Increment the last digit; while a digit wraps around to '0', carry
   into the digit to its left. */
3302 do {
3303 q--;
3304 *q = hexDigitChars[hexDigitValue (*q) + 1];
3305 } while (*q == '0');
3306 assert(q >= p);
3307 } else {
3308 /* Add trailing zeroes. */
3309 memset (dst, '0', outputDigits);
3310 dst += outputDigits;
3311 }
3312
3313 /* Move the most significant digit to before the point, and if there
3314 is something after the decimal point add it. This must come
3315 after rounding above. */
3316 p[-1] = p[0];
/* Exactly one digit was written: give back the slot instead of emitting
   a point with nothing after it. */
3317 if (dst -1 == p)
3318 dst--;
3319 else
3320 p[0] = '.';
3321
3322 /* Finally output the exponent. */
3323 *dst++ = upperCase ? 'P': 'p';
3324
3325 return writeSignedDecimal (dst, exponent);
3326}
3327
3329 if (!Arg.isFiniteNonZero())
3330 return hash_combine((uint8_t)Arg.category,
3331 // NaN has no sign, fix it at zero.
3332 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
3333 Arg.semantics->precision);
3334
3335 // Normal floats need their exponent and significand hashed.
3336 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
3337 Arg.semantics->precision, Arg.exponent,
3339 Arg.significandParts(),
3340 Arg.significandParts() + Arg.partCount()));
3341}
3342
3343// Conversion from APFloat to/from host float/double. It may eventually be
3344// possible to eliminate these and have everybody deal with APFloats, but that
3345// will take a while. This approach will not easily extend to long double.
3346// Current implementation requires integerPartWidth==64, which is correct at
3347// the moment but could be made more general.
3348
3349// Denormals have exponent minExponent in APFloat, but minExponent-1 in
3350// the actual IEEE representations. We compensate for that here.
3351
3352APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
3353 assert(semantics ==
3354 (const llvm::fltSemantics *)&APFloatBase::semX87DoubleExtended);
3355 assert(partCount()==2);
3356
3357 uint64_t myexponent, mysignificand;
3358
3359 if (isFiniteNonZero()) {
3360 myexponent = exponent+16383; //bias
3361 mysignificand = significandParts()[0];
3362 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
3363 myexponent = 0; // denormal
3364 } else if (category==fcZero) {
3365 myexponent = 0;
3366 mysignificand = 0;
3367 } else if (category==fcInfinity) {
3368 myexponent = 0x7fff;
3369 mysignificand = 0x8000000000000000ULL;
3370 } else {
3371 assert(category == fcNaN && "Unknown category");
3372 myexponent = 0x7fff;
3373 mysignificand = significandParts()[0];
3374 }
3375
3376 uint64_t words[2];
3377 words[0] = mysignificand;
3378 words[1] = ((uint64_t)(sign & 1) << 15) |
3379 (myexponent & 0x7fffLL);
3380 return APInt(80, words);
3381}
3382
3383APInt IEEEFloat::convertPPCDoubleDoubleLegacyAPFloatToAPInt() const {
3384 assert(semantics ==
3385 (const llvm::fltSemantics *)&APFloatBase::semPPCDoubleDoubleLegacy);
3386 assert(partCount()==2);
3387
3388 uint64_t words[2];
3389 bool losesInfo;
3390
3391 // Convert number to double. To avoid spurious underflows, we re-
3392 // normalize against the "double" minExponent first, and only *then*
3393 // truncate the mantissa. The result of that second conversion
3394 // may be inexact, but should never underflow.
3395 // Declare fltSemantics before APFloat that uses it (and
3396 // saves pointer to it) to ensure correct destruction order.
3397 fltSemantics extendedSemantics = *semantics;
3398 extendedSemantics.minExponent = APFloatBase::semIEEEdouble.minExponent;
3399 IEEEFloat extended(*this);
3400 [[maybe_unused]] opStatus fs =
3401 extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3402 assert(fs == opOK && !losesInfo);
3403
3404 IEEEFloat u(extended);
3405 fs = u.convert(APFloatBase::semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3406 assert(fs == opOK || fs == opInexact);
3407 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
3408
3409 // If conversion was exact or resulted in a special case, we're done;
3410 // just set the second double to zero. Otherwise, re-convert back to
3411 // the extended format and compute the difference. This now should
3412 // convert exactly to double.
3413 if (u.isFiniteNonZero() && losesInfo) {
3414 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3415 assert(fs == opOK && !losesInfo);
3416
3417 IEEEFloat v(extended);
3418 v.subtract(u, rmNearestTiesToEven);
3419 fs = v.convert(APFloatBase::semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3420 assert(fs == opOK && !losesInfo);
3421 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
3422 } else {
3423 words[1] = 0;
3424 }
3425
3426 return APInt(128, words);
3427}
3428
// Encodes this value as the bit pattern of the format described by S and
// returns it as an APInt of S.sizeInBits bits. The stored significand is
// copied into the low words; the biased exponent and the sign are OR'ed into
// the last word.
3429template <const fltSemantics &S>
3430APInt IEEEFloat::convertIEEEFloatToAPInt() const {
3431 assert(semantics == &S);
// Float8E8M0FNU uses -minExponent as its bias; every other format uses
// -(minExponent - 1).
3432 const int bias = (semantics == &APFloatBase::semFloat8E8M0FNU)
3433 ? -S.minExponent
3434 : -(S.minExponent - 1);
3435 constexpr unsigned int trailing_significand_bits = S.precision - 1;
// Index of the part that holds the integer bit, and the mask selecting that
// bit within the part.
3436 constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth;
3437 constexpr integerPart integer_bit =
3438 integerPart{1} << (trailing_significand_bits % integerPartWidth);
3439 constexpr uint64_t significand_mask = integer_bit - 1;
// With no trailing significand bits the whole encoding is treated as
// exponent; otherwise one bit is set aside for the sign.
3440 constexpr unsigned int exponent_bits =
3441 trailing_significand_bits ? (S.sizeInBits - 1 - trailing_significand_bits)
3442 : S.sizeInBits;
3443 static_assert(exponent_bits < 64);
3444 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3445
3446 uint64_t myexponent;
3447 std::array<integerPart, partCountForBits(trailing_significand_bits)>
3448 mysignificand;
3449
// Pick the biased exponent and significand words for each category.
3450 if (isFiniteNonZero()) {
3451 myexponent = exponent + bias;
3452 std::copy_n(significandParts(), mysignificand.size(),
3453 mysignificand.begin());
// A value at the minimum exponent whose integer bit is clear is a denormal
// and is stored with an exponent field of zero.
3454 if (myexponent == 1 &&
3455 !(significandParts()[integer_bit_part] & integer_bit))
3456 myexponent = 0; // denormal
3457 } else if (category == fcZero) {
3458 if (!S.hasZero)
3459 llvm_unreachable("semantics does not support zero!");
3460 myexponent = ::exponentZero(S) + bias;
3461 mysignificand.fill(0);
3462 } else if (category == fcInfinity) {
3463 if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
3464 S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
3465 llvm_unreachable("semantics don't support inf!");
3466 myexponent = ::exponentInf(S) + bias;
3467 mysignificand.fill(0);
3468 } else {
3469 assert(category == fcNaN && "Unknown category!");
3470 if (S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
3471 llvm_unreachable("semantics don't support NaN!");
3472 myexponent = ::exponentNaN(S) + bias;
3473 std::copy_n(significandParts(), mysignificand.size(),
3474 mysignificand.begin());
3475 }
// Assemble the result: significand words first, then zero-fill any words
// beyond them.
3476 std::array<uint64_t, (S.sizeInBits + 63) / 64> words;
3477 auto words_iter =
3478 std::copy_n(mysignificand.begin(), mysignificand.size(), words.begin());
3479 if constexpr (significand_mask != 0 || trailing_significand_bits == 0) {
3480 // Clear the integer bit.
3481 words[mysignificand.size() - 1] &= significand_mask;
3482 }
3483 std::fill(words_iter, words.end(), uint64_t{0});
// Sign and exponent both live in the last word, at bit positions taken
// modulo 64.
3484 constexpr size_t last_word = words.size() - 1;
3485 uint64_t shifted_sign = static_cast<uint64_t>(sign & 1)
3486 << ((S.sizeInBits - 1) % 64);
3487 words[last_word] |= shifted_sign;
3488 uint64_t shifted_exponent = (myexponent & exponent_mask)
3489 << (trailing_significand_bits % 64);
3490 words[last_word] |= shifted_exponent;
// A single-word result uses the APInt constructor that takes one uint64_t.
3491 if constexpr (last_word == 0) {
3492 return APInt(S.sizeInBits, words[0]);
3493 }
3494 return APInt(S.sizeInBits, words);
3495}
3496
3497APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
3498 assert(partCount() == 2);
3499 return convertIEEEFloatToAPInt<APFloatBase::semIEEEquad>();
3500}
3501
3502APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
3503 assert(partCount()==1);
3504 return convertIEEEFloatToAPInt<APFloatBase::semIEEEdouble>();
3505}
3506
3507APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
3508 assert(partCount()==1);
3509 return convertIEEEFloatToAPInt<APFloatBase::semIEEEsingle>();
3510}
3511
3512APInt IEEEFloat::convertBFloatAPFloatToAPInt() const {
3513 assert(partCount() == 1);
3514 return convertIEEEFloatToAPInt<APFloatBase::semBFloat>();
3515}
3516
3517APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
3518 assert(partCount()==1);
3519 return convertIEEEFloatToAPInt<APFloatBase::APFloatBase::semIEEEhalf>();
3520}
3521
3522APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const {
3523 assert(partCount() == 1);
3524 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E5M2>();
3525}
3526
3527APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const {
3528 assert(partCount() == 1);
3529 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E5M2FNUZ>();
3530}
3531
3532APInt IEEEFloat::convertFloat8E4M3APFloatToAPInt() const {
3533 assert(partCount() == 1);
3534 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3>();
3535}
3536
3537APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {
3538 assert(partCount() == 1);
3539 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3FN>();
3540}
3541
3542APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const {
3543 assert(partCount() == 1);
3544 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3FNUZ>();
3545}
3546
3547APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const {
3548 assert(partCount() == 1);
3549 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3B11FNUZ>();
3550}
3551
3552APInt IEEEFloat::convertFloat8E3M4APFloatToAPInt() const {
3553 assert(partCount() == 1);
3554 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E3M4>();
3555}
3556
3557APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const {
3558 assert(partCount() == 1);
3559 return convertIEEEFloatToAPInt<APFloatBase::semFloatTF32>();
3560}
3561
3562APInt IEEEFloat::convertFloat8E8M0FNUAPFloatToAPInt() const {
3563 assert(partCount() == 1);
3564 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E8M0FNU>();
3565}
3566
3567APInt IEEEFloat::convertFloat6E3M2FNAPFloatToAPInt() const {
3568 assert(partCount() == 1);
3569 return convertIEEEFloatToAPInt<APFloatBase::semFloat6E3M2FN>();
3570}
3571
3572APInt IEEEFloat::convertFloat6E2M3FNAPFloatToAPInt() const {
3573 assert(partCount() == 1);
3574 return convertIEEEFloatToAPInt<APFloatBase::semFloat6E2M3FN>();
3575}
3576
3577APInt IEEEFloat::convertFloat4E2M1FNAPFloatToAPInt() const {
3578 assert(partCount() == 1);
3579 return convertIEEEFloatToAPInt<APFloatBase::semFloat4E2M1FN>();
3580}
3581
3582// This function creates an APInt that is just a bit map of the floating
3583// point constant as it would appear in memory. It is not a conversion,
3584// and treating the result as a normal integer is unlikely to be useful.
3585
3587 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEhalf)
3588 return convertHalfAPFloatToAPInt();
3589
3590 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semBFloat)
3591 return convertBFloatAPFloatToAPInt();
3592
3593 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEsingle)
3594 return convertFloatAPFloatToAPInt();
3595
3596 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEdouble)
3597 return convertDoubleAPFloatToAPInt();
3598
3599 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEquad)
3600 return convertQuadrupleAPFloatToAPInt();
3601
3602 if (semantics ==
3603 (const llvm::fltSemantics *)&APFloatBase::semPPCDoubleDoubleLegacy)
3604 return convertPPCDoubleDoubleLegacyAPFloatToAPInt();
3605
3606 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E5M2)
3607 return convertFloat8E5M2APFloatToAPInt();
3608
3609 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E5M2FNUZ)
3610 return convertFloat8E5M2FNUZAPFloatToAPInt();
3611
3612 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3)
3613 return convertFloat8E4M3APFloatToAPInt();
3614
3615 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3FN)
3616 return convertFloat8E4M3FNAPFloatToAPInt();
3617
3618 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3FNUZ)
3619 return convertFloat8E4M3FNUZAPFloatToAPInt();
3620
3621 if (semantics ==
3622 (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3B11FNUZ)
3623 return convertFloat8E4M3B11FNUZAPFloatToAPInt();
3624
3625 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E3M4)
3626 return convertFloat8E3M4APFloatToAPInt();
3627
3628 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloatTF32)
3629 return convertFloatTF32APFloatToAPInt();
3630
3631 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E8M0FNU)
3632 return convertFloat8E8M0FNUAPFloatToAPInt();
3633
3634 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat6E3M2FN)
3635 return convertFloat6E3M2FNAPFloatToAPInt();
3636
3637 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat6E2M3FN)
3638 return convertFloat6E2M3FNAPFloatToAPInt();
3639
3640 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat4E2M1FN)
3641 return convertFloat4E2M1FNAPFloatToAPInt();
3642
3643 assert(semantics ==
3644 (const llvm::fltSemantics *)&APFloatBase::semX87DoubleExtended &&
3645 "unknown format!");
3646 return convertF80LongDoubleAPFloatToAPInt();
3647}
3648
3650 assert(semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEsingle &&
3651 "Float semantics are not IEEEsingle");
3652 APInt api = bitcastToAPInt();
3653 return api.bitsToFloat();
3654}
3655
3657 assert(semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEdouble &&
3658 "Float semantics are not IEEEdouble");
3659 APInt api = bitcastToAPInt();
3660 return api.bitsToDouble();
3661}
3662
3663#ifdef HAS_IEE754_FLOAT128
3664float128 IEEEFloat::convertToQuad() const {
3665 assert(semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEquad &&
3666 "Float semantics are not IEEEquads");
3667 APInt api = bitcastToAPInt();
3668 return api.bitsToQuad();
3669}
3670#endif
3671
3672/// Integer bit is explicit in this format. Intel hardware (387 and later)
3673/// does not support these bit patterns:
3674/// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3675/// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3676/// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3677/// exponent = 0, integer bit 1 ("pseudodenormal")
3678/// At the moment, the first three are treated as NaNs, the last one as Normal.
3679void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
3680 uint64_t i1 = api.getRawData()[0];
3681 uint64_t i2 = api.getRawData()[1];
3682 uint64_t myexponent = (i2 & 0x7fff);
3683 uint64_t mysignificand = i1;
3684 uint8_t myintegerbit = mysignificand >> 63;
3685
3686 initialize(&APFloatBase::semX87DoubleExtended);
3687 assert(partCount()==2);
3688
3689 sign = static_cast<unsigned int>(i2>>15);
3690 if (myexponent == 0 && mysignificand == 0) {
3691 makeZero(sign);
3692 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3693 makeInf(sign);
3694 } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) ||
3695 (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) {
3696 category = fcNaN;
3697 exponent = exponentNaN();
3698 significandParts()[0] = mysignificand;
3699 significandParts()[1] = 0;
3700 } else {
3701 category = fcNormal;
3702 exponent = myexponent - 16383;
3703 significandParts()[0] = mysignificand;
3704 significandParts()[1] = 0;
3705 if (myexponent==0) // denormal
3706 exponent = -16382;
3707 }
3708}
3709
3710void IEEEFloat::initFromPPCDoubleDoubleLegacyAPInt(const APInt &api) {
3711 uint64_t i1 = api.getRawData()[0];
3712 uint64_t i2 = api.getRawData()[1];
3713 bool losesInfo;
3714
3715 // Get the first double and convert to our format.
3716 initFromDoubleAPInt(APInt(64, i1));
3717 [[maybe_unused]] opStatus fs = convert(APFloatBase::semPPCDoubleDoubleLegacy,
3718 rmNearestTiesToEven, &losesInfo);
3719 // (convert may return opInvalidOp if i1 is an sNaN).
3720 assert((fs == opOK || fs == opInvalidOp) && !losesInfo);
3721
3722 // Unless we have a special case, add in second double.
3723 if (isFiniteNonZero()) {
3724 IEEEFloat v(APFloatBase::semIEEEdouble, APInt(64, i2));
3725 fs = v.convert(APFloatBase::semPPCDoubleDoubleLegacy, rmNearestTiesToEven,
3726 &losesInfo);
3727 assert(fs == opOK && !losesInfo);
3728
3730 }
3731}
3732
3733// The E8M0 format has the following characteristics:
3734// It is an 8-bit unsigned format with only exponents (no actual significand).
3735// No encodings for {zero, infinities or denorms}.
3736// NaN is represented by all 1's.
3737// Bias is 127.
3738void IEEEFloat::initFromFloat8E8M0FNUAPInt(const APInt &api) {
3739 const uint64_t exponent_mask = 0xff;
3740 uint64_t val = api.getRawData()[0];
3741 uint64_t myexponent = val & exponent_mask;
3742
3743 initialize(&APFloatBase::semFloat8E8M0FNU);
3744 assert(partCount() == 1);
3745
3746 // This format has unsigned representation only
3747 sign = 0;
3748
3749 // Set the significand
3750 // This format does not have any significand but the 'Pth' precision bit is
3751 // always set to 1 for consistency in APFloat's internal representation.
3752 uint64_t mysignificand = 1;
3753 significandParts()[0] = mysignificand;
3754
3755 // This format can either have a NaN or fcNormal
3756 // All 1's i.e. 255 is a NaN
3757 if (val == exponent_mask) {
3758 category = fcNaN;
3759 exponent = exponentNaN();
3760 return;
3761 }
3762 // Handle fcNormal...
3763 category = fcNormal;
3764 exponent = myexponent - 127; // 127 is bias
3765}
3766
// Initializes this value from api, interpreted as the bit pattern of the
// format described by S. The input is classified as infinity, NaN, zero or a
// normal/denormal number according to S.nonFiniteBehavior and S.nanEncoding.
3767template <const fltSemantics &S>
3768void IEEEFloat::initFromIEEEAPInt(const APInt &api) {
3769 assert(api.getBitWidth() == S.sizeInBits);
// Mask selecting the integer bit within its part; the bits below it in that
// part are the stored significand bits.
3770 constexpr integerPart integer_bit = integerPart{1}
3771 << ((S.precision - 1) % integerPartWidth);
3772 constexpr uint64_t significand_mask = integer_bit - 1;
3773 constexpr unsigned int trailing_significand_bits = S.precision - 1;
3774 constexpr unsigned int stored_significand_parts =
3775 partCountForBits(trailing_significand_bits);
// Everything that is neither the sign bit nor trailing significand is
// exponent.
3776 constexpr unsigned int exponent_bits =
3777 S.sizeInBits - 1 - trailing_significand_bits;
3778 static_assert(exponent_bits < 64);
3779 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3780 constexpr int bias = -(S.minExponent - 1);
3781
3782 // Copy the bits of the significand. We need to clear out the exponent and
3783 // sign bit in the last word.
3784 std::array<integerPart, stored_significand_parts> mysignificand;
3785 std::copy_n(api.getRawData(), mysignificand.size(), mysignificand.begin());
3786 if constexpr (significand_mask != 0) {
3787 mysignificand[mysignificand.size() - 1] &= significand_mask;
3788 }
3789
3790 // We assume the last word holds the sign bit, the exponent, and potentially
3791 // some of the trailing significand field.
3792 uint64_t last_word = api.getRawData()[api.getNumWords() - 1];
3793 uint64_t myexponent =
3794 (last_word >> (trailing_significand_bits % 64)) & exponent_mask;
3795
// All reads from api happen above; from here on this object is rewritten.
3796 initialize(&S);
3797 assert(partCount() == mysignificand.size());
3798
3799 sign = static_cast<unsigned int>(last_word >> ((S.sizeInBits - 1) % 64));
3800
3801 bool all_zero_significand = llvm::all_of(mysignificand, equal_to(0));
3802
3803 bool is_zero = myexponent == 0 && all_zero_significand;
3804
// Only formats with IEEE754 non-finite behavior have an infinity encoding.
3805 if constexpr (S.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754) {
3806 if (myexponent - bias == ::exponentInf(S) && all_zero_significand) {
3807 makeInf(sign);
3808 return;
3809 }
3810 }
3811
3812 bool is_nan = false;
3813
// How a NaN is recognized depends on the format's NaN encoding: the NaN
// exponent with a non-zero significand (IEEE), the NaN exponent with an
// all-ones significand (AllOnes), or the negative-zero pattern
// (NegativeZero).
3814 if constexpr (S.nanEncoding == fltNanEncoding::IEEE) {
3815 is_nan = myexponent - bias == ::exponentNaN(S) && !all_zero_significand;
3816 } else if constexpr (S.nanEncoding == fltNanEncoding::AllOnes) {
3817 bool all_ones_significand =
3818 std::all_of(mysignificand.begin(), mysignificand.end() - 1,
3819 [](integerPart bits) { return bits == ~integerPart{0}; }) &&
3820 (!significand_mask ||
3821 mysignificand[mysignificand.size() - 1] == significand_mask);
3822 is_nan = myexponent - bias == ::exponentNaN(S) && all_ones_significand;
3823 } else if constexpr (S.nanEncoding == fltNanEncoding::NegativeZero) {
3824 is_nan = is_zero && sign;
3825 }
3826
3827 if (is_nan) {
3828 category = fcNaN;
3829 exponent = ::exponentNaN(S);
3830 std::copy_n(mysignificand.begin(), mysignificand.size(),
3831 significandParts());
3832 return;
3833 }
3834
// This check comes after the NaN check because, with NegativeZero encoding,
// the negative-zero pattern has already been taken as NaN.
3835 if (is_zero) {
3836 makeZero(sign);
3837 return;
3838 }
3839
3840 category = fcNormal;
3841 exponent = myexponent - bias;
3842 std::copy_n(mysignificand.begin(), mysignificand.size(), significandParts());
// A zero exponent field marks a denormal: it is pinned to minExponent and
// gets no integer bit. Every other value gets the integer bit set.
3843 if (myexponent == 0) // denormal
3844 exponent = S.minExponent;
3845 else
3846 significandParts()[mysignificand.size()-1] |= integer_bit; // integer bit
3847}
3848
3849void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
3850 initFromIEEEAPInt<APFloatBase::semIEEEquad>(api);
3851}
3852
3853void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
3854 initFromIEEEAPInt<APFloatBase::semIEEEdouble>(api);
3855}
3856
3857void IEEEFloat::initFromFloatAPInt(const APInt &api) {
3858 initFromIEEEAPInt<APFloatBase::semIEEEsingle>(api);
3859}
3860
3861void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
3862 initFromIEEEAPInt<APFloatBase::semBFloat>(api);
3863}
3864
3865void IEEEFloat::initFromHalfAPInt(const APInt &api) {
3866 initFromIEEEAPInt<APFloatBase::semIEEEhalf>(api);
3867}
3868
3869void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) {
3870 initFromIEEEAPInt<APFloatBase::semFloat8E5M2>(api);
3871}
3872
3873void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) {
3874 initFromIEEEAPInt<APFloatBase::semFloat8E5M2FNUZ>(api);
3875}
3876
3877void IEEEFloat::initFromFloat8E4M3APInt(const APInt &api) {
3878 initFromIEEEAPInt<APFloatBase::semFloat8E4M3>(api);
3879}
3880
3881void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {
3882 initFromIEEEAPInt<APFloatBase::semFloat8E4M3FN>(api);
3883}
3884
3885void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) {
3886 initFromIEEEAPInt<APFloatBase::semFloat8E4M3FNUZ>(api);
3887}
3888
3889void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) {
3890 initFromIEEEAPInt<APFloatBase::semFloat8E4M3B11FNUZ>(api);
3891}
3892
3893void IEEEFloat::initFromFloat8E3M4APInt(const APInt &api) {
3894 initFromIEEEAPInt<APFloatBase::semFloat8E3M4>(api);
3895}
3896
3897void IEEEFloat::initFromFloatTF32APInt(const APInt &api) {
3898 initFromIEEEAPInt<APFloatBase::semFloatTF32>(api);
3899}
3900
3901void IEEEFloat::initFromFloat6E3M2FNAPInt(const APInt &api) {
3902 initFromIEEEAPInt<APFloatBase::semFloat6E3M2FN>(api);
3903}
3904
3905void IEEEFloat::initFromFloat6E2M3FNAPInt(const APInt &api) {
3906 initFromIEEEAPInt<APFloatBase::semFloat6E2M3FN>(api);
3907}
3908
3909void IEEEFloat::initFromFloat4E2M1FNAPInt(const APInt &api) {
3910 initFromIEEEAPInt<APFloatBase::semFloat4E2M1FN>(api);
3911}
3912
3913/// Treat api as containing the bits of a floating point number.
3914void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
3915 assert(api.getBitWidth() == Sem->sizeInBits);
3916 if (Sem == &APFloatBase::semIEEEhalf)
3917 return initFromHalfAPInt(api);
3918 if (Sem == &APFloatBase::semBFloat)
3919 return initFromBFloatAPInt(api);
3920 if (Sem == &APFloatBase::semIEEEsingle)
3921 return initFromFloatAPInt(api);
3922 if (Sem == &APFloatBase::semIEEEdouble)
3923 return initFromDoubleAPInt(api);
3924 if (Sem == &APFloatBase::semX87DoubleExtended)
3925 return initFromF80LongDoubleAPInt(api);
3926 if (Sem == &APFloatBase::semIEEEquad)
3927 return initFromQuadrupleAPInt(api);
3928 if (Sem == &APFloatBase::semPPCDoubleDoubleLegacy)
3929 return initFromPPCDoubleDoubleLegacyAPInt(api);
3930 if (Sem == &APFloatBase::semFloat8E5M2)
3931 return initFromFloat8E5M2APInt(api);
3932 if (Sem == &APFloatBase::semFloat8E5M2FNUZ)
3933 return initFromFloat8E5M2FNUZAPInt(api);
3934 if (Sem == &APFloatBase::semFloat8E4M3)
3935 return initFromFloat8E4M3APInt(api);
3936 if (Sem == &APFloatBase::semFloat8E4M3FN)
3937 return initFromFloat8E4M3FNAPInt(api);
3938 if (Sem == &APFloatBase::semFloat8E4M3FNUZ)
3939 return initFromFloat8E4M3FNUZAPInt(api);
3940 if (Sem == &APFloatBase::semFloat8E4M3B11FNUZ)
3941 return initFromFloat8E4M3B11FNUZAPInt(api);
3942 if (Sem == &APFloatBase::semFloat8E3M4)
3943 return initFromFloat8E3M4APInt(api);
3944 if (Sem == &APFloatBase::semFloatTF32)
3945 return initFromFloatTF32APInt(api);
3946 if (Sem == &APFloatBase::semFloat8E8M0FNU)
3947 return initFromFloat8E8M0FNUAPInt(api);
3948 if (Sem == &APFloatBase::semFloat6E3M2FN)
3949 return initFromFloat6E3M2FNAPInt(api);
3950 if (Sem == &APFloatBase::semFloat6E2M3FN)
3951 return initFromFloat6E2M3FNAPInt(api);
3952 if (Sem == &APFloatBase::semFloat4E2M1FN)
3953 return initFromFloat4E2M1FNAPInt(api);
3954
3955 llvm_unreachable("unsupported semantics");
3956}
3957
3958/// Make this number the largest magnitude normal number in the given
3959/// semantics.
3960void IEEEFloat::makeLargest(bool Negative) {
3961 if (Negative && !semantics->hasSignedRepr)
3963 "This floating point format does not support signed values");
3964 // We want (in interchange format):
3965 // sign = {Negative}
3966 // exponent = 1..10
3967 // significand = 1..1
3968 category = fcNormal;
3969 sign = Negative;
3970 exponent = semantics->maxExponent;
3971
3972 // Use memset to set all but the highest integerPart to all ones.
3973 integerPart *significand = significandParts();
3974 unsigned PartCount = partCount();
3975 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
3976
3977 // Set the high integerPart especially setting all unused top bits for
3978 // internal consistency.
3979 const unsigned NumUnusedHighBits =
3980 PartCount*integerPartWidth - semantics->precision;
3981 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
3982 ? (~integerPart(0) >> NumUnusedHighBits)
3983 : 0;
3984 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
3985 semantics->nanEncoding == fltNanEncoding::AllOnes &&
3986 (semantics->precision > 1))
3987 significand[0] &= ~integerPart(1);
3988}
3989
3990/// Make this number the smallest magnitude denormal number in the given
3991/// semantics.
3992void IEEEFloat::makeSmallest(bool Negative) {
3993 if (Negative && !semantics->hasSignedRepr)
3995 "This floating point format does not support signed values");
3996 // We want (in interchange format):
3997 // sign = {Negative}
3998 // exponent = 0..0
3999 // significand = 0..01
4000 category = fcNormal;
4001 sign = Negative;
4002 exponent = semantics->minExponent;
4003 APInt::tcSet(significandParts(), 1, partCount());
4004}
4005
4007 if (Negative && !semantics->hasSignedRepr)
4009 "This floating point format does not support signed values");
4010 // We want (in interchange format):
4011 // sign = {Negative}
4012 // exponent = 0..0
4013 // significand = 10..0
4014
4015 category = fcNormal;
4016 zeroSignificand();
4017 sign = Negative;
4018 exponent = semantics->minExponent;
4019 APInt::tcSetBit(significandParts(), semantics->precision - 1);
4020}
4021
4022IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
4023 initFromAPInt(&Sem, API);
4024}
4025
4027 initFromAPInt(&APFloatBase::semIEEEsingle, APInt::floatToBits(f));
4028}
4029
4031 initFromAPInt(&APFloatBase::semIEEEdouble, APInt::doubleToBits(d));
4032}
4033
4034namespace {
4035 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
4036 Buffer.append(Str.begin(), Str.end());
4037 }
4038
4039 /// Removes data from the given significand until it is no more
4040 /// precise than is required for the desired precision.
4041 void AdjustToPrecision(APInt &significand,
4042 int &exp, unsigned FormatPrecision) {
4043 unsigned bits = significand.getActiveBits();
4044
4045 // 196/59 is a very slight overestimate of lg_2(10).
4046 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
4047
4048 if (bits <= bitsRequired) return;
4049
4050 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
4051 if (!tensRemovable) return;
4052
4053 exp += tensRemovable;
4054
4055 APInt divisor(significand.getBitWidth(), 1);
4056 APInt powten(significand.getBitWidth(), 10);
4057 while (true) {
4058 if (tensRemovable & 1)
4059 divisor *= powten;
4060 tensRemovable >>= 1;
4061 if (!tensRemovable) break;
4062 powten *= powten;
4063 }
4064
4065 significand = significand.udiv(divisor);
4066
4067 // Truncate the significand down to its active bit count.
4068 significand = significand.trunc(significand.getActiveBits());
4069 }
4070
4071
4072 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
4073 int &exp, unsigned FormatPrecision) {
4074 unsigned N = buffer.size();
4075 if (N <= FormatPrecision) return;
4076
4077 // The most significant figures are the last ones in the buffer.
4078 unsigned FirstSignificant = N - FormatPrecision;
4079
4080 // Round.
4081 // FIXME: this probably shouldn't use 'round half up'.
4082
4083 // Rounding down is just a truncation, except we also want to drop
4084 // trailing zeros from the new result.
4085 if (buffer[FirstSignificant - 1] < '5') {
4086 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
4087 FirstSignificant++;
4088
4089 exp += FirstSignificant;
4090 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4091 return;
4092 }
4093
4094 // Rounding up requires a decimal add-with-carry. If we continue
4095 // the carry, the newly-introduced zeros will just be truncated.
4096 for (unsigned I = FirstSignificant; I != N; ++I) {
4097 if (buffer[I] == '9') {
4098 FirstSignificant++;
4099 } else {
4100 buffer[I]++;
4101 break;
4102 }
4103 }
4104
4105 // If we carried through, we have exactly one digit of precision.
4106 if (FirstSignificant == N) {
4107 exp += FirstSignificant;
4108 buffer.clear();
4109 buffer.push_back('1');
4110 return;
4111 }
4112
4113 exp += FirstSignificant;
4114 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4115 }
4116
4117 void toStringImpl(SmallVectorImpl<char> &Str, const bool isNeg, int exp,
4118 APInt significand, unsigned FormatPrecision,
4119 unsigned FormatMaxPadding, bool TruncateZero) {
4120 const int semanticsPrecision = significand.getBitWidth();
4121
4122 if (isNeg)
4123 Str.push_back('-');
4124
4125 // Set FormatPrecision if zero. We want to do this before we
4126 // truncate trailing zeros, as those are part of the precision.
4127 if (!FormatPrecision) {
4128 // We use enough digits so the number can be round-tripped back to an
4129 // APFloat. The formula comes from "How to Print Floating-Point Numbers
4130 // Accurately" by Steele and White.
4131 // FIXME: Using a formula based purely on the precision is conservative;
4132 // we can print fewer digits depending on the actual value being printed.
4133
4134 // FormatPrecision = 2 + floor(significandBits / lg_2(10))
4135 FormatPrecision = 2 + semanticsPrecision * 59 / 196;
4136 }
4137
4138 // Ignore trailing binary zeros.
4139 int trailingZeros = significand.countr_zero();
4140 exp += trailingZeros;
4141 significand.lshrInPlace(trailingZeros);
4142
4143 // Change the exponent from 2^e to 10^e.
4144 if (exp == 0) {
4145 // Nothing to do.
4146 } else if (exp > 0) {
4147 // Just shift left.
4148 significand = significand.zext(semanticsPrecision + exp);
4149 significand <<= exp;
4150 exp = 0;
4151 } else { /* exp < 0 */
4152 int texp = -exp;
4153
4154 // We transform this using the identity:
4155 // (N)(2^-e) == (N)(5^e)(10^-e)
4156 // This means we have to multiply N (the significand) by 5^e.
4157 // To avoid overflow, we have to operate on numbers large
4158 // enough to store N * 5^e:
4159 // log2(N * 5^e) == log2(N) + e * log2(5)
4160 // <= semantics->precision + e * 137 / 59
4161 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
4162
4163 unsigned precision = semanticsPrecision + (137 * texp + 136) / 59;
4164
4165 // Multiply significand by 5^e.
4166 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
4167 significand = significand.zext(precision);
4168 APInt five_to_the_i(precision, 5);
4169 while (true) {
4170 if (texp & 1)
4171 significand *= five_to_the_i;
4172
4173 texp >>= 1;
4174 if (!texp)
4175 break;
4176 five_to_the_i *= five_to_the_i;
4177 }
4178 }
4179
4180 AdjustToPrecision(significand, exp, FormatPrecision);
4181
4183
4184 // Fill the buffer.
4185 unsigned precision = significand.getBitWidth();
4186 if (precision < 4) {
4187 // We need enough precision to store the value 10.
4188 precision = 4;
4189 significand = significand.zext(precision);
4190 }
4191 APInt ten(precision, 10);
4192 APInt digit(precision, 0);
4193
4194 bool inTrail = true;
4195 while (significand != 0) {
4196 // digit <- significand % 10
4197 // significand <- significand / 10
4198 APInt::udivrem(significand, ten, significand, digit);
4199
4200 unsigned d = digit.getZExtValue();
4201
4202 // Drop trailing zeros.
4203 if (inTrail && !d)
4204 exp++;
4205 else {
4206 buffer.push_back((char) ('0' + d));
4207 inTrail = false;
4208 }
4209 }
4210
4211 assert(!buffer.empty() && "no characters in buffer!");
4212
4213 // Drop down to FormatPrecision.
4214 // TODO: don't do more precise calculations above than are required.
4215 AdjustToPrecision(buffer, exp, FormatPrecision);
4216
4217 unsigned NDigits = buffer.size();
4218
4219 // Check whether we should use scientific notation.
4220 bool FormatScientific;
4221 if (!FormatMaxPadding) {
4222 FormatScientific = true;
4223 } else {
4224 if (exp >= 0) {
4225 // 765e3 --> 765000
4226 // ^^^
4227 // But we shouldn't make the number look more precise than it is.
4228 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
4229 NDigits + (unsigned) exp > FormatPrecision);
4230 } else {
4231 // Power of the most significant digit.
4232 int MSD = exp + (int) (NDigits - 1);
4233 if (MSD >= 0) {
4234 // 765e-2 == 7.65
4235 FormatScientific = false;
4236 } else {
4237 // 765e-5 == 0.00765
4238 // ^ ^^
4239 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
4240 }
4241 }
4242 }
4243
4244 // Scientific formatting is pretty straightforward.
4245 if (FormatScientific) {
4246 exp += (NDigits - 1);
4247
4248 Str.push_back(buffer[NDigits-1]);
4249 Str.push_back('.');
4250 if (NDigits == 1 && TruncateZero)
4251 Str.push_back('0');
4252 else
4253 for (unsigned I = 1; I != NDigits; ++I)
4254 Str.push_back(buffer[NDigits-1-I]);
4255 // Fill with zeros up to FormatPrecision.
4256 if (!TruncateZero && FormatPrecision > NDigits - 1)
4257 Str.append(FormatPrecision - NDigits + 1, '0');
4258 // For !TruncateZero we use lower 'e'.
4259 Str.push_back(TruncateZero ? 'E' : 'e');
4260
4261 Str.push_back(exp >= 0 ? '+' : '-');
4262 if (exp < 0)
4263 exp = -exp;
4264 SmallVector<char, 6> expbuf;
4265 do {
4266 expbuf.push_back((char) ('0' + (exp % 10)));
4267 exp /= 10;
4268 } while (exp);
4269 // Exponent always at least two digits if we do not truncate zeros.
4270 if (!TruncateZero && expbuf.size() < 2)
4271 expbuf.push_back('0');
4272 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
4273 Str.push_back(expbuf[E-1-I]);
4274 return;
4275 }
4276
4277 // Non-scientific, positive exponents.
4278 if (exp >= 0) {
4279 for (unsigned I = 0; I != NDigits; ++I)
4280 Str.push_back(buffer[NDigits-1-I]);
4281 for (unsigned I = 0; I != (unsigned) exp; ++I)
4282 Str.push_back('0');
4283 return;
4284 }
4285
4286 // Non-scientific, negative exponents.
4287
4288 // The number of digits to the left of the decimal point.
4289 int NWholeDigits = exp + (int) NDigits;
4290
4291 unsigned I = 0;
4292 if (NWholeDigits > 0) {
4293 for (; I != (unsigned) NWholeDigits; ++I)
4294 Str.push_back(buffer[NDigits-I-1]);
4295 Str.push_back('.');
4296 } else {
4297 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
4298
4299 Str.push_back('0');
4300 Str.push_back('.');
4301 for (unsigned Z = 1; Z != NZeros; ++Z)
4302 Str.push_back('0');
4303 }
4304
4305 for (; I != NDigits; ++I)
4306 Str.push_back(buffer[NDigits-I-1]);
4307
4308 }
4309} // namespace
4310
4311void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
4312 unsigned FormatMaxPadding, bool TruncateZero) const {
4313 switch (category) {
4314 case fcInfinity:
4315 if (isNegative())
4316 return append(Str, "-Inf");
4317 else
4318 return append(Str, "+Inf");
4319
4320 case fcNaN: return append(Str, "NaN");
4321
4322 case fcZero:
4323 if (isNegative())
4324 Str.push_back('-');
4325
4326 if (!FormatMaxPadding) {
4327 if (TruncateZero)
4328 append(Str, "0.0E+0");
4329 else {
4330 append(Str, "0.0");
4331 if (FormatPrecision > 1)
4332 Str.append(FormatPrecision - 1, '0');
4333 append(Str, "e+00");
4334 }
4335 } else {
4336 Str.push_back('0');
4337 }
4338 return;
4339
4340 case fcNormal:
4341 break;
4342 }
4343
4344 // Decompose the number into an APInt and an exponent.
4345 int exp = exponent - ((int) semantics->precision - 1);
4346 APInt significand(
4347 semantics->precision,
4348 ArrayRef(significandParts(), partCountForBits(semantics->precision)));
4349
4350 toStringImpl(Str, isNegative(), exp, significand, FormatPrecision,
4351 FormatMaxPadding, TruncateZero);
4352
4353}
4354
4356 if (!isFinite() || isZero())
4357 return INT_MIN;
4358
4359 const integerPart *Parts = significandParts();
4360 const int PartCount = partCountForBits(semantics->precision);
4361
4362 int PopCount = 0;
4363 for (int i = 0; i < PartCount; ++i) {
4364 PopCount += llvm::popcount(Parts[i]);
4365 if (PopCount > 1)
4366 return INT_MIN;
4367 }
4368
4369 if (exponent != semantics->minExponent)
4370 return exponent;
4371
4372 int CountrParts = 0;
4373 for (int i = 0; i < PartCount;
4374 ++i, CountrParts += APInt::APINT_BITS_PER_WORD) {
4375 if (Parts[i] != 0) {
4376 return exponent - semantics->precision + CountrParts +
4377 llvm::countr_zero(Parts[i]) + 1;
4378 }
4379 }
4380
4381 llvm_unreachable("didn't find the set bit");
4382}
4383
4385 if (!isNaN())
4386 return false;
4387 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
4388 semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
4389 return false;
4390
4391 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
4392 // first bit of the trailing significand being 0.
4393 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
4394}
4395
4396/// IEEE-754R 2008 5.3.1: nextUp/nextDown.
4397///
4398/// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
4399/// appropriate sign switching before/after the computation.
4401 // If we are performing nextDown, swap sign so we have -x.
4402 if (nextDown)
4403 changeSign();
4404
4405 // Compute nextUp(x)
4406 opStatus result = opOK;
4407
4408 // Handle each float category separately.
4409 switch (category) {
4410 case fcInfinity:
4411 // nextUp(+inf) = +inf
4412 if (!isNegative())
4413 break;
4414 // nextUp(-inf) = -getLargest()
4415 makeLargest(true);
4416 break;
4417 case fcNaN:
4418 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
4419 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
4420 // change the payload.
4421 if (isSignaling()) {
4422 result = opInvalidOp;
4423 // For consistency, propagate the sign of the sNaN to the qNaN.
4424 makeNaN(false, isNegative(), nullptr);
4425 }
4426 break;
4427 case fcZero:
4428 // nextUp(pm 0) = +getSmallest()
4429 makeSmallest(false);
4430 break;
4431 case fcNormal:
4432 // nextUp(-getSmallest()) = -0
4433 if (isSmallest() && isNegative()) {
4434 APInt::tcSet(significandParts(), 0, partCount());
4435 category = fcZero;
4436 exponent = 0;
4437 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
4438 sign = false;
4439 if (!semantics->hasZero)
4441 break;
4442 }
4443
4444 if (isLargest() && !isNegative()) {
4445 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4446 // nextUp(getLargest()) == NAN
4447 makeNaN();
4448 break;
4449 } else if (semantics->nonFiniteBehavior ==
4451 // nextUp(getLargest()) == getLargest()
4452 break;
4453 } else {
4454 // nextUp(getLargest()) == INFINITY
4455 APInt::tcSet(significandParts(), 0, partCount());
4456 category = fcInfinity;
4457 exponent = semantics->maxExponent + 1;
4458 break;
4459 }
4460 }
4461
4462 // nextUp(normal) == normal + inc.
4463 if (isNegative()) {
4464 // If we are negative, we need to decrement the significand.
4465
4466 // We only cross a binade boundary that requires adjusting the exponent
4467 // if:
4468 // 1. exponent != semantics->minExponent. This implies we are not in the
4469 // smallest binade or are dealing with denormals.
4470 // 2. Our significand excluding the integral bit is all zeros.
4471 bool WillCrossBinadeBoundary =
4472 exponent != semantics->minExponent && isSignificandAllZeros();
4473
4474 // Decrement the significand.
4475 //
4476 // We always do this since:
4477 // 1. If we are dealing with a non-binade decrement, by definition we
4478 // just decrement the significand.
4479 // 2. If we are dealing with a normal -> normal binade decrement, since
4480 // we have an explicit integral bit the fact that all bits but the
4481 // integral bit are zero implies that subtracting one will yield a
4482 // significand with 0 integral bit and 1 in all other spots. Thus we
4483 // must just adjust the exponent and set the integral bit to 1.
4484 // 3. If we are dealing with a normal -> denormal binade decrement,
4485 // since we set the integral bit to 0 when we represent denormals, we
4486 // just decrement the significand.
4487 integerPart *Parts = significandParts();
4488 APInt::tcDecrement(Parts, partCount());
4489
4490 if (WillCrossBinadeBoundary) {
4491 // Our result is a normal number. Do the following:
4492 // 1. Set the integral bit to 1.
4493 // 2. Decrement the exponent.
4494 APInt::tcSetBit(Parts, semantics->precision - 1);
4495 exponent--;
4496 }
4497 } else {
4498 // If we are positive, we need to increment the significand.
4499
4500 // We only cross a binade boundary that requires adjusting the exponent if
4501 // the input is not a denormal and all of said input's significand bits
4502 // are set. If all of said conditions are true: clear the significand, set
4503 // the integral bit to 1, and increment the exponent. If we have a
4504 // denormal always increment since moving denormals and the numbers in the
4505 // smallest normal binade have the same exponent in our representation.
4506 // If there are only exponents, any increment always crosses the
4507 // BinadeBoundary.
4508 bool WillCrossBinadeBoundary = !APFloat::hasSignificand(*semantics) ||
4509 (!isDenormal() && isSignificandAllOnes());
4510
4511 if (WillCrossBinadeBoundary) {
4512 integerPart *Parts = significandParts();
4513 APInt::tcSet(Parts, 0, partCount());
4514 APInt::tcSetBit(Parts, semantics->precision - 1);
4515 assert(exponent != semantics->maxExponent &&
4516 "We can not increment an exponent beyond the maxExponent allowed"
4517 " by the given floating point semantics.");
4518 exponent++;
4519 } else {
4520 incrementSignificand();
4521 }
4522 }
4523 break;
4524 }
4525
4526 // If we are performing nextDown, swap sign so we have -nextUp(-x)
4527 if (nextDown)
4528 changeSign();
4529
4530 return result;
4531}
4532
4534 assert(isNaN() && "Can only be called on NaN values");
4535 // Number of bits in the payload, excluding the (maybe implied) integer bit.
4536 unsigned Bits = semantics->precision - 1;
4537 return APInt(Bits, ArrayRef(significandParts(), partCountForBits(Bits)));
4538}
4539
4540APFloatBase::ExponentType IEEEFloat::exponentNaN() const {
4541 return ::exponentNaN(*semantics);
4542}
4543
4544APFloatBase::ExponentType IEEEFloat::exponentInf() const {
4545 return ::exponentInf(*semantics);
4546}
4547
4548APFloatBase::ExponentType IEEEFloat::exponentZero() const {
4549 return ::exponentZero(*semantics);
4550}
4551
4552void IEEEFloat::makeInf(bool Negative) {
4553 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
4554 llvm_unreachable("This floating point format does not support Inf");
4555
4556 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4557 // There is no Inf, so make NaN instead.
4558 makeNaN(false, Negative);
4559 return;
4560 }
4561 category = fcInfinity;
4562 sign = Negative;
4563 exponent = exponentInf();
4564 APInt::tcSet(significandParts(), 0, partCount());
4565}
4566
4567void IEEEFloat::makeZero(bool Negative) {
4568 if (!semantics->hasZero)
4569 llvm_unreachable("This floating point format does not support Zero");
4570
4571 category = fcZero;
4572 sign = Negative;
4573 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
4574 // Merge negative zero to positive because 0b10000...000 is used for NaN
4575 sign = false;
4576 }
4577 exponent = exponentZero();
4578 APInt::tcSet(significandParts(), 0, partCount());
4579}
4580
4582 assert(isNaN());
4583 if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::NanOnly)
4584 APInt::tcSetBit(significandParts(), semantics->precision - 2);
4585}
4586
4587int ilogb(const IEEEFloat &Arg) {
4588 if (Arg.isNaN())
4589 return APFloat::IEK_NaN;
4590 if (Arg.isZero())
4591 return APFloat::IEK_Zero;
4592 if (Arg.isInfinity())
4593 return APFloat::IEK_Inf;
4594 if (!Arg.isDenormal())
4595 return Arg.exponent;
4596
4597 IEEEFloat Normalized(Arg);
4598 int SignificandBits = Arg.getSemantics().precision - 1;
4599
4600 Normalized.exponent += SignificandBits;
4601 Normalized.normalize(APFloat::rmNearestTiesToEven, lfExactlyZero);
4602 return Normalized.exponent - SignificandBits;
4603}
4604
4606 auto MaxExp = X.getSemantics().maxExponent;
4607 auto MinExp = X.getSemantics().minExponent;
4608
4609 // If Exp is wildly out-of-scale, simply adding it to X.exponent will
4610 // overflow; clamp it to a safe range before adding, but ensure that the range
4611 // is large enough that the clamp does not change the result. The range we
4612 // need to support is the difference between the largest possible exponent and
4613 // the normalized exponent of half the smallest denormal.
4614
4615 int SignificandBits = X.getSemantics().precision - 1;
4616 int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
4617
4618 // Clamp to one past the range ends to let normalize handle overlflow.
4619 X.exponent += std::clamp(Exp, -MaxIncrement - 1, MaxIncrement);
4620 X.normalize(RoundingMode, lfExactlyZero);
4621 if (X.isNaN())
4622 X.makeQuiet();
4623 return X;
4624}
4625
4626IEEEFloat frexp(const IEEEFloat &Val, int &Exp, roundingMode RM) {
4627 Exp = ilogb(Val);
4628
4629 // Quiet signalling nans.
4630 if (Exp == APFloat::IEK_NaN) {
4631 IEEEFloat Quiet(Val);
4632 Quiet.makeQuiet();
4633 return Quiet;
4634 }
4635
4636 if (Exp == APFloat::IEK_Inf)
4637 return Val;
4638
4639 // 1 is added because frexp is defined to return a normalized fraction in
4640 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
4641 Exp = Exp == APFloat::IEK_Zero ? 0 : Exp + 1;
4642 return scalbn(Val, -Exp, RM);
4643}
4644
4646 : Semantics(&S),
4647 Floats(new APFloat[2]{APFloat(APFloatBase::semIEEEdouble),
4648 APFloat(APFloatBase::semIEEEdouble)}) {
4649 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4650}
4651
4653 : Semantics(&S), Floats(new APFloat[2]{
4654 APFloat(APFloatBase::semIEEEdouble, uninitialized),
4655 APFloat(APFloatBase::semIEEEdouble, uninitialized)}) {
4656 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4657}
4658
4660 : Semantics(&S),
4661 Floats(new APFloat[2]{APFloat(APFloatBase::semIEEEdouble, I),
4662 APFloat(APFloatBase::semIEEEdouble)}) {
4663 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4664}
4665
4667 : Semantics(&S),
4668 Floats(new APFloat[2]{
4669 APFloat(APFloatBase::semIEEEdouble, APInt(64, I.getRawData()[0])),
4670 APFloat(APFloatBase::semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
4671 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4672}
4673
4675 APFloat &&Second)
4676 : Semantics(&S),
4677 Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
4678 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4679 assert(&Floats[0].getSemantics() == &APFloatBase::semIEEEdouble);
4680 assert(&Floats[1].getSemantics() == &APFloatBase::semIEEEdouble);
4681}
4682
4684 : Semantics(RHS.Semantics),
4685 Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
4686 APFloat(RHS.Floats[1])}
4687 : nullptr) {
4688 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4689}
4690
4692 : Semantics(RHS.Semantics), Floats(RHS.Floats) {
4693 RHS.Semantics = &APFloatBase::semBogus;
4694 RHS.Floats = nullptr;
4695 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4696}
4697
4699 if (Semantics == RHS.Semantics && RHS.Floats) {
4700 Floats[0] = RHS.Floats[0];
4701 Floats[1] = RHS.Floats[1];
4702 } else if (this != &RHS) {
4703 this->~DoubleAPFloat();
4704 new (this) DoubleAPFloat(RHS);
4705 }
4706 return *this;
4707}
4708
4709// Returns a result such that:
4710// 1. abs(Lo) <= ulp(Hi)/2
4711// 2. Hi == RTNE(Hi + Lo)
4712// 3. Hi + Lo == X + Y
4713//
4714// Requires that log2(X) >= log2(Y).
4715static std::pair<APFloat, APFloat> fastTwoSum(APFloat X, APFloat Y) {
4716 if (!X.isFinite())
4717 return {X, APFloat::getZero(X.getSemantics(), /*Negative=*/false)};
4718 APFloat Hi = X + Y;
4719 APFloat Delta = Hi - X;
4720 APFloat Lo = Y - Delta;
4721 return {Hi, Lo};
4722}
4723
4724// Implement addition, subtraction, multiplication and division based on:
4725// "Software for Doubled-Precision Floating-Point Computations",
4726// by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
4727APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
4728 const APFloat &c, const APFloat &cc,
4729 roundingMode RM) {
4730 int Status = opOK;
4731 APFloat z = a;
4732 Status |= z.add(c, RM);
4733 if (!z.isFinite()) {
4734 if (!z.isInfinity()) {
4735 Floats[0] = std::move(z);
4736 Floats[1].makeZero(/* Neg = */ false);
4737 return (opStatus)Status;
4738 }
4739 Status = opOK;
4740 auto AComparedToC = a.compareAbsoluteValue(c);
4741 z = cc;
4742 Status |= z.add(aa, RM);
4743 if (AComparedToC == APFloat::cmpGreaterThan) {
4744 // z = cc + aa + c + a;
4745 Status |= z.add(c, RM);
4746 Status |= z.add(a, RM);
4747 } else {
4748 // z = cc + aa + a + c;
4749 Status |= z.add(a, RM);
4750 Status |= z.add(c, RM);
4751 }
4752 if (!z.isFinite()) {
4753 Floats[0] = std::move(z);
4754 Floats[1].makeZero(/* Neg = */ false);
4755 return (opStatus)Status;
4756 }
4757 Floats[0] = z;
4758 APFloat zz = aa;
4759 Status |= zz.add(cc, RM);
4760 if (AComparedToC == APFloat::cmpGreaterThan) {
4761 // Floats[1] = a - z + c + zz;
4762 Floats[1] = a;
4763 Status |= Floats[1].subtract(z, RM);
4764 Status |= Floats[1].add(c, RM);
4765 Status |= Floats[1].add(zz, RM);
4766 } else {
4767 // Floats[1] = c - z + a + zz;
4768 Floats[1] = c;
4769 Status |= Floats[1].subtract(z, RM);
4770 Status |= Floats[1].add(a, RM);
4771 Status |= Floats[1].add(zz, RM);
4772 }
4773 } else {
4774 // q = a - z;
4775 APFloat q = a;
4776 Status |= q.subtract(z, RM);
4777
4778 // zz = q + c + (a - (q + z)) + aa + cc;
4779 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
4780 auto zz = q;
4781 Status |= zz.add(c, RM);
4782 Status |= q.add(z, RM);
4783 Status |= q.subtract(a, RM);
4784 q.changeSign();
4785 Status |= zz.add(q, RM);
4786 Status |= zz.add(aa, RM);
4787 Status |= zz.add(cc, RM);
4788 if (zz.isZero() && !zz.isNegative()) {
4789 Floats[0] = std::move(z);
4790 Floats[1].makeZero(/* Neg = */ false);
4791 return opOK;
4792 }
4793 Floats[0] = z;
4794 Status |= Floats[0].add(zz, RM);
4795 if (!Floats[0].isFinite()) {
4796 Floats[1].makeZero(/* Neg = */ false);
4797 return (opStatus)Status;
4798 }
4799 Floats[1] = std::move(z);
4800 Status |= Floats[1].subtract(Floats[0], RM);
4801 Status |= Floats[1].add(zz, RM);
4802 }
4803 return (opStatus)Status;
4804}
4805
4806APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
4807 const DoubleAPFloat &RHS,
4808 DoubleAPFloat &Out,
4809 roundingMode RM) {
4810 if (LHS.getCategory() == fcNaN) {
4811 Out = LHS;
4812 return opOK;
4813 }
4814 if (RHS.getCategory() == fcNaN) {
4815 Out = RHS;
4816 return opOK;
4817 }
4818 if (LHS.getCategory() == fcZero) {
4819 Out = RHS;
4820 return opOK;
4821 }
4822 if (RHS.getCategory() == fcZero) {
4823 Out = LHS;
4824 return opOK;
4825 }
4826 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity &&
4827 LHS.isNegative() != RHS.isNegative()) {
4828 Out.makeNaN(false, Out.isNegative(), nullptr);
4829 return opInvalidOp;
4830 }
4831 if (LHS.getCategory() == fcInfinity) {
4832 Out = LHS;
4833 return opOK;
4834 }
4835 if (RHS.getCategory() == fcInfinity) {
4836 Out = RHS;
4837 return opOK;
4838 }
4839 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
4840
4841 APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
4842 CC(RHS.Floats[1]);
4843 assert(&A.getSemantics() == &APFloatBase::semIEEEdouble);
4844 assert(&AA.getSemantics() == &APFloatBase::semIEEEdouble);
4845 assert(&C.getSemantics() == &APFloatBase::semIEEEdouble);
4846 assert(&CC.getSemantics() == &APFloatBase::semIEEEdouble);
4847 assert(&Out.Floats[0].getSemantics() == &APFloatBase::semIEEEdouble);
4848 assert(&Out.Floats[1].getSemantics() == &APFloatBase::semIEEEdouble);
4849 return Out.addImpl(A, AA, C, CC, RM);
4850}
4851
4853 roundingMode RM) {
4854 return addWithSpecial(*this, RHS, *this, RM);
4855}
4856
4858 roundingMode RM) {
4859 changeSign();
4860 auto Ret = add(RHS, RM);
4861 changeSign();
4862 return Ret;
4863}
4864
4867 const auto &LHS = *this;
4868 auto &Out = *this;
4869 /* Interesting observation: For special categories, finding the lowest
4870 common ancestor of the following layered graph gives the correct
4871 return category:
4872
4873 NaN
4874 / \
4875 Zero Inf
4876 \ /
4877 Normal
4878
4879 e.g. NaN * NaN = NaN
4880 Zero * Inf = NaN
4881 Normal * Zero = Zero
4882 Normal * Inf = Inf
4883 */
4884 if (LHS.getCategory() == fcNaN) {
4885 Out = LHS;
4886 return opOK;
4887 }
4888 if (RHS.getCategory() == fcNaN) {
4889 Out = RHS;
4890 return opOK;
4891 }
4892 if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) ||
4893 (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) {
4894 Out.makeNaN(false, false, nullptr);
4895 return opOK;
4896 }
4897 if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) {
4898 Out = LHS;
4899 return opOK;
4900 }
4901 if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) {
4902 Out = RHS;
4903 return opOK;
4904 }
4905 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal &&
4906 "Special cases not handled exhaustively");
4907
4908 int Status = opOK;
4909 APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
4910 // t = a * c
4911 APFloat T = A;
4912 Status |= T.multiply(C, RM);
4913 if (!T.isFiniteNonZero()) {
4914 Floats[0] = std::move(T);
4915 Floats[1].makeZero(/* Neg = */ false);
4916 return (opStatus)Status;
4917 }
4918
4919 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
4920 APFloat Tau = A;
4921 T.changeSign();
4922 Status |= Tau.fusedMultiplyAdd(C, T, RM);
4923 T.changeSign();
4924 {
4925 // v = a * d
4926 APFloat V = A;
4927 Status |= V.multiply(D, RM);
4928 // w = b * c
4929 APFloat W = B;
4930 Status |= W.multiply(C, RM);
4931 Status |= V.add(W, RM);
4932 // tau += v + w
4933 Status |= Tau.add(V, RM);
4934 }
4935 // u = t + tau
4936 APFloat U = T;
4937 Status |= U.add(Tau, RM);
4938
4939 Floats[0] = U;
4940 if (!U.isFinite()) {
4941 Floats[1].makeZero(/* Neg = */ false);
4942 } else {
4943 // Floats[1] = (t - u) + tau
4944 Status |= T.subtract(U, RM);
4945 Status |= T.add(Tau, RM);
4946 Floats[1] = std::move(T);
4947 }
4948 return (opStatus)Status;
4949}
4950
4953 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
4954 "Unexpected Semantics");
4955 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
4956 auto Ret = Tmp.divide(
4957 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
4958 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
4959 return Ret;
4960}
4961
4963 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
4964 "Unexpected Semantics");
4965 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
4966 auto Ret = Tmp.remainder(
4967 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4968 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
4969 return Ret;
4970}
4971
4973 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
4974 "Unexpected Semantics");
4975 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
4976 auto Ret = Tmp.mod(
4977 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4978 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
4979 return Ret;
4980}
4981
4984 const DoubleAPFloat &Addend,
4986 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
4987 "Unexpected Semantics");
4988 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
4989 auto Ret = Tmp.fusedMultiplyAdd(
4990 APFloat(APFloatBase::semPPCDoubleDoubleLegacy,
4991 Multiplicand.bitcastToAPInt()),
4992 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()),
4993 RM);
4994 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
4995 return Ret;
4996}
4997
4999 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5000 "Unexpected Semantics");
5001 const APFloat &Hi = getFirst();
5002 const APFloat &Lo = getSecond();
5003
5004 APFloat RoundedHi = Hi;
5005 const opStatus HiStatus = RoundedHi.roundToIntegral(RM);
5006
5007 // We can reduce the problem to just the high part if the input:
5008 // 1. Represents a non-finite value.
5009 // 2. Has a component which is zero.
5010 if (!Hi.isFiniteNonZero() || Lo.isZero()) {
5011 Floats[0] = std::move(RoundedHi);
5012 Floats[1].makeZero(/*Neg=*/false);
5013 return HiStatus;
5014 }
5015
5016 // Adjust `Rounded` in the direction of `TieBreaker` if `ToRound` was at a
5017 // halfway point.
5018 auto RoundToNearestHelper = [](APFloat ToRound, APFloat Rounded,
5019 APFloat TieBreaker) {
5020 // RoundingError tells us which direction we rounded:
5021 // - RoundingError > 0: we rounded up.
5022 // - RoundingError < 0: we rounded down.
5023 // Sterbenz' lemma ensures that RoundingError is exact.
5024 const APFloat RoundingError = Rounded - ToRound;
5025 if (TieBreaker.isNonZero() &&
5026 TieBreaker.isNegative() != RoundingError.isNegative() &&
5027 abs(RoundingError).isExactlyValue(0.5))
5028 Rounded.add(
5029 APFloat::getOne(Rounded.getSemantics(), TieBreaker.isNegative()),
5031 return Rounded;
5032 };
5033
5034 // Case 1: Hi is not an integer.
5035 // Special cases are for rounding modes that are sensitive to ties.
5036 if (RoundedHi != Hi) {
5037 // We need to consider the case where Hi was between two integers and the
5038 // rounding mode broke the tie when, in fact, Lo may have had a different
5039 // sign than Hi.
5040 if (RM == rmNearestTiesToAway || RM == rmNearestTiesToEven)
5041 RoundedHi = RoundToNearestHelper(Hi, RoundedHi, Lo);
5042
5043 Floats[0] = std::move(RoundedHi);
5044 Floats[1].makeZero(/*Neg=*/false);
5045 return HiStatus;
5046 }
5047
5048 // Case 2: Hi is an integer.
5049 // Special cases are for rounding modes which are rounding towards or away from zero.
5050 RoundingMode LoRoundingMode;
5051 if (RM == rmTowardZero)
5052 // When our input is positive, we want the Lo component rounded toward
5053 // negative infinity to get the smallest result magnitude. Likewise,
5054 // negative inputs want the Lo component rounded toward positive infinity.
5055 LoRoundingMode = isNegative() ? rmTowardPositive : rmTowardNegative;
5056 else
5057 LoRoundingMode = RM;
5058
5059 APFloat RoundedLo = Lo;
5060 const opStatus LoStatus = RoundedLo.roundToIntegral(LoRoundingMode);
5061 if (LoRoundingMode == rmNearestTiesToAway)
5062 // We need to consider the case where Lo was between two integers and the
5063 // rounding mode broke the tie when, in fact, Hi may have had a different
5064 // sign than Lo.
5065 RoundedLo = RoundToNearestHelper(Lo, RoundedLo, Hi);
5066
5067 // We must ensure that the final result has no overlap between the two APFloat values.
5068 std::tie(RoundedHi, RoundedLo) = fastTwoSum(RoundedHi, RoundedLo);
5069
5070 Floats[0] = std::move(RoundedHi);
5071 Floats[1] = std::move(RoundedLo);
5072 return LoStatus;
5073}
5074
5076 Floats[0].changeSign();
5077 Floats[1].changeSign();
5078}
5079
5082 // Compare absolute values of the high parts.
5083 const cmpResult HiPartCmp = Floats[0].compareAbsoluteValue(RHS.Floats[0]);
5084 if (HiPartCmp != cmpEqual)
5085 return HiPartCmp;
5086
5087 // Zero, regardless of sign, is equal.
5088 if (Floats[1].isZero() && RHS.Floats[1].isZero())
5089 return cmpEqual;
5090
5091 // At this point, |this->Hi| == |RHS.Hi|.
5092 // The magnitude is |Hi+Lo| which is Hi+|Lo| if signs of Hi and Lo are the
5093 // same, and Hi-|Lo| if signs are different.
5094 const bool ThisIsSubtractive =
5095 Floats[0].isNegative() != Floats[1].isNegative();
5096 const bool RHSIsSubtractive =
5097 RHS.Floats[0].isNegative() != RHS.Floats[1].isNegative();
5098
5099 // Case 1: The low part of 'this' is zero.
5100 if (Floats[1].isZero())
5101 // We are comparing |Hi| vs. |Hi| ± |RHS.Lo|.
5102 // If RHS is subtractive, its magnitude is smaller.
5103 // If RHS is additive, its magnitude is larger.
5104 return RHSIsSubtractive ? cmpGreaterThan : cmpLessThan;
5105
5106 // Case 2: The low part of 'RHS' is zero (and we know 'this' is not).
5107 if (RHS.Floats[1].isZero())
5108 // We are comparing |Hi| ± |This.Lo| vs. |Hi|.
5109 // If 'this' is subtractive, its magnitude is smaller.
5110 // If 'this' is additive, its magnitude is larger.
5111 return ThisIsSubtractive ? cmpLessThan : cmpGreaterThan;
5112
5113 // If their natures differ, the additive one is larger.
5114 if (ThisIsSubtractive != RHSIsSubtractive)
5115 return ThisIsSubtractive ? cmpLessThan : cmpGreaterThan;
5116
5117 // Case 3: Both are additive (Hi+|Lo|) or both are subtractive (Hi-|Lo|).
5118 // The comparison now depends on the magnitude of the low parts.
5119 const cmpResult LoPartCmp = Floats[1].compareAbsoluteValue(RHS.Floats[1]);
5120
5121 if (ThisIsSubtractive) {
5122 // Both are subtractive (Hi-|Lo|), so the comparison of |Lo| is inverted.
5123 if (LoPartCmp == cmpLessThan)
5124 return cmpGreaterThan;
5125 if (LoPartCmp == cmpGreaterThan)
5126 return cmpLessThan;
5127 }
5128
5129 // If additive, the comparison of |Lo| is direct.
5130 // If equal, they are equal.
5131 return LoPartCmp;
5132}
5133
5135 return Floats[0].getCategory();
5136}
5137
5138bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
5139
5141 Floats[0].makeInf(Neg);
5142 Floats[1].makeZero(/* Neg = */ false);
5143}
5144
5146 Floats[0].makeZero(Neg);
5147 Floats[1].makeZero(/* Neg = */ false);
5148}
5149
5151 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5152 "Unexpected Semantics");
5153 Floats[0] =
5154 APFloat(APFloatBase::semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
5155 Floats[1] =
5156 APFloat(APFloatBase::semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
5157 if (Neg)
5158 changeSign();
5159}
5160
5162 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5163 "Unexpected Semantics");
5164 Floats[0].makeSmallest(Neg);
5165 Floats[1].makeZero(/* Neg = */ false);
5166}
5167
5169 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5170 "Unexpected Semantics");
5171 Floats[0] =
5172 APFloat(APFloatBase::semIEEEdouble, APInt(64, 0x0360000000000000ull));
5173 if (Neg)
5174 Floats[0].changeSign();
5175 Floats[1].makeZero(/* Neg = */ false);
5176}
5177
5178void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
5179 Floats[0].makeNaN(SNaN, Neg, fill);
5180 Floats[1].makeZero(/* Neg = */ false);
5181}
5182
5184 auto Result = Floats[0].compare(RHS.Floats[0]);
5185 // |Float[0]| > |Float[1]|
5186 if (Result == APFloat::cmpEqual)
5187 return Floats[1].compare(RHS.Floats[1]);
5188 return Result;
5189}
5190
5192 return Floats[0].bitwiseIsEqual(RHS.Floats[0]) &&
5193 Floats[1].bitwiseIsEqual(RHS.Floats[1]);
5194}
5195
5197 if (Arg.Floats)
5198 return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1]));
5199 return hash_combine(Arg.Semantics);
5200}
5201
5203 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5204 "Unexpected Semantics");
5205 uint64_t Data[] = {
5206 Floats[0].bitcastToAPInt().getRawData()[0],
5207 Floats[1].bitcastToAPInt().getRawData()[0],
5208 };
5209 return APInt(128, Data);
5210}
5211
5213 roundingMode RM) {
5214 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5215 "Unexpected Semantics");
5216 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy);
5217 auto Ret = Tmp.convertFromString(S, RM);
5218 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
5219 return Ret;
5220}
5221
5222// The double-double lattice of values corresponds to numbers which obey:
5223// - abs(lo) <= 1/2 * ulp(hi)
5224// - roundTiesToEven(hi + lo) == hi
5225//
5226// nextUp must choose the smallest output > input that follows these rules.
5227// nextDown must choose the largest output < input that follows these rules.
5229 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5230 "Unexpected Semantics");
5231 // nextDown(x) = -nextUp(-x)
5232 if (nextDown) {
5233 changeSign();
5234 APFloat::opStatus Result = next(/*nextDown=*/false);
5235 changeSign();
5236 return Result;
5237 }
5238 switch (getCategory()) {
5239 case fcInfinity:
5240 // nextUp(+inf) = +inf
5241 // nextUp(-inf) = -getLargest()
5242 if (isNegative())
5243 makeLargest(true);
5244 return opOK;
5245
5246 case fcNaN:
5247 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
5248 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
5249 // change the payload.
5250 if (getFirst().isSignaling()) {
5251 // For consistency, propagate the sign of the sNaN to the qNaN.
5252 makeNaN(false, isNegative(), nullptr);
5253 return opInvalidOp;
5254 }
5255 return opOK;
5256
5257 case fcZero:
5258 // nextUp(pm 0) = +getSmallest()
5259 makeSmallest(false);
5260 return opOK;
5261
5262 case fcNormal:
5263 break;
5264 }
5265
5266 const APFloat &HiOld = getFirst();
5267 const APFloat &LoOld = getSecond();
5268
5269 APFloat NextLo = LoOld;
5270 NextLo.next(/*nextDown=*/false);
5271
5272 // We want to admit values where:
5273 // 1. abs(Lo) <= ulp(Hi)/2
5274 // 2. Hi == RTNE(Hi + lo)
5275 auto InLattice = [](const APFloat &Hi, const APFloat &Lo) {
5276 return Hi + Lo == Hi;
5277 };
5278
5279 // Check if (HiOld, nextUp(LoOld)) is in the lattice.
5280 if (InLattice(HiOld, NextLo)) {
5281 // Yes, the result is (HiOld, nextUp(LoOld)).
5282 Floats[1] = std::move(NextLo);
5283
5284 // TODO: Because we currently rely on semPPCDoubleDoubleLegacy, our maximum
5285 // value is defined to have exactly 106 bits of precision. This limitation
5286 // results in semPPCDoubleDouble being unable to reach its maximum canonical
5287 // value.
5288 DoubleAPFloat Largest{*Semantics, uninitialized};
5289 Largest.makeLargest(/*Neg=*/false);
5290 if (compare(Largest) == cmpGreaterThan)
5291 makeInf(/*Neg=*/false);
5292
5293 return opOK;
5294 }
5295
5296 // Now we need to handle the cases where (HiOld, nextUp(LoOld)) is not the
5297 // correct result. We know the new hi component will be nextUp(HiOld) but our
5298 // lattice rules make it a little ambiguous what the correct NextLo must be.
5299 APFloat NextHi = HiOld;
5300 NextHi.next(/*nextDown=*/false);
5301
5302 // nextUp(getLargest()) == INFINITY
5303 if (NextHi.isInfinity()) {
5304 makeInf(/*Neg=*/false);
5305 return opOK;
5306 }
5307
5308 // IEEE 754-2019 5.3.1:
5309 // "If x is the negative number of least magnitude in x's format, nextUp(x) is
5310 // -0."
5311 if (NextHi.isZero()) {
5312 makeZero(/*Neg=*/true);
5313 return opOK;
5314 }
5315
5316 // abs(NextLo) must be <= ulp(NextHi)/2. We want NextLo to be as close to
5317 // negative infinity as possible.
5318 NextLo = neg(scalbn(harrisonUlp(NextHi), -1, rmTowardZero));
5319 if (!InLattice(NextHi, NextLo))
5320 // RTNE may mean that Lo must be < ulp(NextHi) / 2 so we bump NextLo.
5321 NextLo.next(/*nextDown=*/false);
5322
5323 Floats[0] = std::move(NextHi);
5324 Floats[1] = std::move(NextLo);
5325
5326 return opOK;
5327}
5328
5329APFloat::opStatus DoubleAPFloat::convertToSignExtendedInteger(
5330 MutableArrayRef<integerPart> Input, unsigned int Width, bool IsSigned,
5331 roundingMode RM, bool *IsExact) const {
5332 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5333 "Unexpected Semantics");
5334
5335 // If Hi is not finite, or Lo is zero, the value is entirely represented
5336 // by Hi. Delegate to the simpler single-APFloat conversion.
5337 if (!getFirst().isFiniteNonZero() || getSecond().isZero())
5338 return getFirst().convertToInteger(Input, Width, IsSigned, RM, IsExact);
5339
5340 // First, round the full double-double value to an integral value. This
5341 // simplifies the rest of the function, as we no longer need to consider
5342 // fractional parts.
5343 *IsExact = false;
5344 DoubleAPFloat Integral = *this;
5345 const opStatus RoundStatus = Integral.roundToIntegral(RM);
5346 if (RoundStatus == opInvalidOp)
5347 return opInvalidOp;
5348 const APFloat &IntegralHi = Integral.getFirst();
5349 const APFloat &IntegralLo = Integral.getSecond();
5350
5351 // If rounding results in either component being zero, the sum is trivial.
5352 // Delegate to the simpler single-APFloat conversion.
5353 bool HiIsExact;
5354 if (IntegralHi.isZero() || IntegralLo.isZero()) {
5355 const opStatus HiStatus =
5356 IntegralHi.convertToInteger(Input, Width, IsSigned, RM, &HiIsExact);
5357 // The conversion from an integer-valued float to an APInt may fail if the
5358 // result would be out of range. Regardless, taking this path is only
5359 // possible if rounding occurred during the initial `roundToIntegral`.
5360 return HiStatus == opOK ? opInexact : HiStatus;
5361 }
5362
5363 // A negative number cannot be represented by an unsigned integer.
5364 // Since a double-double is canonical, if Hi is negative, the sum is negative.
5365 if (!IsSigned && IntegralHi.isNegative())
5366 return opInvalidOp;
5367
5368 // Handle the special boundary case where |Hi| is exactly the power of two
5369 // that marks the edge of the integer's range (e.g., 2^63 for int64_t). In
5370 // this situation, Hi itself won't fit, but the sum Hi + Lo might.
5371 // `PositiveOverflowWidth` is the bit number for this boundary (N-1 for
5372 // signed, N for unsigned).
5373 bool LoIsExact;
5374 const int HiExactLog2 = IntegralHi.getExactLog2Abs();
5375 const unsigned PositiveOverflowWidth = IsSigned ? Width - 1 : Width;
5376 if (HiExactLog2 >= 0 &&
5377 static_cast<unsigned>(HiExactLog2) == PositiveOverflowWidth) {
5378 // If Hi and Lo have the same sign, |Hi + Lo| > |Hi|, so the sum is
5379 // guaranteed to overflow. E.g., for uint128_t, (2^128, 1) overflows.
5380 if (IntegralHi.isNegative() == IntegralLo.isNegative())
5381 return opInvalidOp;
5382
5383 // If the signs differ, the sum will fit. We can compute the result using
5384 // properties of two's complement arithmetic without a wide intermediate
5385 // integer. E.g., for uint128_t, (2^128, -1) should be 2^128 - 1.
5386 const opStatus LoStatus = IntegralLo.convertToInteger(
5387 Input, Width, /*IsSigned=*/true, RM, &LoIsExact);
5388 if (LoStatus == opInvalidOp)
5389 return opInvalidOp;
5390
5391 // Adjust the bit pattern of Lo to account for Hi's value:
5392 // - For unsigned (Hi=2^Width): `2^Width + Lo` in `Width`-bit
5393 // arithmetic is equivalent to just `Lo`. The conversion of `Lo` above
5394 // already produced the correct final bit pattern.
5395 // - For signed (Hi=2^(Width-1)): The sum `2^(Width-1) + Lo` (where Lo<0)
5396 // can be computed by taking the two's complement pattern for `Lo` and
5397 // clearing the sign bit.
5398 if (IsSigned && !IntegralHi.isNegative())
5399 APInt::tcClearBit(Input.data(), PositiveOverflowWidth);
5400 *IsExact = RoundStatus == opOK;
5401 return RoundStatus;
5402 }
5403
5404 // Convert Hi into an integer. This may not fit but that is OK: we know that
5405 // Hi + Lo would not fit either in this situation.
5406 const opStatus HiStatus = IntegralHi.convertToInteger(
5407 Input, Width, IsSigned, rmTowardZero, &HiIsExact);
5408 if (HiStatus == opInvalidOp)
5409 return HiStatus;
5410
5411 // Convert Lo into a temporary integer of the same width.
5412 APSInt LoResult{Width, /*isUnsigned=*/!IsSigned};
5413 const opStatus LoStatus =
5414 IntegralLo.convertToInteger(LoResult, rmTowardZero, &LoIsExact);
5415 if (LoStatus == opInvalidOp)
5416 return LoStatus;
5417
5418 // Add Lo to Hi. This addition is guaranteed not to overflow because of the
5419 // double-double canonicalization rule (`|Lo| <= ulp(Hi)/2`). The only case
5420 // where the sum could cross the integer type's boundary is when Hi is a
5421 // power of two, which is handled by the special case block above.
5422 APInt::tcAdd(Input.data(), LoResult.getRawData(), /*carry=*/0, Input.size());
5423
5424 *IsExact = RoundStatus == opOK;
5425 return RoundStatus;
5426}
5427
5430 unsigned int Width, bool IsSigned,
5431 roundingMode RM, bool *IsExact) const {
5432 opStatus FS =
5433 convertToSignExtendedInteger(Input, Width, IsSigned, RM, IsExact);
5434
5435 if (FS == opInvalidOp) {
5436 const unsigned DstPartsCount = partCountForBits(Width);
5437 assert(DstPartsCount <= Input.size() && "Integer too big");
5438
5439 unsigned Bits;
5440 if (getCategory() == fcNaN)
5441 Bits = 0;
5442 else if (isNegative())
5443 Bits = IsSigned;
5444 else
5445 Bits = Width - IsSigned;
5446
5447 tcSetLeastSignificantBits(Input.data(), DstPartsCount, Bits);
5448 if (isNegative() && IsSigned)
5449 APInt::tcShiftLeft(Input.data(), DstPartsCount, Width - 1);
5450 }
5451
5452 return FS;
5453}
5454
5455APFloat::opStatus DoubleAPFloat::handleOverflow(roundingMode RM) {
5456 switch (RM) {
5458 makeLargest(/*Neg=*/isNegative());
5459 break;
5461 if (isNegative())
5462 makeInf(/*Neg=*/true);
5463 else
5464 makeLargest(/*Neg=*/false);
5465 break;
5467 if (isNegative())
5468 makeLargest(/*Neg=*/true);
5469 else
5470 makeInf(/*Neg=*/false);
5471 break;
5474 makeInf(/*Neg=*/isNegative());
5475 break;
5476 default:
5477 llvm_unreachable("Invalid rounding mode found");
5478 }
5479 opStatus S = opInexact;
5480 if (!getFirst().isFinite())
5481 S = static_cast<opStatus>(S | opOverflow);
5482 return S;
5483}
5484
5485APFloat::opStatus DoubleAPFloat::convertFromUnsignedParts(
5486 const integerPart *Src, unsigned int SrcCount, roundingMode RM) {
5487 // Find the most significant bit of the source integer. APInt::tcMSB returns
5488 // UINT_MAX for a zero value.
5489 const unsigned SrcMSB = APInt::tcMSB(Src, SrcCount);
5490 if (SrcMSB == UINT_MAX) {
5491 // The source integer is 0.
5492 makeZero(/*Neg=*/false);
5493 return opOK;
5494 }
5495
5496 // Create a minimally-sized APInt to represent the source value.
5497 const unsigned SrcBitWidth = SrcMSB + 1;
5498 APSInt SrcInt{APInt{/*numBits=*/SrcBitWidth, ArrayRef(Src, SrcCount)},
5499 /*isUnsigned=*/true};
5500
5501 // Stage 1: Initial Approximation.
5502 // Convert the source integer SrcInt to the Hi part of the DoubleAPFloat.
5503 // We use round-to-nearest because it minimizes the initial error, which is
5504 // crucial for the subsequent steps.
5506 Hi.convertFromAPInt(SrcInt, /*IsSigned=*/false, rmNearestTiesToEven);
5507
5508 // If the first approximation already overflows, the number is too large.
5509 // NOTE: The underlying semantics are *more* conservative when choosing to
5510 // overflow because their notion of ULP is much larger. As such, it is always
5511 // safe to overflow at the DoubleAPFloat level if the APFloat overflows.
5512 if (!Hi.isFinite())
5513 return handleOverflow(RM);
5514
5515 // Stage 2: Exact Error Calculation.
5516 // Calculate the exact error of the first approximation: Error = SrcInt - Hi.
5517 // This is done by converting Hi back to an integer and subtracting it from
5518 // the original source.
5519 bool HiAsIntIsExact;
5520 // Create an integer representation of Hi. Its width is determined by the
5521 // exponent of Hi, ensuring it's just large enough. This width can exceed
5522 // SrcBitWidth if the conversion to Hi rounded up to a power of two.
5523 // accurately when converted back to an integer.
5524 APSInt HiAsInt{static_cast<uint32_t>(ilogb(Hi) + 1), /*isUnsigned=*/true};
5525 Hi.convertToInteger(HiAsInt, rmNearestTiesToEven, &HiAsIntIsExact);
5526 const APInt Error = SrcInt.zext(HiAsInt.getBitWidth()) - HiAsInt;
5527
5528 // Stage 3: Error Approximation and Rounding.
5529 // Convert the integer error into the Lo part of the DoubleAPFloat. This step
5530 // captures the remainder of the original number. The rounding mode for this
5531 // conversion (LoRM) may need to be adjusted from the user-requested RM to
5532 // ensure the final sum (Hi + Lo) rounds correctly.
5533 roundingMode LoRM = RM;
5534 // Adjustments are only necessary when the initial approximation Hi was an
5535 // overestimate, making the Error negative.
5536 if (Error.isNegative()) {
5537 if (RM == rmNearestTiesToAway) {
5538 // For rmNearestTiesToAway, a tie should round away from zero. Since
5539 // SrcInt is positive, this means rounding toward +infinity.
5540 // A standard conversion of a negative Error would round ties toward
5541 // -infinity, causing the final sum Hi + Lo to be smaller. To
5542 // counteract this, we detect the tie case and override the rounding
5543 // mode for Lo to rmTowardPositive.
5544 const unsigned ErrorActiveBits = Error.getSignificantBits() - 1;
5545 const unsigned LoPrecision = getSecond().getSemantics().precision;
5546 if (ErrorActiveBits > LoPrecision) {
5547 const unsigned RoundingBoundary = ErrorActiveBits - LoPrecision;
5548 // A tie occurs when the bits to be truncated are of the form 100...0.
5549 // This is detected by checking if the number of trailing zeros is
5550 // exactly one less than the number of bits being truncated.
5551 if (Error.countTrailingZeros() == RoundingBoundary - 1)
5552 LoRM = rmTowardPositive;
5553 }
5554 } else if (RM == rmTowardZero) {
5555 // For rmTowardZero, the final positive result must be truncated (rounded
5556 // down). When Hi is an overestimate, Error is negative. A standard
5557 // rmTowardZero conversion of Error would make it *less* negative,
5558 // effectively rounding the final sum Hi + Lo *up*. To ensure the sum
5559 // rounds down correctly, we force Lo to round toward -infinity.
5560 LoRM = rmTowardNegative;
5561 }
5562 }
5563
5565 opStatus Status = Lo.convertFromAPInt(Error, /*IsSigned=*/true, LoRM);
5566
5567 // Renormalize the pair (Hi, Lo) into a canonical DoubleAPFloat form where the
5568 // components do not overlap. fastTwoSum performs this operation.
5569 std::tie(Hi, Lo) = fastTwoSum(Hi, Lo);
5570 Floats[0] = std::move(Hi);
5571 Floats[1] = std::move(Lo);
5572
5573 // A final check for overflow is needed because fastTwoSum can cause a
5574 // carry-out from Lo that pushes Hi to infinity.
5575 if (!getFirst().isFinite())
5576 return handleOverflow(RM);
5577
5578 // The largest DoubleAPFloat must be canonical. Values which are larger are
5579 // not canonical and are equivalent to overflow.
5580 if (getFirst().isFiniteNonZero() && Floats[0].isLargest()) {
5581 DoubleAPFloat Largest{*Semantics};
5582 Largest.makeLargest(/*Neg=*/false);
5583 if (compare(Largest) == APFloat::cmpGreaterThan)
5584 return handleOverflow(RM);
5585 }
5586
5587 // The final status of the operation is determined by the conversion of the
5588 // error term. If Lo could represent Error exactly, the entire conversion
5589 // is exact. Otherwise, it's inexact.
5590 return Status;
5591}
5592
5594 bool IsSigned,
5595 roundingMode RM) {
5596 const bool NegateInput = IsSigned && Input.isNegative();
5597 APInt API = Input;
5598 if (NegateInput)
5599 API.negate();
5600
5602 convertFromUnsignedParts(API.getRawData(), API.getNumWords(), RM);
5603 if (NegateInput)
5604 changeSign();
5605 return Status;
5606}
5607
5609 unsigned int HexDigits,
5610 bool UpperCase,
5611 roundingMode RM) const {
5612 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5613 "Unexpected Semantics");
5614 return APFloat(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt())
5615 .convertToHexString(DST, HexDigits, UpperCase, RM);
5616}
5617
5619 return getCategory() == fcNormal &&
5620 (Floats[0].isDenormal() || Floats[1].isDenormal() ||
5621 // (double)(Hi + Lo) == Hi defines a normal number.
5622 Floats[0] != Floats[0] + Floats[1]);
5623}
5624
5626 if (getCategory() != fcNormal)
5627 return false;
5628 DoubleAPFloat Tmp(*this);
5629 Tmp.makeSmallest(this->isNegative());
5630 return Tmp.compare(*this) == cmpEqual;
5631}
5632
5634 if (getCategory() != fcNormal)
5635 return false;
5636
5637 DoubleAPFloat Tmp(*this);
5639 return Tmp.compare(*this) == cmpEqual;
5640}
5641
5643 if (getCategory() != fcNormal)
5644 return false;
5645 DoubleAPFloat Tmp(*this);
5646 Tmp.makeLargest(this->isNegative());
5647 return Tmp.compare(*this) == cmpEqual;
5648}
5649
5651 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5652 "Unexpected Semantics");
5653 return Floats[0].isInteger() && Floats[1].isInteger();
5654}
5655
5657 unsigned FormatPrecision,
5658 unsigned FormatMaxPadding,
5659 bool TruncateZero) const {
5660 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5661 "Unexpected Semantics");
5662 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt())
5663 .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
5664}
5665
5667 // In order for Hi + Lo to be a power of two, the following must be true:
5668 // 1. Hi must be a power of two.
5669 // 2. Lo must be zero.
5670 if (getSecond().isNonZero())
5671 return INT_MIN;
5672 return getFirst().getExactLog2Abs();
5673}
5674
5675int ilogb(const DoubleAPFloat &Arg) {
5676 const APFloat &Hi = Arg.getFirst();
5677 const APFloat &Lo = Arg.getSecond();
5678 int IlogbResult = ilogb(Hi);
5679 // Zero and non-finite values can delegate to ilogb(Hi).
5680 if (Arg.getCategory() != fcNormal)
5681 return IlogbResult;
5682 // If Lo can't change the binade, we can delegate to ilogb(Hi).
5683 if (Lo.isZero() || Hi.isNegative() == Lo.isNegative())
5684 return IlogbResult;
5685 if (Hi.getExactLog2Abs() == INT_MIN)
5686 return IlogbResult;
5687 // Numbers of the form 2^a - 2^b or -2^a + 2^b are almost powers of two but
5688 // get nudged out of the binade by the low component.
5689 return IlogbResult - 1;
5690}
5691
5694 assert(Arg.Semantics == &APFloatBase::PPCDoubleDouble() &&
5695 "Unexpected Semantics");
5697 scalbn(Arg.Floats[0], Exp, RM),
5698 scalbn(Arg.Floats[1], Exp, RM));
5699}
5700
5701DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
5703 assert(Arg.Semantics == &APFloatBase::PPCDoubleDouble() &&
5704 "Unexpected Semantics");
5705
5706 // Get the unbiased exponent e of the number, where |Arg| = m * 2^e for m in
5707 // [1.0, 2.0).
5708 Exp = ilogb(Arg);
5709
5710 // For NaNs, quiet any signaling NaN and return the result, as per standard
5711 // practice.
5712 if (Exp == APFloat::IEK_NaN) {
5713 DoubleAPFloat Quiet{Arg};
5714 Quiet.getFirst() = Quiet.getFirst().makeQuiet();
5715 return Quiet;
5716 }
5717
5718 // For infinity, return it unchanged. The exponent remains IEK_Inf.
5719 if (Exp == APFloat::IEK_Inf)
5720 return Arg;
5721
5722 // For zero, the fraction is zero and the standard requires the exponent be 0.
5723 if (Exp == APFloat::IEK_Zero) {
5724 Exp = 0;
5725 return Arg;
5726 }
5727
5728 const APFloat &Hi = Arg.getFirst();
5729 const APFloat &Lo = Arg.getSecond();
5730
5731 // frexp requires the fraction's absolute value to be in [0.5, 1.0).
5732 // ilogb provides an exponent for an absolute value in [1.0, 2.0).
5733 // Increment the exponent to ensure the fraction is in the correct range.
5734 ++Exp;
5735
5736 const bool SignsDisagree = Hi.isNegative() != Lo.isNegative();
5737 APFloat Second = Lo;
5738 if (Arg.getCategory() == APFloat::fcNormal && Lo.isFiniteNonZero()) {
5739 roundingMode LoRoundingMode;
5740 // The interpretation of rmTowardZero depends on the sign of the combined
5741 // Arg rather than the sign of the component.
5742 if (RM == rmTowardZero)
5743 LoRoundingMode = Arg.isNegative() ? rmTowardPositive : rmTowardNegative;
5744 // For rmNearestTiesToAway, we face a similar problem. If signs disagree,
5745 // Lo is a correction *toward* zero relative to Hi. Rounding Lo
5746 // "away from zero" based on its own sign would move the value in the
5747 // wrong direction. As a safe proxy, we use rmNearestTiesToEven, which is
5748 // direction-agnostic. We only need to bother with this if Lo is scaled
5749 // down.
5750 else if (RM == rmNearestTiesToAway && SignsDisagree && Exp > 0)
5751 LoRoundingMode = rmNearestTiesToEven;
5752 else
5753 LoRoundingMode = RM;
5754 Second = scalbn(Lo, -Exp, LoRoundingMode);
5755 // The rmNearestTiesToEven proxy is correct most of the time, but it
5756 // differs from rmNearestTiesToAway when the scaled value of Lo is an
5757 // exact midpoint.
5758 // NOTE: This is morally equivalent to roundTiesTowardZero.
5759 if (RM == rmNearestTiesToAway && LoRoundingMode == rmNearestTiesToEven) {
5760 // Re-scale the result back to check if rounding occurred.
5761 const APFloat RecomposedLo = scalbn(Second, Exp, rmNearestTiesToEven);
5762 if (RecomposedLo != Lo) {
5763 // RoundingError tells us which direction we rounded:
5764 // - RoundingError > 0: we rounded up.
5765 // - RoundingError < 0: we rounded down.
5766 const APFloat RoundingError = RecomposedLo - Lo;
5767 // Determine if scalbn(Lo, -Exp) landed exactly on a midpoint.
5768 // We do this by checking if the absolute rounding error is exactly
5769 // half a ULP of the result.
5770 const APFloat UlpOfSecond = harrisonUlp(Second);
5771 const APFloat ScaledUlpOfSecond =
5772 scalbn(UlpOfSecond, Exp - 1, rmNearestTiesToEven);
5773 const bool IsMidpoint = abs(RoundingError) == ScaledUlpOfSecond;
5774 const bool RoundedLoAway =
5775 Second.isNegative() == RoundingError.isNegative();
5776 // The sign of Hi and Lo disagree and we rounded Lo away: we must
5777 // decrease the magnitude of Second to increase the magnitude
5778 // First+Second.
5779 if (IsMidpoint && RoundedLoAway)
5780 Second.next(/*nextDown=*/!Second.isNegative());
5781 }
5782 }
5783 // Handle a tricky edge case where Arg is slightly less than a power of two
5784 // (e.g., Arg = 2^k - epsilon). In this situation:
5785 // 1. Hi is 2^k, and Lo is a small negative value -epsilon.
5786 // 2. ilogb(Arg) correctly returns k-1.
5787 // 3. Our initial Exp becomes (k-1) + 1 = k.
5788 // 4. Scaling Hi (2^k) by 2^-k would yield a magnitude of 1.0 and
5789 // scaling Lo by 2^-k would yield zero. This would make the result 1.0
5790 // which is an invalid fraction, as the required interval is [0.5, 1.0).
5791 // We detect this specific case by checking if Hi is a power of two and if
5792 // the scaled Lo underflowed to zero. The fix: Increment Exp to k+1. This
5793 // adjusts the scale factor, causing Hi to be scaled to 0.5, which is a
5794 // valid fraction.
5795 if (Second.isZero() && SignsDisagree && Hi.getExactLog2Abs() != INT_MIN)
5796 ++Exp;
5797 }
5798
5799 APFloat First = scalbn(Hi, -Exp, RM);
5801 std::move(Second));
5802}
5803
5804APInt DoubleAPFloat::getNaNPayload() const { return Floats[0].getNaNPayload(); }
5805} // namespace detail
5806
5807APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
5808 if (usesLayout<IEEEFloat>(Semantics)) {
5809 new (&IEEE) IEEEFloat(std::move(F));
5810 return;
5811 }
5812 if (usesLayout<DoubleAPFloat>(Semantics)) {
5813 const fltSemantics& S = F.getSemantics();
5814 new (&Double) DoubleAPFloat(Semantics, APFloat(std::move(F), S),
5816 return;
5817 }
5818 llvm_unreachable("Unexpected semantics");
5819}
5820
5825
5826hash_code hash_value(const APFloat &Arg) {
5827 if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics()))
5828 return hash_value(Arg.U.IEEE);
5829 if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics()))
5830 return hash_value(Arg.U.Double);
5831 llvm_unreachable("Unexpected semantics");
5832}
5833
5835 : APFloat(Semantics) {
5836 auto StatusOrErr = convertFromString(S, rmNearestTiesToEven);
5837 assert(StatusOrErr && "Invalid floating point representation");
5838 consumeError(StatusOrErr.takeError());
5839}
5840
5842 if (isZero())
5843 return isNegative() ? fcNegZero : fcPosZero;
5844 if (isNormal())
5845 return isNegative() ? fcNegNormal : fcPosNormal;
5846 if (isDenormal())
5848 if (isInfinity())
5849 return isNegative() ? fcNegInf : fcPosInf;
5850 assert(isNaN() && "Other class of FP constant");
5851 return isSignaling() ? fcSNan : fcQNan;
5852}
5853
5854bool APFloat::getExactInverse(APFloat *Inv) const {
5855 // Only finite, non-zero numbers can have a useful, representable inverse.
5856 // This check filters out +/- zero, +/- infinity, and NaN.
5857 if (!isFiniteNonZero())
5858 return false;
5859
5860 // Historically, this function rejects subnormal inputs. One reason why this
5861 // might be important is that subnormals may behave differently under FTZ/DAZ
5862 // runtime behavior.
5863 if (isDenormal())
5864 return false;
5865
5866 // A number has an exact, representable inverse if and only if it is a power
5867 // of two.
5868 //
5869 // Mathematical Rationale:
5870 // 1. A binary floating-point number x is a dyadic rational, meaning it can
5871 // be written as x = M / 2^k for integers M (the significand) and k.
5872 // 2. The inverse is 1/x = 2^k / M.
5873 // 3. For 1/x to also be a dyadic rational (and thus exactly representable
5874 // in binary), its denominator M must also be a power of two.
5875 // Let's say M = 2^m.
5876 // 4. Substituting this back into the formula for x, we get
5877 // x = (2^m) / (2^k) = 2^(m-k).
5878 //
5879 // This proves that x must be a power of two.
5880
5881 // getExactLog2Abs() returns the integer exponent if the number is a power of
5882 // two or INT_MIN if it is not.
5883 const int Exp = getExactLog2Abs();
5884 if (Exp == INT_MIN)
5885 return false;
5886
5887 // The inverse of +/- 2^Exp is +/- 2^(-Exp). We can compute this by
5888 // scaling 1.0 by the negated exponent.
5889 APFloat Reciprocal =
5890 scalbn(APFloat::getOne(getSemantics(), /*Negative=*/isNegative()), -Exp,
5891 rmTowardZero);
5892
5893 // scalbn might round if the resulting exponent -Exp is outside the
5894 // representable range, causing overflow (to infinity) or underflow. We
5895 // must verify that the result is still the exact power of two we expect.
5896 if (Reciprocal.getExactLog2Abs() != -Exp)
5897 return false;
5898
5899 // Avoid multiplication with a subnormal, it is not safe on all platforms and
5900 // may be slower than a normal division.
5901 if (Reciprocal.isDenormal())
5902 return false;
5903
5904 assert(Reciprocal.isFiniteNonZero());
5905
5906 if (Inv)
5907 *Inv = std::move(Reciprocal);
5908
5909 return true;
5910}
5911
5913 roundingMode RM, bool *losesInfo) {
5914 if (&getSemantics() == &ToSemantics) {
5915 *losesInfo = false;
5916 return opOK;
5917 }
5918 if (usesLayout<IEEEFloat>(getSemantics()) &&
5919 usesLayout<IEEEFloat>(ToSemantics))
5920 return U.IEEE.convert(ToSemantics, RM, losesInfo);
5921 if (usesLayout<IEEEFloat>(getSemantics()) &&
5922 usesLayout<DoubleAPFloat>(ToSemantics)) {
5923 assert(&ToSemantics == &APFloatBase::semPPCDoubleDouble);
5924 auto Ret =
5925 U.IEEE.convert(APFloatBase::semPPCDoubleDoubleLegacy, RM, losesInfo);
5926 *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
5927 return Ret;
5928 }
5929 if (usesLayout<DoubleAPFloat>(getSemantics()) &&
5930 usesLayout<IEEEFloat>(ToSemantics)) {
5931 auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo);
5932 *this = APFloat(std::move(getIEEE()), ToSemantics);
5933 return Ret;
5934 }
5935 llvm_unreachable("Unexpected semantics");
5936}
5937
5941
5943 SmallVector<char, 16> Buffer;
5944 toString(Buffer);
5945 OS << Buffer;
5946}
5947
5948#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
5950 print(dbgs());
5951 dbgs() << '\n';
5952}
5953#endif
5954
5956 NID.Add(bitcastToAPInt());
5957}
5958
5960 roundingMode rounding_mode,
5961 bool *isExact) const {
5962 unsigned bitWidth = result.getBitWidth();
5963 SmallVector<uint64_t, 4> parts(result.getNumWords());
5964 opStatus status = convertToInteger(parts, bitWidth, result.isSigned(),
5965 rounding_mode, isExact);
5966 // Keeps the original signed-ness.
5967 result = APInt(bitWidth, parts);
5968 return status;
5969}
5970
5972 if (&getSemantics() == &APFloatBase::semIEEEdouble)
5973 return getIEEE().convertToDouble();
5974 assert(isRepresentableBy(getSemantics(), semIEEEdouble) &&
5975 "Float semantics is not representable by IEEEdouble");
5976 APFloat Temp = *this;
5977 bool LosesInfo;
5978 [[maybe_unused]] opStatus St =
5979 Temp.convert(APFloatBase::semIEEEdouble, rmNearestTiesToEven, &LosesInfo);
5980 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
5981 return Temp.getIEEE().convertToDouble();
5982}
5983
5984#ifdef HAS_IEE754_FLOAT128
5985float128 APFloat::convertToQuad() const {
5986 if (&getSemantics() == &APFloatBase::semIEEEquad)
5987 return getIEEE().convertToQuad();
5988 assert(isRepresentableBy(getSemantics(), semIEEEquad) &&
5989 "Float semantics is not representable by IEEEquad");
5990 APFloat Temp = *this;
5991 bool LosesInfo;
5992 [[maybe_unused]] opStatus St =
5993 Temp.convert(APFloatBase::semIEEEquad, rmNearestTiesToEven, &LosesInfo);
5994 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
5995 return Temp.getIEEE().convertToQuad();
5996}
5997#endif
5998
6000 if (&getSemantics() == &APFloatBase::semIEEEsingle)
6001 return getIEEE().convertToFloat();
6002 assert(isRepresentableBy(getSemantics(), semIEEEsingle) &&
6003 "Float semantics is not representable by IEEEsingle");
6004 APFloat Temp = *this;
6005 bool LosesInfo;
6006 [[maybe_unused]] opStatus St =
6007 Temp.convert(APFloatBase::semIEEEsingle, rmNearestTiesToEven, &LosesInfo);
6008 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
6009 return Temp.getIEEE().convertToFloat();
6010}
6011
6013 static constexpr StringLiteral ValidFormats[] = {
6014 "Float8E5M2", "Float8E5M2FNUZ", "Float8E4M3", "Float8E4M3FN",
6015 "Float8E4M3FNUZ", "Float8E4M3B11FNUZ", "Float8E3M4", "Float8E8M0FNU",
6016 "Float6E3M2FN", "Float6E2M3FN", "Float4E2M1FN"};
6017 return llvm::is_contained(ValidFormats, Format);
6018}
6019
6021 // TODO: extend to remaining arbitrary FP types: Float8E4M3, Float8E3M4,
6022 // Float8E5M2FNUZ, Float8E4M3FNUZ, Float8E4M3B11FNUZ, Float8E8M0FNU.
6024 .Case("Float8E5M2", &semFloat8E5M2)
6025 .Case("Float8E4M3FN", &semFloat8E4M3FN)
6026 .Case("Float4E2M1FN", &semFloat4E2M1FN)
6027 .Case("Float6E3M2FN", &semFloat6E3M2FN)
6028 .Case("Float6E2M3FN", &semFloat6E2M3FN)
6029 .Default(nullptr);
6030}
6031
6032APFloat::Storage::~Storage() {
6033 if (usesLayout<IEEEFloat>(*semantics)) {
6034 IEEE.~IEEEFloat();
6035 return;
6036 }
6037 if (usesLayout<DoubleAPFloat>(*semantics)) {
6038 Double.~DoubleAPFloat();
6039 return;
6040 }
6041 llvm_unreachable("Unexpected semantics");
6042}
6043
6044APFloat::Storage::Storage(const APFloat::Storage &RHS) {
6045 if (usesLayout<IEEEFloat>(*RHS.semantics)) {
6046 new (this) IEEEFloat(RHS.IEEE);
6047 return;
6048 }
6049 if (usesLayout<DoubleAPFloat>(*RHS.semantics)) {
6050 new (this) DoubleAPFloat(RHS.Double);
6051 return;
6052 }
6053 llvm_unreachable("Unexpected semantics");
6054}
6055
6056APFloat::Storage::Storage(APFloat::Storage &&RHS) {
6057 if (usesLayout<IEEEFloat>(*RHS.semantics)) {
6058 new (this) IEEEFloat(std::move(RHS.IEEE));
6059 return;
6060 }
6061 if (usesLayout<DoubleAPFloat>(*RHS.semantics)) {
6062 new (this) DoubleAPFloat(std::move(RHS.Double));
6063 return;
6064 }
6065 llvm_unreachable("Unexpected semantics");
6066}
6067
6068APFloat::Storage &APFloat::Storage::operator=(const APFloat::Storage &RHS) {
6069 if (usesLayout<IEEEFloat>(*semantics) &&
6070 usesLayout<IEEEFloat>(*RHS.semantics)) {
6071 IEEE = RHS.IEEE;
6072 } else if (usesLayout<DoubleAPFloat>(*semantics) &&
6073 usesLayout<DoubleAPFloat>(*RHS.semantics)) {
6074 Double = RHS.Double;
6075 } else if (this != &RHS) {
6076 this->~Storage();
6077 new (this) Storage(RHS);
6078 }
6079 return *this;
6080}
6081
6082APFloat::Storage &APFloat::Storage::operator=(APFloat::Storage &&RHS) {
6083 if (usesLayout<IEEEFloat>(*semantics) &&
6084 usesLayout<IEEEFloat>(*RHS.semantics)) {
6085 IEEE = std::move(RHS.IEEE);
6086 } else if (usesLayout<DoubleAPFloat>(*semantics) &&
6087 usesLayout<DoubleAPFloat>(*RHS.semantics)) {
6088 Double = std::move(RHS.Double);
6089 } else if (this != &RHS) {
6090 this->~Storage();
6091 new (this) Storage(std::move(RHS));
6092 }
6093 return *this;
6094}
6095
// TODO: Support other rounding modes once the LLVM libc math functions
// implement static rounding modes.
6098APFloat exp(const APFloat &X, RoundingMode rounding_mode) {
6099 if (rounding_mode == APFloatBase::rmNearestTiesToEven) {
6100 if (APFloat::SemanticsToEnum(X.getSemantics()) ==
6102 float result = LIBC_NAMESPACE::shared::expf(X.convertToFloat());
6103 return APFloat(result);
6104 }
6105 if (APFloat::SemanticsToEnum(X.getSemantics()) ==
6107 double result = LIBC_NAMESPACE::shared::exp(X.convertToDouble());
6108 return APFloat(result);
6109 }
6110 }
6111 llvm_unreachable("Unexpected semantics");
6112}
6113
6114} // namespace llvm
6115
6116#undef APFLOAT_DISPATCH_ON_SEMANTICS
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define PackCategoriesIntoKey(_lhs, _rhs)
A macro used to combine two fcCategory enums into one key which can be used in a switch statement to ...
Definition APFloat.cpp:62
This file declares a class to represent arbitrary precision floating point values and provide a varie...
#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL)
Definition APFloat.h:26
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
Function Alias Analysis false
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
static bool isNeg(Value *V)
Returns true if the operation is a negation of V, and it works for both integers and floats.
static bool isSigned(unsigned Opcode)
Utilities for dealing with flags related to floating point properties and mode controls.
This file defines a hash set that can be used to remove duplication of nodes in a graph.
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define T
#define P(N)
if(PassOpts->AAPipeline)
This file contains some templates that are useful if you are working with the STL at all.
This file contains some functions that are useful when dealing with strings.
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, const llvm::StringTable &StandardNames, VectorLibrary VecLib)
Initialize the set of available library functions based on the specified target triple.
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static const fltSemantics & Float8E4M3FN()
Definition APFloat.h:306
static LLVM_ABI const llvm::fltSemantics & EnumToSemantics(Semantics S)
Definition APFloat.cpp:111
static LLVM_ABI bool semanticsHasInf(const fltSemantics &)
Definition APFloat.cpp:260
cmpResult
IEEE-754R 5.11: Floating Point Comparison Relations.
Definition APFloat.h:334
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static LLVM_ABI ExponentType semanticsMinExponent(const fltSemantics &)
Definition APFloat.cpp:235
llvm::RoundingMode roundingMode
IEEE-754R 4.3: Rounding-direction attributes.
Definition APFloat.h:342
static const fltSemantics & BFloat()
Definition APFloat.h:295
static const fltSemantics & IEEEquad()
Definition APFloat.h:298
static LLVM_ABI unsigned int semanticsSizeInBits(const fltSemantics &)
Definition APFloat.cpp:238
static const fltSemantics & Float8E8M0FNU()
Definition APFloat.h:313
static LLVM_ABI bool semanticsHasSignedRepr(const fltSemantics &)
Definition APFloat.cpp:256
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static LLVM_ABI unsigned getSizeInBits(const fltSemantics &Sem)
Returns the size of the floating point number (in bits) in the given semantics.
Definition APFloat.cpp:291
static const fltSemantics & x87DoubleExtended()
Definition APFloat.h:317
static constexpr roundingMode rmTowardNegative
Definition APFloat.h:347
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static LLVM_ABI bool isValidArbitraryFPFormat(StringRef Format)
Returns true if the given string is a valid arbitrary floating-point format interpretation for llvm....
Definition APFloat.cpp:6012
static LLVM_ABI bool hasSignBitInMSB(const fltSemantics &)
Definition APFloat.cpp:273
static LLVM_ABI ExponentType semanticsMaxExponent(const fltSemantics &)
Definition APFloat.cpp:231
friend class APFloat
Definition APFloat.h:291
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Definition APFloat.cpp:227
static LLVM_ABI bool semanticsHasNaN(const fltSemantics &)
Definition APFloat.cpp:264
static LLVM_ABI Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
Definition APFloat.cpp:158
int32_t ExponentType
A signed type to represent a floating point numbers unbiased exponent.
Definition APFloat.h:155
static constexpr unsigned integerPartWidth
Definition APFloat.h:152
static const fltSemantics & PPCDoubleDoubleLegacy()
Definition APFloat.h:300
APInt::WordType integerPart
Definition APFloat.h:151
static LLVM_ABI bool semanticsHasZero(const fltSemantics &)
Definition APFloat.cpp:252
static LLVM_ABI bool isRepresentableAsNormalIn(const fltSemantics &Src, const fltSemantics &Dst)
Definition APFloat.cpp:277
static const fltSemantics & Float8E5M2FNUZ()
Definition APFloat.h:304
static const fltSemantics & Float8E4M3FNUZ()
Definition APFloat.h:307
static constexpr roundingMode rmTowardPositive
Definition APFloat.h:346
static const fltSemantics & IEEEhalf()
Definition APFloat.h:294
static const fltSemantics & Float4E2M1FN()
Definition APFloat.h:316
static const fltSemantics & Float6E2M3FN()
Definition APFloat.h:315
static const fltSemantics & Float8E4M3()
Definition APFloat.h:305
static const fltSemantics & Float8E4M3B11FNUZ()
Definition APFloat.h:308
static LLVM_ABI bool isRepresentableBy(const fltSemantics &A, const fltSemantics &B)
Definition APFloat.cpp:203
static const fltSemantics & Float8E3M4()
Definition APFloat.h:311
static LLVM_ABI bool isIEEELikeFP(const fltSemantics &)
Definition APFloat.cpp:268
static const fltSemantics & Float8E5M2()
Definition APFloat.h:303
fltCategory
Category of internally-represented number.
Definition APFloat.h:370
static constexpr roundingMode rmNearestTiesToAway
Definition APFloat.h:349
static const fltSemantics & PPCDoubleDouble()
Definition APFloat.h:299
static const fltSemantics & Float6E3M2FN()
Definition APFloat.h:314
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:360
static LLVM_ABI const fltSemantics * getArbitraryFPSemantics(StringRef Format)
Returns the fltSemantics for a given arbitrary FP format string, or nullptr if invalid.
Definition APFloat.cpp:6020
static const fltSemantics & FloatTF32()
Definition APFloat.h:312
static LLVM_ABI unsigned int semanticsIntSizeInBits(const fltSemantics &, bool)
Definition APFloat.cpp:241
static APFloat getQNaN(const fltSemantics &Sem, bool Negative=false, const APInt *payload=nullptr)
Factory for QNaN values.
Definition APFloat.h:1179
LLVM_ABI void Profile(FoldingSetNodeID &NID) const
Used to insert APFloat objects, or objects that contain APFloat objects, into FoldingSets.
Definition APFloat.cpp:5955
opStatus divide(const APFloat &RHS, roundingMode RM)
Definition APFloat.h:1267
bool isFiniteNonZero() const
Definition APFloat.h:1548
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:5912
LLVM_READONLY int getExactLog2Abs() const
Definition APFloat.h:1594
bool isNegative() const
Definition APFloat.h:1538
LLVM_ABI bool getExactInverse(APFloat *Inv) const
If this value is normal and has an exact, normal, multiplicative inverse, store it in inv and return ...
Definition APFloat.cpp:5854
cmpResult compareAbsoluteValue(const APFloat &RHS) const
Definition APFloat.h:1493
friend DoubleAPFloat
Definition APFloat.h:1610
LLVM_ABI double convertToDouble() const
Converts this APFloat to host double value.
Definition APFloat.cpp:5971
void toString(SmallVectorImpl< char > &Str, unsigned FormatPrecision=0, unsigned FormatMaxPadding=3, bool TruncateZero=true) const
Definition APFloat.h:1575
bool isNormal() const
Definition APFloat.h:1542
bool isDenormal() const
Definition APFloat.h:1539
opStatus add(const APFloat &RHS, roundingMode RM)
Definition APFloat.h:1240
static LLVM_ABI APFloat getAllOnesValue(const fltSemantics &Semantics)
Returns a float which is bitcasted from an all one value int.
Definition APFloat.cpp:5938
LLVM_ABI friend hash_code hash_value(const APFloat &Arg)
See friend declarations above.
Definition APFloat.cpp:5826
const fltSemantics & getSemantics() const
Definition APFloat.h:1546
bool isFinite() const
Definition APFloat.h:1543
bool isNaN() const
Definition APFloat.h:1536
static APFloat getOne(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative One.
Definition APFloat.h:1147
unsigned int convertToHexString(char *DST, unsigned int HexDigits, bool UpperCase, roundingMode RM) const
Definition APFloat.h:1528
LLVM_ABI float convertToFloat() const
Converts this APFloat to host float value.
Definition APFloat.cpp:5999
bool isSignaling() const
Definition APFloat.h:1540
opStatus fusedMultiplyAdd(const APFloat &Multiplicand, const APFloat &Addend, roundingMode RM)
Definition APFloat.h:1294
opStatus remainder(const APFloat &RHS)
Definition APFloat.h:1276
bool isZero() const
Definition APFloat.h:1534
APInt bitcastToAPInt() const
Definition APFloat.h:1430
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.h:1391
opStatus next(bool nextDown)
Definition APFloat.h:1313
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition APFloat.h:1157
friend APFloat scalbn(APFloat X, int Exp, roundingMode RM)
static APFloat getSmallest(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) finite number in the given semantics.
Definition APFloat.h:1207
LLVM_ABI FPClassTest classify() const
Return the FPClassTest which will return true for the value.
Definition APFloat.cpp:5841
opStatus mod(const APFloat &RHS)
Definition APFloat.h:1285
Expected< opStatus > convertFromString(StringRef, roundingMode)
Fill this APFloat with the result of a string conversion.
Definition APFloat.cpp:5821
friend IEEEFloat
Definition APFloat.h:1609
LLVM_DUMP_METHOD void dump() const
Definition APFloat.cpp:5949
LLVM_ABI void print(raw_ostream &) const
Definition APFloat.cpp:5942
opStatus roundToIntegral(roundingMode RM)
Definition APFloat.h:1307
static bool hasSignificand(const fltSemantics &Sem)
Returns true if the given semantics has actual significand.
Definition APFloat.h:1232
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
Definition APFloat.h:1138
bool isInfinity() const
Definition APFloat.h:1535
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt udiv(const APInt &RHS) const
Unsigned division operation.
Definition APInt.cpp:1616
static LLVM_ABI void tcSetBit(WordType *, unsigned bit)
Set the given bit of a bignum. Zero-based.
Definition APInt.cpp:2420
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
static LLVM_ABI void tcSet(WordType *, WordType, unsigned)
Sets the least significant part of a bignum to the input value, and zeroes out higher parts.
Definition APInt.cpp:2392
static LLVM_ABI void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition APInt.cpp:1810
static LLVM_ABI int tcExtractBit(const WordType *, unsigned bit)
Extract the given bit of a bignum; returns 0 or 1. Zero-based.
Definition APInt.cpp:2415
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1055
static LLVM_ABI WordType tcAdd(WordType *, const WordType *, WordType carry, unsigned)
DST += RHS + CARRY where CARRY is zero or one. Returns the carry flag.
Definition APInt.cpp:2494
static LLVM_ABI void tcExtract(WordType *, unsigned dstCount, const WordType *, unsigned srcBits, unsigned srcLSB)
Copy the bit vector of width srcBITS from SRC, starting at bit srcLSB, to DST, of dstCOUNT parts,...
Definition APInt.cpp:2464
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1535
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:968
static LLVM_ABI int tcCompare(const WordType *, const WordType *, unsigned)
Comparison (unsigned) of two bignums.
Definition APInt.cpp:2804
static APInt floatToBits(float V)
Converts a float to APInt bits.
Definition APInt.h:1775
uint64_t WordType
Definition APInt.h:80
static LLVM_ABI void tcAssign(WordType *, const WordType *, unsigned)
Assign one bignum to another.
Definition APInt.cpp:2400
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
static LLVM_ABI void tcShiftRight(WordType *, unsigned Words, unsigned Count)
Shift a bignum right Count bits.
Definition APInt.cpp:2778
static LLVM_ABI void tcFullMultiply(WordType *, const WordType *, const WordType *, unsigned, unsigned)
DST = LHS * RHS, where DST has width the sum of the widths of the operands.
Definition APInt.cpp:2684
unsigned getNumWords() const
Get the number of words.
Definition APInt.h:1518
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
static LLVM_ABI void tcClearBit(WordType *, unsigned bit)
Clear the given bit of a bignum. Zero-based.
Definition APInt.cpp:2425
void negate()
Negate this APInt in place.
Definition APInt.h:1491
static WordType tcDecrement(WordType *dst, unsigned parts)
Decrement a bignum in-place. Return the borrow flag.
Definition APInt.h:1941
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1662
static LLVM_ABI unsigned tcLSB(const WordType *, unsigned n)
Returns the bit number of the least or most significant set bit of a number.
Definition APInt.cpp:2431
static LLVM_ABI void tcShiftLeft(WordType *, unsigned Words, unsigned Count)
Shift a bignum left Count bits.
Definition APInt.cpp:2751
static LLVM_ABI bool tcIsZero(const WordType *, unsigned)
Returns true if a bignum is zero, false otherwise.
Definition APInt.cpp:2406
static LLVM_ABI unsigned tcMSB(const WordType *parts, unsigned n)
Returns the bit number of the most significant set bit of a number.
Definition APInt.cpp:2444
float bitsToFloat() const
Converts APInt bits to a float.
Definition APInt.h:1759
static LLVM_ABI int tcMultiplyPart(WordType *dst, const WordType *src, WordType multiplier, WordType carry, unsigned srcParts, unsigned dstParts, bool add)
DST += SRC * MULTIPLIER + PART if add is true DST = SRC * MULTIPLIER + PART if add is false.
Definition APInt.cpp:2582
static constexpr unsigned APINT_BITS_PER_WORD
Bits in a word.
Definition APInt.h:86
static LLVM_ABI WordType tcSubtract(WordType *, const WordType *, WordType carry, unsigned)
DST -= RHS + CARRY where CARRY is zero or one. Returns the carry flag.
Definition APInt.cpp:2529
static LLVM_ABI void tcNegate(WordType *, unsigned)
Negate a bignum in-place.
Definition APInt.cpp:2568
static APInt doubleToBits(double V)
Converts a double to APInt bits.
Definition APInt.h:1767
static WordType tcIncrement(WordType *dst, unsigned parts)
Increment a bignum in-place. Return the carry flag.
Definition APInt.h:1936
double bitsToDouble() const
Converts APInt bits to a double.
Definition APInt.h:1745
const uint64_t * getRawData() const
This function returns a pointer to the internal storage of the APInt.
Definition APInt.h:576
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:865
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24
bool isSigned() const
Definition APSInt.h:78
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static ErrorSuccess success()
Create a success value.
Definition Error.h:336
Tagged union holding either a T or a Error.
Definition Error.h:485
This class is used to gather all the unique data bits of a node.
Definition FoldingSet.h:208
void Add(const T &x)
Definition FoldingSet.h:248
Represent a mutable reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:294
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator erase(const_iterator CI)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:882
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition StringRef.h:490
const char * iterator
Definition StringRef.h:60
constexpr bool empty() const
Check if the string is empty.
Definition StringRef.h:141
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition StringRef.h:629
iterator begin() const
Definition StringRef.h:114
char back() const
Get the last character in the string.
Definition StringRef.h:153
StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition StringRef.h:714
constexpr size_t size() const
Get the string size.
Definition StringRef.h:144
char front() const
Get the first character in the string.
Definition StringRef.h:147
iterator end() const
Definition StringRef.h:116
bool consume_front(char Prefix)
Returns true if this StringRef has the given prefix and removes that prefix.
Definition StringRef.h:655
bool consume_front_insensitive(StringRef Prefix)
Returns true if this StringRef has the given prefix, ignoring case, and removes that prefix.
Definition StringRef.h:675
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI void makeSmallestNormalized(bool Neg)
Definition APFloat.cpp:5168
LLVM_ABI DoubleAPFloat & operator=(const DoubleAPFloat &RHS)
Definition APFloat.cpp:4698
LLVM_ABI void changeSign()
Definition APFloat.cpp:5075
LLVM_ABI bool isLargest() const
Definition APFloat.cpp:5642
LLVM_ABI opStatus remainder(const DoubleAPFloat &RHS)
Definition APFloat.cpp:4962
LLVM_ABI opStatus multiply(const DoubleAPFloat &RHS, roundingMode RM)
Definition APFloat.cpp:4865
LLVM_ABI fltCategory getCategory() const
Definition APFloat.cpp:5134
LLVM_ABI bool bitwiseIsEqual(const DoubleAPFloat &RHS) const
Definition APFloat.cpp:5191
LLVM_ABI LLVM_READONLY int getExactLog2Abs() const
Definition APFloat.cpp:5666
LLVM_ABI opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.cpp:5593
LLVM_ABI APInt bitcastToAPInt() const
Definition APFloat.cpp:5202
LLVM_ABI Expected< opStatus > convertFromString(StringRef, roundingMode)
Definition APFloat.cpp:5212
LLVM_ABI bool isSmallest() const
Definition APFloat.cpp:5625
LLVM_ABI opStatus subtract(const DoubleAPFloat &RHS, roundingMode RM)
Definition APFloat.cpp:4857
LLVM_ABI friend hash_code hash_value(const DoubleAPFloat &Arg)
Definition APFloat.cpp:5196
LLVM_ABI cmpResult compareAbsoluteValue(const DoubleAPFloat &RHS) const
Definition APFloat.cpp:5081
LLVM_ABI bool isDenormal() const
Definition APFloat.cpp:5618
LLVM_ABI opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.cpp:5429
LLVM_ABI void makeSmallest(bool Neg)
Definition APFloat.cpp:5161
LLVM_ABI friend int ilogb(const DoubleAPFloat &X)
Definition APFloat.cpp:5675
LLVM_ABI opStatus next(bool nextDown)
Definition APFloat.cpp:5228
LLVM_ABI void makeInf(bool Neg)
Definition APFloat.cpp:5140
LLVM_ABI bool isInteger() const
Definition APFloat.cpp:5650
LLVM_ABI void makeZero(bool Neg)
Definition APFloat.cpp:5145
LLVM_ABI opStatus divide(const DoubleAPFloat &RHS, roundingMode RM)
Definition APFloat.cpp:4951
LLVM_ABI bool isSmallestNormalized() const
Definition APFloat.cpp:5633
LLVM_ABI opStatus mod(const DoubleAPFloat &RHS)
Definition APFloat.cpp:4972
LLVM_ABI DoubleAPFloat(const fltSemantics &S)
Definition APFloat.cpp:4645
LLVM_ABI void toString(SmallVectorImpl< char > &Str, unsigned FormatPrecision, unsigned FormatMaxPadding, bool TruncateZero=true) const
Definition APFloat.cpp:5656
LLVM_ABI void makeLargest(bool Neg)
Definition APFloat.cpp:5150
LLVM_ABI cmpResult compare(const DoubleAPFloat &RHS) const
Definition APFloat.cpp:5183
LLVM_ABI friend DoubleAPFloat scalbn(const DoubleAPFloat &X, int Exp, roundingMode)
LLVM_ABI opStatus roundToIntegral(roundingMode RM)
Definition APFloat.cpp:4998
LLVM_ABI opStatus fusedMultiplyAdd(const DoubleAPFloat &Multiplicand, const DoubleAPFloat &Addend, roundingMode RM)
Definition APFloat.cpp:4983
LLVM_ABI unsigned int convertToHexString(char *DST, unsigned int HexDigits, bool UpperCase, roundingMode RM) const
Definition APFloat.cpp:5608
LLVM_ABI bool isNegative() const
Definition APFloat.cpp:5138
LLVM_ABI opStatus add(const DoubleAPFloat &RHS, roundingMode RM)
Definition APFloat.cpp:4852
LLVM_ABI void makeNaN(bool SNaN, bool Neg, const APInt *fill)
Definition APFloat.cpp:5178
LLVM_ABI unsigned int convertToHexString(char *dst, unsigned int hexDigits, bool upperCase, roundingMode) const
Write out a hexadecimal representation of the floating point value to DST, which must be of sufficien...
Definition APFloat.cpp:3188
LLVM_ABI cmpResult compareAbsoluteValue(const IEEEFloat &) const
Definition APFloat.cpp:1437
LLVM_ABI opStatus mod(const IEEEFloat &)
C fmod, or llvm frem.
Definition APFloat.cpp:2193
fltCategory getCategory() const
Definition APFloat.h:582
LLVM_ABI opStatus convertFromAPInt(const APInt &, bool, roundingMode)
Definition APFloat.cpp:2748
APInt getNaNPayload() const
Definition APFloat.cpp:4533
bool isFiniteNonZero() const
Definition APFloat.h:585
bool needsCleanup() const
Returns whether this instance allocated memory.
Definition APFloat.h:472
LLVM_ABI void makeLargest(bool Neg=false)
Make this number the largest magnitude normal number in the given semantics.
Definition APFloat.cpp:3960
LLVM_ABI LLVM_READONLY int getExactLog2Abs() const
Definition APFloat.cpp:4355
LLVM_ABI APInt bitcastToAPInt() const
Definition APFloat.cpp:3586
LLVM_ABI friend IEEEFloat scalbn(IEEEFloat X, int Exp, roundingMode)
Definition APFloat.cpp:4605
LLVM_ABI cmpResult compare(const IEEEFloat &) const
IEEE comparison with another floating point number (NaNs compare unordered, 0==-0).
Definition APFloat.cpp:2361
bool isNegative() const
IEEE-754R isSignMinus: Returns true if and only if the current value is negative.
Definition APFloat.h:547
LLVM_ABI opStatus divide(const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2067
bool isNaN() const
Returns true if and only if the float is a quiet or signaling NaN.
Definition APFloat.h:572
LLVM_ABI opStatus remainder(const IEEEFloat &)
IEEE remainder.
Definition APFloat.cpp:2085
LLVM_ABI double convertToDouble() const
Definition APFloat.cpp:3656
LLVM_ABI float convertToFloat() const
Definition APFloat.cpp:3649
LLVM_ABI opStatus subtract(const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2043
LLVM_ABI void toString(SmallVectorImpl< char > &Str, unsigned FormatPrecision=0, unsigned FormatMaxPadding=3, bool TruncateZero=true) const
Converts this value into a decimal string.
Definition APFloat.cpp:4311
LLVM_ABI void makeSmallest(bool Neg=false)
Make this number the smallest magnitude denormal number in the given semantics.
Definition APFloat.cpp:3992
LLVM_ABI void makeInf(bool Neg=false)
Definition APFloat.cpp:4552
LLVM_ABI bool isSmallestNormalized() const
Returns true if this is the smallest (by magnitude) normalized finite number in the given semantics.
Definition APFloat.cpp:958
LLVM_ABI void makeQuiet()
Definition APFloat.cpp:4581
LLVM_ABI bool isLargest() const
Returns true if and only if the number has the largest possible finite magnitude in the current seman...
Definition APFloat.cpp:1060
LLVM_ABI opStatus add(const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2037
bool isFinite() const
Returns true if and only if the current value is zero, subnormal, or normal.
Definition APFloat.h:559
LLVM_ABI Expected< opStatus > convertFromString(StringRef, roundingMode)
Definition APFloat.cpp:3131
LLVM_ABI void makeNaN(bool SNaN=false, bool Neg=false, const APInt *fill=nullptr)
Definition APFloat.cpp:847
LLVM_ABI opStatus multiply(const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2049
LLVM_ABI opStatus roundToIntegral(roundingMode)
Definition APFloat.cpp:2276
LLVM_ABI IEEEFloat & operator=(const IEEEFloat &)
Definition APFloat.cpp:919
LLVM_ABI bool bitwiseIsEqual(const IEEEFloat &) const
Bitwise comparison for equality (QNaNs compare equal, 0!=-0).
Definition APFloat.cpp:1085
LLVM_ABI void makeSmallestNormalized(bool Negative=false)
Returns the smallest (by magnitude) normalized finite number in the given semantics.
Definition APFloat.cpp:4006
LLVM_ABI bool isInteger() const
Returns true if and only if the number is an exact integer.
Definition APFloat.cpp:1077
LLVM_ABI IEEEFloat(const fltSemantics &)
Definition APFloat.cpp:1112
LLVM_ABI opStatus fusedMultiplyAdd(const IEEEFloat &, const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2230
LLVM_ABI friend int ilogb(const IEEEFloat &Arg)
Definition APFloat.cpp:4587
LLVM_ABI opStatus next(bool nextDown)
IEEE-754R 5.3.1: nextUp/nextDown.
Definition APFloat.cpp:4400
bool isInfinity() const
IEEE-754R isInfinite(): Returns true if and only if the float is infinity.
Definition APFloat.h:569
const fltSemantics & getSemantics() const
Definition APFloat.h:583
bool isZero() const
Returns true if and only if the float is plus or minus zero.
Definition APFloat.h:562
LLVM_ABI bool isSignaling() const
Returns true if and only if the float is a signaling NaN.
Definition APFloat.cpp:4384
LLVM_ABI void makeZero(bool Neg=false)
Definition APFloat.cpp:4567
LLVM_ABI opStatus convert(const fltSemantics &, roundingMode, bool *)
IEEEFloat::convert - convert a value of one floating point type to another.
Definition APFloat.cpp:2437
LLVM_ABI void changeSign()
Definition APFloat.cpp:1995
LLVM_ABI bool isDenormal() const
IEEE-754R isSubnormal(): Returns true if and only if the float is a denormal.
Definition APFloat.cpp:944
LLVM_ABI opStatus convertToInteger(MutableArrayRef< integerPart >, unsigned int, bool, roundingMode, bool *) const
Definition APFloat.cpp:2693
LLVM_ABI bool isSmallest() const
Returns true if and only if the number has the smallest possible non-zero magnitude in the current se...
Definition APFloat.cpp:950
An opaque object representing a hash code.
Definition Hashing.h:78
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
static constexpr opStatus opInexact
Definition APFloat.h:448
LLVM_ABI SlowDynamicAPInt abs(const SlowDynamicAPInt &X)
Redeclarations of the friend declarations above to make them discoverable by lookups.
static constexpr fltCategory fcNaN
Definition APFloat.h:450
static constexpr opStatus opDivByZero
Definition APFloat.h:445
static constexpr opStatus opOverflow
Definition APFloat.h:446
static constexpr cmpResult cmpLessThan
Definition APFloat.h:440
const char unit< Period >::value[]
Definition Chrono.h:104
static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts, unsigned bits)
Definition APFloat.cpp:1460
static constexpr roundingMode rmTowardPositive
Definition APFloat.h:436
static constexpr uninitializedTag uninitialized
Definition APFloat.h:430
static constexpr fltCategory fcZero
Definition APFloat.h:452
static constexpr opStatus opOK
Definition APFloat.h:443
static constexpr cmpResult cmpGreaterThan
Definition APFloat.h:441
static constexpr unsigned integerPartWidth
Definition APFloat.h:438
LLVM_ABI hash_code hash_value(const IEEEFloat &Arg)
Definition APFloat.cpp:3328
APFloatBase::ExponentType ExponentType
Definition APFloat.h:429
static constexpr fltCategory fcNormal
Definition APFloat.h:451
static constexpr opStatus opInvalidOp
Definition APFloat.h:444
APFloatBase::opStatus opStatus
Definition APFloat.h:426
LLVM_ABI IEEEFloat frexp(const IEEEFloat &Val, int &Exp, roundingMode RM)
Definition APFloat.cpp:4626
APFloatBase::uninitializedTag uninitializedTag
Definition APFloat.h:424
static constexpr cmpResult cmpUnordered
Definition APFloat.h:442
static constexpr roundingMode rmTowardNegative
Definition APFloat.h:435
APFloatBase::roundingMode roundingMode
Definition APFloat.h:425
APFloatBase::cmpResult cmpResult
Definition APFloat.h:427
static constexpr fltCategory fcInfinity
Definition APFloat.h:449
static constexpr roundingMode rmNearestTiesToAway
Definition APFloat.h:433
static constexpr roundingMode rmTowardZero
Definition APFloat.h:437
static constexpr opStatus opUnderflow
Definition APFloat.h:447
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:431
LLVM_ABI int ilogb(const IEEEFloat &Arg)
Definition APFloat.cpp:4587
static constexpr cmpResult cmpEqual
Definition APFloat.h:439
LLVM_ABI IEEEFloat scalbn(IEEEFloat X, int Exp, roundingMode)
Definition APFloat.cpp:4605
static std::pair< APFloat, APFloat > fastTwoSum(APFloat X, APFloat Y)
Definition APFloat.cpp:4715
APFloatBase::integerPart integerPart
Definition APFloat.h:423
LLVM_ABI std::error_code status(const Twine &path, file_status &result, bool follow=true)
Get file status as if by POSIX stat().
This is an optimization pass for GlobalISel generic memory operations.
static unsigned int partAsHex(char *dst, APFloatBase::integerPart part, unsigned int count, const char *hexDigitChars)
Definition APFloat.cpp:744
void fill(R &&Range, T &&Value)
Provide wrappers to std::fill which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1758
static const char infinityL[]
Definition APFloat.cpp:735
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
static constexpr unsigned int partCountForBits(unsigned int bits)
Definition APFloat.cpp:322
static const char NaNU[]
Definition APFloat.cpp:738
static unsigned int HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
Definition APFloat.cpp:620
static unsigned int powerOf5(APFloatBase::integerPart *dst, unsigned int power)
Definition APFloat.cpp:679
unsigned hexDigitValue(char C)
Interpret the given character C as a hexadecimal digit and return its value.
static APFloat harrisonUlp(const APFloat &X)
Definition APFloat.cpp:791
static constexpr APFloatBase::ExponentType exponentZero(const fltSemantics &semantics)
Definition APFloat.cpp:296
static Expected< int > totalExponent(StringRef::iterator p, StringRef::iterator end, int exponentAdjustment)
Definition APFloat.cpp:379
LLVM_ABI std::error_code inconvertibleErrorCode()
The value returned by this function can be returned from convertToErrorCode for Error values where no...
Definition Error.cpp:94
const unsigned int maxPowerOfFiveExponent
Definition APFloat.cpp:222
int ilogb(const APFloat &Arg)
Returns the exponent of the internal representation of the APFloat.
Definition APFloat.h:1631
static char * writeUnsignedDecimal(char *dst, unsigned int n)
Definition APFloat.cpp:761
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
Definition STLExtras.h:2172
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:156
const unsigned int maxPrecision
Definition APFloat.cpp:221
APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM)
Equivalent of C standard library function.
Definition APFloat.h:1652
static const char NaNL[]
Definition APFloat.cpp:737
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant bit, stopping at the first 1.
Definition bit.h:204
static const char infinityU[]
Definition APFloat.cpp:736
lostFraction
Enum that represents what fraction of the LSB truncated bits of an fp number represent.
Definition APFloat.h:50
@ lfMoreThanHalf
Definition APFloat.h:54
@ lfLessThanHalf
Definition APFloat.h:52
@ lfExactlyHalf
Definition APFloat.h:53
@ lfExactlyZero
Definition APFloat.h:51
static Error interpretDecimal(StringRef::iterator begin, StringRef::iterator end, decimalInfo *D)
Definition APFloat.cpp:469
LLVM_ABI bool isFinite(const Loop *L)
Return true if this loop can be assumed to run for a finite number of iterations.
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
const unsigned int maxPowerOfFiveParts
Definition APFloat.cpp:223
APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM)
Returns: X * 2^Exp for integral exponents.
Definition APFloat.h:1640
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
static constexpr APFloatBase::ExponentType exponentNaN(const fltSemantics &semantics)
Definition APFloat.cpp:306
static Error createError(const Twine &Err)
Definition APFloat.cpp:318
static lostFraction shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
Definition APFloat.cpp:588
Error make_error(ArgTs &&... Args)
Make an Error instance representing failure using the given error info type.
Definition Error.h:340
LLVM_READONLY APFloat exp(const APFloat &X, RoundingMode RM=APFloat::rmNearestTiesToEven)
Implement IEEE 754-2019 exp functions.
Definition APFloat.cpp:6098
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
static const char hexDigitsUpper[]
Definition APFloat.cpp:734
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:221
const unsigned int maxExponent
Definition APFloat.cpp:220
static unsigned int decDigitValue(unsigned int c)
Definition APFloat.cpp:329
fltNonfiniteBehavior
Definition APFloat.h:952
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:2011
static lostFraction combineLostFractions(lostFraction moreSignificant, lostFraction lessSignificant)
Definition APFloat.cpp:599
static Expected< StringRef::iterator > skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end, StringRef::iterator *dot)
Definition APFloat.cpp:429
RoundingMode
Rounding mode.
ArrayRef(const T &OneElt) -> ArrayRef< T >
static constexpr APFloatBase::ExponentType exponentInf(const fltSemantics &semantics)
Definition APFloat.cpp:301
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
static lostFraction lostFractionThroughTruncation(const APFloatBase::integerPart *parts, unsigned int partCount, unsigned int bits)
Definition APFloat.cpp:568
APFloat neg(APFloat X)
Returns the negated value of the argument.
Definition APFloat.h:1666
static APFloatBase::integerPart ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits, bool isNearest)
Definition APFloat.cpp:634
static char * writeSignedDecimal(char *dst, int value)
Definition APFloat.cpp:777
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition Hashing.h:325
static Expected< lostFraction > trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, unsigned int digitValue)
Definition APFloat.cpp:539
void consumeError(Error Err)
Consume an Error without doing anything.
Definition Error.h:1106
static Expected< int > readExponent(StringRef::iterator begin, StringRef::iterator end)
Definition APFloat.cpp:339
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition Hashing.h:305
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
static const char hexDigitsLower[]
Definition APFloat.cpp:733
#define N
const char * lastSigDigit
Definition APFloat.cpp:464
const char * firstSigDigit
Definition APFloat.cpp:463
APFloatBase::ExponentType maxExponent
Definition APFloat.h:1000
fltNonfiniteBehavior nonFiniteBehavior
Definition APFloat.h:1013
APFloatBase::ExponentType minExponent
Definition APFloat.h:1004
unsigned int sizeInBits
Definition APFloat.h:1011
unsigned int precision
Definition APFloat.h:1008
fltNanEncoding nanEncoding
Definition APFloat.h:1015