LLVM 23.0.0git
APFloat.cpp
Go to the documentation of this file.
1//===-- APFloat.cpp - Implement APFloat class -----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a class to represent arbitrary precision floating
10// point values and provide a variety of arithmetic operations on them.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/ADT/APFloat.h"
15#include "llvm/ADT/APSInt.h"
16#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/FoldingSet.h"
19#include "llvm/ADT/Hashing.h"
20#include "llvm/ADT/STLExtras.h"
22#include "llvm/ADT/StringRef.h"
24#include "llvm/Config/llvm-config.h"
25#include "llvm/Support/Debug.h"
26#include "llvm/Support/Error.h"
29#include <cstring>
30#include <limits.h>
31
32/// Shared headers from LLVM libc
33/// Make sure to add ${LLVM_SOURCE_DIR}/../libc to include directories.
34///
35/// Notes: So far it looks like APFloat does not check errnos or floating-point
36/// exceptions after calling the math functions, so we will configure LLVM libc
37/// math functions to skip setting errnos and floating-point exceptions
38/// explicitly. We also put them in a separate namespace so that the symbols
39/// do not clash with other libc math builds just in case.
40#define LIBC_NAMESPACE __llvm_libc_apfloat
41#define LIBC_MATH (LIBC_MATH_NO_ERRNO | LIBC_MATH_NO_EXCEPT)
42
43#include "shared/math.h"
44#include "shared/math_check_exceptions.h"
45
46#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL) \
47 do { \
48 if (usesLayout<IEEEFloat>(getSemantics())) \
49 return U.IEEE.METHOD_CALL; \
50 if (usesLayout<DoubleAPFloat>(getSemantics())) \
51 return U.Double.METHOD_CALL; \
52 llvm_unreachable("Unexpected semantics"); \
53 } while (false)
54
55using namespace llvm;
56
57/// A macro used to combine two fcCategory enums into one key which can be used
58/// in a switch statement to classify how the interaction of two APFloat's
59/// categories affects an operation.
60///
61/// TODO: If clang source code is ever allowed to use constexpr in its own
62/// codebase, change this into a static inline function.
63#define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
64
65/* Assumed in hexadecimal significand parsing, and conversion to
66 hexadecimal strings. */
67static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
68
69namespace llvm {
70
71constexpr fltSemantics APFloatBase::semIEEEhalf = {15, -14, 11, 16};
72constexpr fltSemantics APFloatBase::semBFloat = {127, -126, 8, 16};
73constexpr fltSemantics APFloatBase::semIEEEsingle = {127, -126, 24, 32};
74constexpr fltSemantics APFloatBase::semIEEEdouble = {1023, -1022, 53, 64};
75constexpr fltSemantics APFloatBase::semIEEEquad = {16383, -16382, 113, 128};
76constexpr fltSemantics APFloatBase::semFloat8E5M2 = {15, -14, 3, 8};
77constexpr fltSemantics APFloatBase::semFloat8E5M2FNUZ = {
79constexpr fltSemantics APFloatBase::semFloat8E4M3 = {7, -6, 4, 8};
80constexpr fltSemantics APFloatBase::semFloat8E4M3FN = {
82constexpr fltSemantics APFloatBase::semFloat8E4M3FNUZ = {
84constexpr fltSemantics APFloatBase::semFloat8E4M3B11FNUZ = {
86constexpr fltSemantics APFloatBase::semFloat8E3M4 = {3, -2, 5, 8};
87constexpr fltSemantics APFloatBase::semFloatTF32 = {127, -126, 11, 19};
88constexpr fltSemantics APFloatBase::semFloat8E8M0FNU = {
89 127,
90 -127,
91 1,
92 8,
95 false,
96 false,
97 false,
98 false};
99
100constexpr fltSemantics APFloatBase::semFloat6E3M2FN = {
102constexpr fltSemantics APFloatBase::semFloat6E2M3FN = {
104constexpr fltSemantics APFloatBase::semFloat4E2M1FN = {
106constexpr fltSemantics APFloatBase::semX87DoubleExtended = {16383, -16382, 64,
107 80};
108constexpr fltSemantics APFloatBase::semBogus = {0, 0, 0, 0};
109constexpr fltSemantics APFloatBase::semPPCDoubleDouble = {-1, 0, 0, 128};
110constexpr fltSemantics APFloatBase::semPPCDoubleDoubleLegacy = {
111 1023, -1022 + 53, 53 + 53, 128};
112
114 switch (S) {
115 case S_IEEEhalf:
116 return IEEEhalf();
117 case S_BFloat:
118 return BFloat();
119 case S_IEEEsingle:
120 return IEEEsingle();
121 case S_IEEEdouble:
122 return IEEEdouble();
123 case S_IEEEquad:
124 return IEEEquad();
126 return PPCDoubleDouble();
128 return PPCDoubleDoubleLegacy();
129 case S_Float8E5M2:
130 return Float8E5M2();
131 case S_Float8E5M2FNUZ:
132 return Float8E5M2FNUZ();
133 case S_Float8E4M3:
134 return Float8E4M3();
135 case S_Float8E4M3FN:
136 return Float8E4M3FN();
137 case S_Float8E4M3FNUZ:
138 return Float8E4M3FNUZ();
140 return Float8E4M3B11FNUZ();
141 case S_Float8E3M4:
142 return Float8E3M4();
143 case S_FloatTF32:
144 return FloatTF32();
145 case S_Float8E8M0FNU:
146 return Float8E8M0FNU();
147 case S_Float6E3M2FN:
148 return Float6E3M2FN();
149 case S_Float6E2M3FN:
150 return Float6E2M3FN();
151 case S_Float4E2M1FN:
152 return Float4E2M1FN();
154 return x87DoubleExtended();
155 }
156 llvm_unreachable("Unrecognised floating semantics");
157}
158
161 if (&Sem == &llvm::APFloat::IEEEhalf())
162 return S_IEEEhalf;
163 else if (&Sem == &llvm::APFloat::BFloat())
164 return S_BFloat;
165 else if (&Sem == &llvm::APFloat::IEEEsingle())
166 return S_IEEEsingle;
167 else if (&Sem == &llvm::APFloat::IEEEdouble())
168 return S_IEEEdouble;
169 else if (&Sem == &llvm::APFloat::IEEEquad())
170 return S_IEEEquad;
171 else if (&Sem == &llvm::APFloat::PPCDoubleDouble())
172 return S_PPCDoubleDouble;
173 else if (&Sem == &llvm::APFloat::PPCDoubleDoubleLegacy())
175 else if (&Sem == &llvm::APFloat::Float8E5M2())
176 return S_Float8E5M2;
177 else if (&Sem == &llvm::APFloat::Float8E5M2FNUZ())
178 return S_Float8E5M2FNUZ;
179 else if (&Sem == &llvm::APFloat::Float8E4M3())
180 return S_Float8E4M3;
181 else if (&Sem == &llvm::APFloat::Float8E4M3FN())
182 return S_Float8E4M3FN;
183 else if (&Sem == &llvm::APFloat::Float8E4M3FNUZ())
184 return S_Float8E4M3FNUZ;
185 else if (&Sem == &llvm::APFloat::Float8E4M3B11FNUZ())
186 return S_Float8E4M3B11FNUZ;
187 else if (&Sem == &llvm::APFloat::Float8E3M4())
188 return S_Float8E3M4;
189 else if (&Sem == &llvm::APFloat::FloatTF32())
190 return S_FloatTF32;
191 else if (&Sem == &llvm::APFloat::Float8E8M0FNU())
192 return S_Float8E8M0FNU;
193 else if (&Sem == &llvm::APFloat::Float6E3M2FN())
194 return S_Float6E3M2FN;
195 else if (&Sem == &llvm::APFloat::Float6E2M3FN())
196 return S_Float6E2M3FN;
197 else if (&Sem == &llvm::APFloat::Float4E2M1FN())
198 return S_Float4E2M1FN;
199 else if (&Sem == &llvm::APFloat::x87DoubleExtended())
200 return S_x87DoubleExtended;
201 else
202 llvm_unreachable("Unknown floating semantics");
203}
204
206 const fltSemantics &B) {
207 return A.maxExponent <= B.maxExponent && A.minExponent >= B.minExponent &&
208 A.precision <= B.precision;
209}
210
211/* A tight upper bound on number of parts required to hold the value
212 pow(5, power) is
213
214 power * 815 / (351 * integerPartWidth) + 1
215
216 However, whilst the result may require only this many parts,
217 because we are multiplying two values to get it, the
218 multiplication may require an extra part with the excess part
219 being zero (consider the trivial case of 1 * 1, tcFullMultiply
220 requires two parts to hold the single-part result). So we add an
221 extra one to guarantee enough space whilst multiplying. */
222const unsigned int maxExponent = 16383;
223const unsigned int maxPrecision = 113;
225const unsigned int maxPowerOfFiveParts =
226 2 +
228
229unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
230 return semantics.precision;
231}
234 return semantics.maxExponent;
235}
238 return semantics.minExponent;
239}
240unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
241 return semantics.sizeInBits;
242}
244 bool isSigned) {
245 // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need
246 // at least one more bit than the MaxExponent to hold the max FP value.
247 unsigned int MinBitWidth = semanticsMaxExponent(semantics) + 1;
248 // Extra sign bit needed.
249 if (isSigned)
250 ++MinBitWidth;
251 return MinBitWidth;
252}
253
255 return semantics.hasZero;
256}
257
259 return semantics.hasSignedRepr;
260}
261
265
269
271 // Keep in sync with Type::isIEEELikeFPTy
272 return SemanticsToEnum(semantics) <= S_IEEEquad;
273}
274
276 return semantics.hasSignBitInMSB;
277}
278
280 const fltSemantics &Dst) {
281 // Exponent range must be larger.
282 if (Src.maxExponent >= Dst.maxExponent || Src.minExponent <= Dst.minExponent)
283 return false;
284
285 // If the mantissa is long enough, the result value could still be denormal
286 // with a larger exponent range.
287 //
288 // FIXME: This condition is probably not accurate but also shouldn't be a
289 // practical concern with existing types.
290 return Dst.precision >= Src.precision;
291}
292
294 return Sem.sizeInBits;
295}
296
297static constexpr APFloatBase::ExponentType
298exponentZero(const fltSemantics &semantics) {
299 return semantics.minExponent - 1;
300}
301
302static constexpr APFloatBase::ExponentType
303exponentInf(const fltSemantics &semantics) {
304 return semantics.maxExponent + 1;
305}
306
307static constexpr APFloatBase::ExponentType
308exponentNaN(const fltSemantics &semantics) {
311 return exponentZero(semantics);
312 if (semantics.hasSignedRepr)
313 return semantics.maxExponent;
314 }
315 return semantics.maxExponent + 1;
316}
317
318/* A bunch of private, handy routines. */
319
320static inline Error createError(const Twine &Err) {
322}
323
324static constexpr inline unsigned int partCountForBits(unsigned int bits) {
325 return std::max(1u, (bits + APFloatBase::integerPartWidth - 1) /
327}
328
329/* Returns 0U-9U. Return values >= 10U are not digits. */
330static inline unsigned int
331decDigitValue(unsigned int c)
332{
333 return c - '0';
334}
335
336/* Return the value of a decimal exponent of the form
337 [+-]ddddddd.
338
339 If the exponent overflows, returns a large exponent with the
340 appropriate sign. */
343 const unsigned int overlargeExponent = 24000; /* FIXME. */
344 StringRef::iterator p = begin;
345
346 // Treat no exponent as 0 to match binutils
347 if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end))
348 return 0;
349
350 bool isNegative = *p == '-';
351 if (*p == '-' || *p == '+') {
352 p++;
353 if (p == end)
354 return createError("Exponent has no digits");
355 }
356
357 unsigned absExponent = decDigitValue(*p++);
358 if (absExponent >= 10U)
359 return createError("Invalid character in exponent");
360
361 for (; p != end; ++p) {
362 unsigned value = decDigitValue(*p);
363 if (value >= 10U)
364 return createError("Invalid character in exponent");
365
366 absExponent = absExponent * 10U + value;
367 if (absExponent >= overlargeExponent) {
368 absExponent = overlargeExponent;
369 break;
370 }
371 }
372
373 if (isNegative)
374 return -(int) absExponent;
375 else
376 return (int) absExponent;
377}
378
379/* This is ugly and needs cleaning up, but I don't immediately see
380 how whilst remaining safe. */
383 int exponentAdjustment) {
384 int exponent = 0;
385
386 if (p == end)
387 return createError("Exponent has no digits");
388
389 bool negative = *p == '-';
390 if (*p == '-' || *p == '+') {
391 p++;
392 if (p == end)
393 return createError("Exponent has no digits");
394 }
395
396 int unsignedExponent = 0;
397 bool overflow = false;
398 for (; p != end; ++p) {
399 unsigned int value;
400
401 value = decDigitValue(*p);
402 if (value >= 10U)
403 return createError("Invalid character in exponent");
404
405 unsignedExponent = unsignedExponent * 10 + value;
406 if (unsignedExponent > 32767) {
407 overflow = true;
408 break;
409 }
410 }
411
412 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
413 overflow = true;
414
415 if (!overflow) {
416 exponent = unsignedExponent;
417 if (negative)
418 exponent = -exponent;
419 exponent += exponentAdjustment;
420 if (exponent > 32767 || exponent < -32768)
421 overflow = true;
422 }
423
424 if (overflow)
425 exponent = negative ? -32768: 32767;
426
427 return exponent;
428}
429
432 StringRef::iterator *dot) {
433 StringRef::iterator p = begin;
434 *dot = end;
435 while (p != end && *p == '0')
436 p++;
437
438 if (p != end && *p == '.') {
439 *dot = p++;
440
441 if (end - begin == 1)
442 return createError("Significand has no digits");
443
444 while (p != end && *p == '0')
445 p++;
446 }
447
448 return p;
449}
450
451/* Given a normal decimal floating point number of the form
452
453 dddd.dddd[eE][+-]ddd
454
455 where the decimal point and exponent are optional, fill out the
456 structure D. Exponent is appropriate if the significand is
457 treated as an integer, and normalizedExponent if the significand
458 is taken to have the decimal point after a single leading
459 non-zero digit.
460
461 If the value is zero, V->firstSigDigit points to a non-digit, and
462 the return exponent is zero.
463*/
465 const char *firstSigDigit;
466 const char *lastSigDigit;
469};
470
473 StringRef::iterator dot = end;
474
475 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
476 if (!PtrOrErr)
477 return PtrOrErr.takeError();
478 StringRef::iterator p = *PtrOrErr;
479
480 D->firstSigDigit = p;
481 D->exponent = 0;
482 D->normalizedExponent = 0;
483
484 for (; p != end; ++p) {
485 if (*p == '.') {
486 if (dot != end)
487 return createError("String contains multiple dots");
488 dot = p++;
489 if (p == end)
490 break;
491 }
492 if (decDigitValue(*p) >= 10U)
493 break;
494 }
495
496 if (p != end) {
497 if (*p != 'e' && *p != 'E')
498 return createError("Invalid character in significand");
499 if (p == begin)
500 return createError("Significand has no digits");
501 if (dot != end && p - begin == 1)
502 return createError("Significand has no digits");
503
504 /* p points to the first non-digit in the string */
505 auto ExpOrErr = readExponent(p + 1, end);
506 if (!ExpOrErr)
507 return ExpOrErr.takeError();
508 D->exponent = *ExpOrErr;
509
510 /* Implied decimal point? */
511 if (dot == end)
512 dot = p;
513 }
514
515 /* If number is all zeroes accept any exponent. */
516 if (p != D->firstSigDigit) {
517 /* Drop insignificant trailing zeroes. */
518 if (p != begin) {
519 do
520 do
521 p--;
522 while (p != begin && *p == '0');
523 while (p != begin && *p == '.');
524 }
525
526 /* Adjust the exponents for any decimal point. */
527 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
528 D->normalizedExponent = (D->exponent +
529 static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
530 - (dot > D->firstSigDigit && dot < p)));
531 }
532
533 D->lastSigDigit = p;
534 return Error::success();
535}
536
537/* Return the trailing fraction of a hexadecimal number.
538 DIGITVALUE is the first hex digit of the fraction, P points to
539 the next digit. */
542 unsigned int digitValue) {
543 /* If the first trailing digit isn't 0 or 8 we can work out the
544 fraction immediately. */
545 if (digitValue > 8)
546 return lfMoreThanHalf;
547 else if (digitValue < 8 && digitValue > 0)
548 return lfLessThanHalf;
549
550 // Otherwise we need to find the first non-zero digit.
551 while (p != end && (*p == '0' || *p == '.'))
552 p++;
553
554 if (p == end)
555 return createError("Invalid trailing hexadecimal fraction!");
556
557 unsigned hexDigit = hexDigitValue(*p);
558
559 /* If we ran off the end it is exactly zero or one-half, otherwise
560 a little more. */
561 if (hexDigit == UINT_MAX)
562 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
563 else
564 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
565}
566
567/* Return the fraction lost were a bignum truncated losing the least
568 significant BITS bits. */
569static lostFraction
571 unsigned int partCount,
572 unsigned int bits)
573{
574 unsigned lsb = APInt::tcLSB(parts, partCount);
575
576 /* Note this is guaranteed true if bits == 0, or LSB == UINT_MAX. */
577 if (bits <= lsb)
578 return lfExactlyZero;
579 if (bits == lsb + 1)
580 return lfExactlyHalf;
581 if (bits <= partCount * APFloatBase::integerPartWidth &&
582 APInt::tcExtractBit(parts, bits - 1))
583 return lfMoreThanHalf;
584
585 return lfLessThanHalf;
586}
587
588/* Shift DST right BITS bits noting lost fraction. */
589static lostFraction
590shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
591{
592 lostFraction lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
593
594 APInt::tcShiftRight(dst, parts, bits);
595
596 return lost_fraction;
597}
598
599/* Combine the effect of two lost fractions. */
600static lostFraction
602 lostFraction lessSignificant)
603{
604 if (lessSignificant != lfExactlyZero) {
605 if (moreSignificant == lfExactlyZero)
606 moreSignificant = lfLessThanHalf;
607 else if (moreSignificant == lfExactlyHalf)
608 moreSignificant = lfMoreThanHalf;
609 }
610
611 return moreSignificant;
612}
613
614/* The error from the true value, in half-ulps, on multiplying two
615 floating point numbers, which differ from the value they
616 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
617 than the returned value.
618
619 See "How to Read Floating Point Numbers Accurately" by William D
620 Clinger. */
621static unsigned int
622HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
623{
624 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
625
626 if (HUerr1 + HUerr2 == 0)
627 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
628 else
629 return inexactMultiply + 2 * (HUerr1 + HUerr2);
630}
631
632/* The number of ulps from the boundary (zero, or half if ISNEAREST)
633 when the least significant BITS are truncated. BITS cannot be
634 zero. */
636ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
637 bool isNearest) {
638 assert(bits != 0);
639
640 bits--;
641 unsigned count = bits / APFloatBase::integerPartWidth;
642 unsigned partBits = bits % APFloatBase::integerPartWidth + 1;
643
645 parts[count] & (~(APFloatBase::integerPart)0 >>
646 (APFloatBase::integerPartWidth - partBits));
647
649 if (isNearest)
650 boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
651 else
652 boundary = 0;
653
654 if (count == 0) {
655 if (part - boundary <= boundary - part)
656 return part - boundary;
657 else
658 return boundary - part;
659 }
660
661 if (part == boundary) {
662 while (--count)
663 if (parts[count])
664 return ~(APFloatBase::integerPart) 0; /* A lot. */
665
666 return parts[0];
667 } else if (part == boundary - 1) {
668 while (--count)
669 if (~parts[count])
670 return ~(APFloatBase::integerPart) 0; /* A lot. */
671
672 return -parts[0];
673 }
674
675 return ~(APFloatBase::integerPart) 0; /* A lot. */
676}
677
678/* Place pow(5, power) in DST, and return the number of parts used.
679 DST must be at least one part larger than size of the answer. */
680static unsigned int
681powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
682 static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
684 pow5s[0] = 78125 * 5;
685
686 unsigned int partsCount = 1;
687 APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
688 assert(power <= maxExponent);
689
690 p1 = dst;
691 p2 = scratch;
692
693 *p1 = firstEightPowers[power & 7];
694 power >>= 3;
695
696 unsigned result = 1;
697 pow5 = pow5s;
698
699 for (unsigned int n = 0; power; power >>= 1, n++) {
700 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
701 if (n != 0) {
702 APInt::tcFullMultiply(pow5, pow5 - partsCount, pow5 - partsCount,
703 partsCount, partsCount);
704 partsCount *= 2;
705 if (pow5[partsCount - 1] == 0)
706 partsCount--;
707 }
708
709 if (power & 1) {
711
712 APInt::tcFullMultiply(p2, p1, pow5, result, partsCount);
713 result += partsCount;
714 if (p2[result - 1] == 0)
715 result--;
716
717 /* Now result is in p1 with partsCount parts and p2 is scratch
718 space. */
719 tmp = p1;
720 p1 = p2;
721 p2 = tmp;
722 }
723
724 pow5 += partsCount;
725 }
726
727 if (p1 != dst)
728 APInt::tcAssign(dst, p1, result);
729
730 return result;
731}
732
733/* Zero at the end to avoid modular arithmetic when adding one; used
734 when rounding up during hexadecimal output. */
735static const char hexDigitsLower[] = "0123456789abcdef0";
736static const char hexDigitsUpper[] = "0123456789ABCDEF0";
737static const char infinityL[] = "infinity";
738static const char infinityU[] = "INFINITY";
739static const char NaNL[] = "nan";
740static const char NaNU[] = "NAN";
741
742/* Write out an integerPart in hexadecimal, starting with the most
743 significant nibble. Write out exactly COUNT hexdigits, return
744 COUNT. */
745static unsigned int
746partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
747 const char *hexDigitChars)
748{
749 unsigned int result = count;
750
752
753 part >>= (APFloatBase::integerPartWidth - 4 * count);
754 while (count--) {
755 dst[count] = hexDigitChars[part & 0xf];
756 part >>= 4;
757 }
758
759 return result;
760}
761
762/* Write out an unsigned decimal integer. */
763static char *writeUnsignedDecimal(char *dst, unsigned int n) {
764 char buff[40], *p;
765
766 p = buff;
767 do
768 *p++ = '0' + n % 10;
769 while (n /= 10);
770
771 do
772 *dst++ = *--p;
773 while (p != buff);
774
775 return dst;
776}
777
778/* Write out a signed decimal integer. */
779static char *writeSignedDecimal(char *dst, int value) {
780 if (value < 0) {
781 *dst++ = '-';
782 dst = writeUnsignedDecimal(dst, -(unsigned) value);
783 } else {
784 dst = writeUnsignedDecimal(dst, value);
785 }
786
787 return dst;
788}
789
790// Compute the ULP of the input using a definition from:
791// Jean-Michel Muller. On the definition of ulp(x). [Research Report] RR-5504,
792// LIP RR-2005-09, INRIA, LIP. 2005, pp.16. inria-00070503
793static APFloat harrisonUlp(const APFloat &X) {
794 const fltSemantics &Sem = X.getSemantics();
795 switch (X.getCategory()) {
796 case APFloat::fcNaN:
797 return APFloat::getQNaN(Sem);
799 return APFloat::getInf(Sem);
800 case APFloat::fcZero:
801 return APFloat::getSmallest(Sem);
803 break;
804 }
805 if (X.isDenormal() || X.isSmallestNormalized())
806 return APFloat::getSmallest(Sem);
807 int Exp = ilogb(X);
808 if (X.getExactLog2() != INT_MIN)
809 Exp -= 1;
810 return scalbn(APFloat::getOne(Sem), Exp - (Sem.precision - 1),
812}
813
814namespace detail {
815/* Constructors. */
816void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
817 semantics = ourSemantics;
818 unsigned count = partCount();
819 if (count > 1)
820 significand.parts = new integerPart[count];
821}
822
823void IEEEFloat::freeSignificand() {
824 if (needsCleanup())
825 delete [] significand.parts;
826}
827
828void IEEEFloat::assign(const IEEEFloat &rhs) {
829 assert(semantics == rhs.semantics);
830
831 sign = rhs.sign;
832 category = rhs.category;
833 exponent = rhs.exponent;
834 if (isFiniteNonZero() || category == fcNaN)
835 copySignificand(rhs);
836}
837
838void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
839 assert(isFiniteNonZero() || category == fcNaN);
840 assert(rhs.partCount() >= partCount());
841
842 APInt::tcAssign(significandParts(), rhs.significandParts(),
843 partCount());
844}
845
846/* Make this number a NaN, with an arbitrary but deterministic value
847 for the significand. If double or longer, this is a signalling NaN,
848 which may not be ideal. If float, this is QNaN(0). */
849void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
850 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
851 llvm_unreachable("This floating point format does not support NaN");
852
853 if (Negative && !semantics->hasSignedRepr)
855 "This floating point format does not support signed values");
856
857 category = fcNaN;
858 sign = Negative;
859 exponent = exponentNaN();
860
861 integerPart *significand = significandParts();
862 unsigned numParts = partCount();
863
864 APInt fill_storage;
865 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
866 // Finite-only types do not distinguish signalling and quiet NaN, so
867 // make them all signalling.
868 SNaN = false;
869 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
870 sign = true;
871 fill_storage = APInt::getZero(semantics->precision - 1);
872 } else {
873 fill_storage = APInt::getAllOnes(semantics->precision - 1);
874 }
875 fill = &fill_storage;
876 }
877
878 // Set the significand bits to the fill.
879 if (!fill || fill->getNumWords() < numParts)
880 APInt::tcSet(significand, 0, numParts);
881 if (fill) {
882 APInt::tcAssign(significand, fill->getRawData(),
883 std::min(fill->getNumWords(), numParts));
884
885 // Zero out the excess bits of the significand.
886 unsigned bitsToPreserve = semantics->precision - 1;
887 unsigned part = bitsToPreserve / 64;
888 bitsToPreserve %= 64;
889 significand[part] &= ((1ULL << bitsToPreserve) - 1);
890 for (part++; part != numParts; ++part)
891 significand[part] = 0;
892 }
893
894 unsigned QNaNBit =
895 (semantics->precision >= 2) ? (semantics->precision - 2) : 0;
896
897 if (SNaN) {
898 // We always have to clear the QNaN bit to make it an SNaN.
899 APInt::tcClearBit(significand, QNaNBit);
900
901 // If there are no bits set in the payload, we have to set
902 // *something* to make it a NaN instead of an infinity;
903 // conventionally, this is the next bit down from the QNaN bit.
904 if (APInt::tcIsZero(significand, numParts))
905 APInt::tcSetBit(significand, QNaNBit - 1);
906 } else if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
907 // The only NaN is a quiet NaN, and it has no bits sets in the significand.
908 // Do nothing.
909 } else {
910 // We always have to set the QNaN bit to make it a QNaN.
911 APInt::tcSetBit(significand, QNaNBit);
912 }
913
914 // For x87 extended precision, we want to make a NaN, not a
915 // pseudo-NaN. Maybe we should expose the ability to make
916 // pseudo-NaNs?
917 if (semantics == &APFloatBase::semX87DoubleExtended)
918 APInt::tcSetBit(significand, QNaNBit + 1);
919}
920
922 if (this != &rhs) {
923 if (semantics != rhs.semantics) {
924 freeSignificand();
925 initialize(rhs.semantics);
926 }
927 assign(rhs);
928 }
929
930 return *this;
931}
932
934 freeSignificand();
935
936 semantics = rhs.semantics;
937 significand = rhs.significand;
938 exponent = rhs.exponent;
939 category = rhs.category;
940 sign = rhs.sign;
941
942 rhs.semantics = &APFloatBase::semBogus;
943 return *this;
944}
945
948 (exponent == semantics->minExponent) &&
949 (APInt::tcExtractBit(significandParts(), semantics->precision - 1) ==
950 0);
951}
952
954 // The smallest number by magnitude in our format will be the smallest
955 // denormal, i.e. the floating point number with exponent being minimum
956 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
957 return isFiniteNonZero() && exponent == semantics->minExponent &&
958 significandMSB() == 0;
959}
960
962 return getCategory() == fcNormal && exponent == semantics->minExponent &&
963 isSignificandAllZerosExceptMSB();
964}
965
966unsigned int IEEEFloat::getNumHighBits() const {
967 const unsigned int PartCount = partCountForBits(semantics->precision);
968 const unsigned int Bits = PartCount * integerPartWidth;
969
970 // Compute how many bits are used in the final word.
971 // When precision is just 1, it represents the 'Pth'
972 // Precision bit and not the actual significand bit.
973 const unsigned int NumHighBits = (semantics->precision > 1)
974 ? (Bits - semantics->precision + 1)
975 : (Bits - semantics->precision);
976 return NumHighBits;
977}
978
979bool IEEEFloat::isSignificandAllOnes() const {
980 // Test if the significand excluding the integral bit is all ones. This allows
981 // us to test for binade boundaries.
982 const integerPart *Parts = significandParts();
983 const unsigned PartCount = partCountForBits(semantics->precision);
984 for (unsigned i = 0; i < PartCount - 1; i++)
985 if (~Parts[i])
986 return false;
987
988 // Set the unused high bits to all ones when we compare.
989 const unsigned NumHighBits = getNumHighBits();
990 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
991 "Can not have more high bits to fill than integerPartWidth");
992 const integerPart HighBitFill =
993 ~integerPart(0) << (integerPartWidth - NumHighBits);
994 if ((semantics->precision <= 1) || (~(Parts[PartCount - 1] | HighBitFill)))
995 return false;
996
997 return true;
998}
999
1000bool IEEEFloat::isSignificandAllOnesExceptLSB() const {
1001 // Test if the significand excluding the integral bit is all ones except for
1002 // the least significant bit.
1003 const integerPart *Parts = significandParts();
1004
1005 if (Parts[0] & 1)
1006 return false;
1007
1008 const unsigned PartCount = partCountForBits(semantics->precision);
1009 for (unsigned i = 0; i < PartCount - 1; i++) {
1010 if (~Parts[i] & ~unsigned{!i})
1011 return false;
1012 }
1013
1014 // Set the unused high bits to all ones when we compare.
1015 const unsigned NumHighBits = getNumHighBits();
1016 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
1017 "Can not have more high bits to fill than integerPartWidth");
1018 const integerPart HighBitFill = ~integerPart(0)
1019 << (integerPartWidth - NumHighBits);
1020 if (~(Parts[PartCount - 1] | HighBitFill | 0x1))
1021 return false;
1022
1023 return true;
1024}
1025
1026bool IEEEFloat::isSignificandAllZeros() const {
1027 // Test if the significand excluding the integral bit is all zeros. This
1028 // allows us to test for binade boundaries.
1029 const integerPart *Parts = significandParts();
1030 const unsigned PartCount = partCountForBits(semantics->precision);
1031
1032 for (unsigned i = 0; i < PartCount - 1; i++)
1033 if (Parts[i])
1034 return false;
1035
1036 // Compute how many bits are used in the final word.
1037 const unsigned NumHighBits = getNumHighBits();
1038 assert(NumHighBits < integerPartWidth && "Can not have more high bits to "
1039 "clear than integerPartWidth");
1040 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
1041
1042 if ((semantics->precision > 1) && (Parts[PartCount - 1] & HighBitMask))
1043 return false;
1044
1045 return true;
1046}
1047
1048bool IEEEFloat::isSignificandAllZerosExceptMSB() const {
1049 const integerPart *Parts = significandParts();
1050 const unsigned PartCount = partCountForBits(semantics->precision);
1051
1052 for (unsigned i = 0; i < PartCount - 1; i++) {
1053 if (Parts[i])
1054 return false;
1055 }
1056
1057 const unsigned NumHighBits = getNumHighBits();
1058 const integerPart MSBMask = integerPart(1)
1059 << (integerPartWidth - NumHighBits);
1060 return ((semantics->precision <= 1) || (Parts[PartCount - 1] == MSBMask));
1061}
1062
1064 bool IsMaxExp = isFiniteNonZero() && exponent == semantics->maxExponent;
1065 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1066 semantics->nanEncoding == fltNanEncoding::AllOnes) {
1067 // The largest number by magnitude in our format will be the floating point
1068 // number with maximum exponent and with significand that is all ones except
1069 // the LSB.
1070 return (IsMaxExp && APFloat::hasSignificand(*semantics))
1071 ? isSignificandAllOnesExceptLSB()
1072 : IsMaxExp;
1073 } else {
1074 // The largest number by magnitude in our format will be the floating point
1075 // number with maximum exponent and with significand that is all ones.
1076 return IsMaxExp && isSignificandAllOnes();
1077 }
1078}
1079
1081 // This could be made more efficient; I'm going for obviously correct.
1082 if (!isFinite()) return false;
1083 IEEEFloat truncated = *this;
1084 truncated.roundToIntegral(rmTowardZero);
1085 return compare(truncated) == cmpEqual;
1086}
1087
1088bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {
1089 if (this == &rhs)
1090 return true;
1091 if (semantics != rhs.semantics ||
1092 category != rhs.category ||
1093 sign != rhs.sign)
1094 return false;
1095 if (category==fcZero || category==fcInfinity)
1096 return true;
1097
1098 if (isFiniteNonZero() && exponent != rhs.exponent)
1099 return false;
1100
1101 return std::equal(significandParts(), significandParts() + partCount(),
1102 rhs.significandParts());
1103}
1104
1106 initialize(&ourSemantics);
1107 sign = 0;
1108 category = fcNormal;
1109 zeroSignificand();
1110 exponent = ourSemantics.precision - 1;
1111 significandParts()[0] = value;
1113}
1114
1116 initialize(&ourSemantics);
1117 // The Float8E8MOFNU format does not have a representation
1118 // for zero. So, use the closest representation instead.
1119 // Moreover, the all-zero encoding represents a valid
1120 // normal value (which is the smallestNormalized here).
1121 // Hence, we call makeSmallestNormalized (where category is
1122 // 'fcNormal') instead of makeZero (where category is 'fcZero').
1123 ourSemantics.hasZero ? makeZero(false) : makeSmallestNormalized(false);
1124}
1125
1126// Delegate to the previous constructor, because later copy constructor may
1127// actually inspects category, which can't be garbage.
1129 : IEEEFloat(ourSemantics) {}
1130
1132 initialize(rhs.semantics);
1133 assign(rhs);
1134}
1135
1136IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&APFloatBase::semBogus) {
1137 *this = std::move(rhs);
1138}
1139
1140IEEEFloat::~IEEEFloat() { freeSignificand(); }
1141
1142unsigned int IEEEFloat::partCount() const {
1143 return partCountForBits(semantics->precision + 1);
1144}
1145
1146const APFloat::integerPart *IEEEFloat::significandParts() const {
1147 return const_cast<IEEEFloat *>(this)->significandParts();
1148}
1149
1150APFloat::integerPart *IEEEFloat::significandParts() {
1151 if (partCount() > 1)
1152 return significand.parts;
1153 else
1154 return &significand.part;
1155}
1156
1157void IEEEFloat::zeroSignificand() {
1158 APInt::tcSet(significandParts(), 0, partCount());
1159}
1160
1161/* Increment an fcNormal floating point number's significand. */
1162void IEEEFloat::incrementSignificand() {
1163 [[maybe_unused]] integerPart carry =
1164 APInt::tcIncrement(significandParts(), partCount());
1165
1166 /* Our callers should never cause us to overflow. */
1167 assert(carry == 0);
1168}
1169
1170/* Add the significand of the RHS. Returns the carry flag. */
1171APFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
1172 integerPart *parts = significandParts();
1173
1174 assert(semantics == rhs.semantics);
1175 assert(exponent == rhs.exponent);
1176
1177 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
1178}
1179
1180/* Subtract the significand of the RHS with a borrow flag. Returns
1181 the borrow flag. */
1182APFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
1183 integerPart borrow) {
1184 integerPart *parts = significandParts();
1185
1186 assert(semantics == rhs.semantics);
1187 assert(exponent == rhs.exponent);
1188
1189 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
1190 partCount());
1191}
1192
1193/* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
1194 on to the full-precision result of the multiplication. Returns the
1195 lost fraction. */
1196lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
1197 IEEEFloat addend,
1198 bool ignoreAddend) {
1199 integerPart scratch[4];
1200 bool ignored;
1201
1202 assert(semantics == rhs.semantics);
1203
1204 unsigned precision = semantics->precision;
1205
1206 // Allocate space for twice as many bits as the original significand, plus one
1207 // extra bit for the addition to overflow into.
1208 unsigned newPartsCount = partCountForBits(precision * 2 + 1);
1209
1210 // FIXME: Replace with SmallVector<4>.
1211 integerPart *fullSignificand =
1212 newPartsCount > 4 ? new integerPart[newPartsCount] : scratch;
1213
1214 integerPart *lhsSignificand = significandParts();
1215 unsigned partsCount = partCount();
1216
1217 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
1218 rhs.significandParts(), partsCount, partsCount);
1219
1220 lostFraction lost_fraction = lfExactlyZero;
1221 // One, not zero, based MSB.
1222 unsigned omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1223 exponent += rhs.exponent;
1224
1225 // Assume the operands involved in the multiplication are single-precision
1226 // FP, and the two multiplicants are:
1227 // *this = a23 . a22 ... a0 * 2^e1
1228 // rhs = b23 . b22 ... b0 * 2^e2
1229 // the result of multiplication is:
1230 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
1231 // Note that there are three significant bits at the left-hand side of the
1232 // radix point: two for the multiplication, and an overflow bit for the
1233 // addition (that will always be zero at this point). Move the radix point
1234 // toward left by two bits, and adjust exponent accordingly.
1235 exponent += 2;
1236
1237 if (!ignoreAddend && addend.isNonZero()) {
1238 // The intermediate result of the multiplication has "2 * precision"
1239 // signicant bit; adjust the addend to be consistent with mul result.
1240 //
1241 Significand savedSignificand = significand;
1242 const fltSemantics *savedSemantics = semantics;
1243
1244 // Normalize our MSB to one below the top bit to allow for overflow.
1245 unsigned extendedPrecision = 2 * precision + 1;
1246 if (omsb != extendedPrecision - 1) {
1247 assert(extendedPrecision > omsb);
1248 APInt::tcShiftLeft(fullSignificand, newPartsCount,
1249 (extendedPrecision - 1) - omsb);
1250 exponent -= (extendedPrecision - 1) - omsb;
1251 }
1252
1253 /* Create new semantics. */
1254 fltSemantics extendedSemantics = *semantics;
1255 extendedSemantics.precision = extendedPrecision;
1256
1257 if (newPartsCount == 1)
1258 significand.part = fullSignificand[0];
1259 else
1260 significand.parts = fullSignificand;
1261 semantics = &extendedSemantics;
1262
1263 // Make a copy so we can convert it to the extended semantics.
1264 // Note that we cannot convert the addend directly, as the extendedSemantics
1265 // is a local variable (which we take a reference to).
1266 IEEEFloat extendedAddend(addend);
1267 [[maybe_unused]] opStatus status = extendedAddend.convert(
1268 extendedSemantics, APFloat::rmTowardZero, &ignored);
1269 assert(status == APFloat::opOK);
1270
1271 // Shift the significand of the addend right by one bit. This guarantees
1272 // that the high bit of the significand is zero (same as fullSignificand),
1273 // so the addition will overflow (if it does overflow at all) into the top bit.
1274 lost_fraction = extendedAddend.shiftSignificandRight(1);
1275 assert(lost_fraction == lfExactlyZero &&
1276 "Lost precision while shifting addend for fused-multiply-add.");
1277
1278 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
1279
1280 /* Restore our state. */
1281 if (newPartsCount == 1)
1282 fullSignificand[0] = significand.part;
1283 significand = savedSignificand;
1284 semantics = savedSemantics;
1285
1286 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1287 }
1288
1289 // Convert the result having "2 * precision" significant-bits back to the one
1290 // having "precision" significant-bits. First, move the radix point from
1291 // poision "2*precision - 1" to "precision - 1". The exponent need to be
1292 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
1293 exponent -= precision + 1;
1294
1295 // In case MSB resides at the left-hand side of radix point, shift the
1296 // mantissa right by some amount to make sure the MSB reside right before
1297 // the radix point (i.e. "MSB . rest-significant-bits").
1298 //
1299 // Note that the result is not normalized when "omsb < precision". So, the
1300 // caller needs to call IEEEFloat::normalize() if normalized value is
1301 // expected.
1302 if (omsb > precision) {
1303 unsigned int bits, significantParts;
1304 lostFraction lf;
1305
1306 bits = omsb - precision;
1307 significantParts = partCountForBits(omsb);
1308 lf = shiftRight(fullSignificand, significantParts, bits);
1309 lost_fraction = combineLostFractions(lf, lost_fraction);
1310 exponent += bits;
1311 }
1312
1313 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
1314
1315 if (newPartsCount > 4)
1316 delete [] fullSignificand;
1317
1318 return lost_fraction;
1319}
1320
1321lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) {
1322 // When the given semantics has zero, the addend here is a zero.
1323 // i.e . it belongs to the 'fcZero' category.
1324 // But when the semantics does not support zero, we need to
1325 // explicitly convey that this addend should be ignored
1326 // for multiplication.
1327 return multiplySignificand(rhs, IEEEFloat(*semantics), !semantics->hasZero);
1328}
1329
1330/* Multiply the significands of LHS and RHS to DST. */
1331lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
1332 integerPart scratch[4];
1333
1334 assert(semantics == rhs.semantics);
1335
1336 integerPart *lhsSignificand = significandParts();
1337 const integerPart *rhsSignificand = rhs.significandParts();
1338 unsigned partsCount = partCount();
1339
1340 integerPart *dividend =
1341 partsCount > 2 ? new integerPart[partsCount * 2] : scratch;
1342 integerPart *divisor = dividend + partsCount;
1343
1344 /* Copy the dividend and divisor as they will be modified in-place. */
1345 for (unsigned i = 0; i < partsCount; i++) {
1346 dividend[i] = lhsSignificand[i];
1347 divisor[i] = rhsSignificand[i];
1348 lhsSignificand[i] = 0;
1349 }
1350
1351 exponent -= rhs.exponent;
1352
1353 unsigned int precision = semantics->precision;
1354
1355 /* Normalize the divisor. */
1356 unsigned bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1357 if (bit) {
1358 exponent += bit;
1359 APInt::tcShiftLeft(divisor, partsCount, bit);
1360 }
1361
1362 /* Normalize the dividend. */
1363 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1364 if (bit) {
1365 exponent -= bit;
1366 APInt::tcShiftLeft(dividend, partsCount, bit);
1367 }
1368
1369 /* Ensure the dividend >= divisor initially for the loop below.
1370 Incidentally, this means that the division loop below is
1371 guaranteed to set the integer bit to one. */
1372 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1373 exponent--;
1374 APInt::tcShiftLeft(dividend, partsCount, 1);
1375 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1376 }
1377
1378 /* Long division. */
1379 for (bit = precision; bit; bit -= 1) {
1380 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1381 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1382 APInt::tcSetBit(lhsSignificand, bit - 1);
1383 }
1384
1385 APInt::tcShiftLeft(dividend, partsCount, 1);
1386 }
1387
1388 /* Figure out the lost fraction. */
1389 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1390
1391 lostFraction lost_fraction;
1392 if (cmp > 0)
1393 lost_fraction = lfMoreThanHalf;
1394 else if (cmp == 0)
1395 lost_fraction = lfExactlyHalf;
1396 else if (APInt::tcIsZero(dividend, partsCount))
1397 lost_fraction = lfExactlyZero;
1398 else
1399 lost_fraction = lfLessThanHalf;
1400
1401 if (partsCount > 2)
1402 delete [] dividend;
1403
1404 return lost_fraction;
1405}
1406
1407unsigned int IEEEFloat::significandMSB() const {
1408 return APInt::tcMSB(significandParts(), partCount());
1409}
1410
1411unsigned int IEEEFloat::significandLSB() const {
1412 return APInt::tcLSB(significandParts(), partCount());
1413}
1414
1415/* Note that a zero result is NOT normalized to fcZero. */
1416lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
1417 /* Our exponent should not overflow. */
1418 assert((ExponentType) (exponent + bits) >= exponent);
1419
1420 exponent += bits;
1421
1422 return shiftRight(significandParts(), partCount(), bits);
1423}
1424
1425/* Shift the significand left BITS bits, subtract BITS from its exponent. */
1426void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
1427 assert(bits < semantics->precision ||
1428 (semantics->precision == 1 && bits <= 1));
1429
1430 if (bits) {
1431 unsigned int partsCount = partCount();
1432
1433 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1434 exponent -= bits;
1435
1436 assert(!APInt::tcIsZero(significandParts(), partsCount));
1437 }
1438}
1439
1441 assert(semantics == rhs.semantics);
1443 assert(rhs.isFiniteNonZero());
1444
1445 int compare = exponent - rhs.exponent;
1446
1447 /* If exponents are equal, do an unsigned bignum comparison of the
1448 significands. */
1449 if (compare == 0)
1450 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1451 partCount());
1452
1453 if (compare > 0)
1454 return cmpGreaterThan;
1455 else if (compare < 0)
1456 return cmpLessThan;
1457 else
1458 return cmpEqual;
1459}
1460
1461/* Set the least significant BITS bits of a bignum, clear the
1462 rest. */
1463static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts,
1464 unsigned bits) {
1465 unsigned i = 0;
1466 while (bits > APInt::APINT_BITS_PER_WORD) {
1467 dst[i++] = ~(APInt::WordType)0;
1469 }
1470
1471 if (bits)
1472 dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits);
1473
1474 while (i < parts)
1475 dst[i++] = 0;
1476}
1477
1478/* Handle overflow. Sign is preserved. We either become infinity or
1479 the largest finite number. */
1480APFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
1482 /* Infinity? */
1483 if (rounding_mode == rmNearestTiesToEven ||
1484 rounding_mode == rmNearestTiesToAway ||
1485 (rounding_mode == rmTowardPositive && !sign) ||
1486 (rounding_mode == rmTowardNegative && sign)) {
1488 makeNaN(false, sign);
1489 else
1490 category = fcInfinity;
1491 return static_cast<opStatus>(opOverflow | opInexact);
1492 }
1493 }
1494
1495 /* Otherwise we become the largest finite number. */
1496 category = fcNormal;
1497 exponent = semantics->maxExponent;
1498 tcSetLeastSignificantBits(significandParts(), partCount(),
1499 semantics->precision);
1500 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1501 semantics->nanEncoding == fltNanEncoding::AllOnes)
1502 APInt::tcClearBit(significandParts(), 0);
1503
1504 return opInexact;
1505}
1506
1507/* Returns TRUE if, when truncating the current number, with BIT the
1508 new LSB, with the given lost fraction and rounding mode, the result
1509 would need to be rounded away from zero (i.e., by increasing the
1510 signficand). This routine must work for fcZero of both signs, and
1511 fcNormal numbers. */
1512bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
1513 lostFraction lost_fraction,
1514 unsigned int bit) const {
1515 /* NaNs and infinities should not have lost fractions. */
1516 assert(isFiniteNonZero() || category == fcZero);
1517
1518 /* Current callers never pass this so we don't handle it. */
1519 assert(lost_fraction != lfExactlyZero);
1520
1521 switch (rounding_mode) {
1523 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1524
1526 if (lost_fraction == lfMoreThanHalf)
1527 return true;
1528
1529 /* Our zeroes don't have a significand to test. */
1530 if (lost_fraction == lfExactlyHalf && category != fcZero)
1531 return APInt::tcExtractBit(significandParts(), bit);
1532
1533 return false;
1534
1535 case rmTowardZero:
1536 return false;
1537
1538 case rmTowardPositive:
1539 return !sign;
1540
1541 case rmTowardNegative:
1542 return sign;
1543
1544 default:
1545 break;
1546 }
1547 llvm_unreachable("Invalid rounding mode found");
1548}
1549
1550APFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
1551 lostFraction lost_fraction) {
1552 if (!isFiniteNonZero())
1553 return opOK;
1554
1555 /* Before rounding normalize the exponent of fcNormal numbers. */
1556 /* One, not zero, based MSB. */
1557 unsigned omsb = significandMSB() + 1;
1558
1559 // Only skip this `if` if the value is exactly zero.
1560 if (omsb || lost_fraction != lfExactlyZero) {
1561 /* OMSB is numbered from 1. We want to place it in the integer
1562 bit numbered PRECISION if possible, with a compensating change in
1563 the exponent. */
1564 int exponentChange = omsb - semantics->precision;
1565
1566 /* If the resulting exponent is too high, overflow according to
1567 the rounding mode. */
1568 if (exponent + exponentChange > semantics->maxExponent)
1569 return handleOverflow(rounding_mode);
1570
1571 /* Subnormal numbers have exponent minExponent, and their MSB
1572 is forced based on that. */
1573 if (exponent + exponentChange < semantics->minExponent)
1574 exponentChange = semantics->minExponent - exponent;
1575
1576 /* Shifting left is easy as we don't lose precision. */
1577 if (exponentChange < 0) {
1578 assert(lost_fraction == lfExactlyZero);
1579
1580 shiftSignificandLeft(-exponentChange);
1581
1582 return opOK;
1583 }
1584
1585 if (exponentChange > 0) {
1586 lostFraction lf;
1587
1588 /* Shift right and capture any new lost fraction. */
1589 lf = shiftSignificandRight(exponentChange);
1590
1591 lost_fraction = combineLostFractions(lf, lost_fraction);
1592
1593 /* Keep OMSB up-to-date. */
1594 if (omsb > (unsigned) exponentChange)
1595 omsb -= exponentChange;
1596 else
1597 omsb = 0;
1598 }
1599 }
1600
1601 // The all-ones values is an overflow if NaN is all ones. If NaN is
1602 // represented by negative zero, then it is a valid finite value.
1603 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1604 semantics->nanEncoding == fltNanEncoding::AllOnes &&
1605 exponent == semantics->maxExponent && isSignificandAllOnes())
1606 return handleOverflow(rounding_mode);
1607
1608 /* Now round the number according to rounding_mode given the lost
1609 fraction. */
1610
1611 /* As specified in IEEE 754, since we do not trap we do not report
1612 underflow for exact results. */
1613 if (lost_fraction == lfExactlyZero) {
1614 /* Canonicalize zeroes. */
1615 if (omsb == 0) {
1616 category = fcZero;
1617 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
1618 sign = false;
1619 if (!semantics->hasZero)
1621 }
1622
1623 return opOK;
1624 }
1625
1626 /* Increment the significand if we're rounding away from zero. */
1627 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1628 if (omsb == 0)
1629 exponent = semantics->minExponent;
1630
1631 incrementSignificand();
1632 omsb = significandMSB() + 1;
1633
1634 /* Did the significand increment overflow? */
1635 if (omsb == (unsigned) semantics->precision + 1) {
1636 /* Renormalize by incrementing the exponent and shifting our
1637 significand right one. However if we already have the
1638 maximum exponent we overflow to infinity. */
1639 if (exponent == semantics->maxExponent)
1640 // Invoke overflow handling with a rounding mode that will guarantee
1641 // that the result gets turned into the correct infinity representation.
1642 // This is needed instead of just setting the category to infinity to
1643 // account for 8-bit floating point types that have no inf, only NaN.
1644 return handleOverflow(sign ? rmTowardNegative : rmTowardPositive);
1645
1646 shiftSignificandRight(1);
1647
1648 return opInexact;
1649 }
1650
1651 // The all-ones values is an overflow if NaN is all ones. If NaN is
1652 // represented by negative zero, then it is a valid finite value.
1653 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1654 semantics->nanEncoding == fltNanEncoding::AllOnes &&
1655 exponent == semantics->maxExponent && isSignificandAllOnes())
1656 return handleOverflow(rounding_mode);
1657 }
1658
1659 /* The normal case - we were and are not denormal, and any
1660 significand increment above didn't overflow. */
1661 if (omsb == semantics->precision)
1662 return opInexact;
1663
1664 /* We have a non-zero denormal. */
1665 assert(omsb < semantics->precision);
1666
1667 /* Canonicalize zeroes. */
1668 if (omsb == 0) {
1669 category = fcZero;
1670 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
1671 sign = false;
1672 // This condition handles the case where the semantics
1673 // does not have zero but uses the all-zero encoding
1674 // to represent the smallest normal value.
1675 if (!semantics->hasZero)
1677 }
1678
1679 /* The fcZero case is a denormal that underflowed to zero. */
1680 return (opStatus) (opUnderflow | opInexact);
1681}
1682
1683APFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
1684 bool subtract) {
1685 switch (PackCategoriesIntoKey(category, rhs.category)) {
1686 default:
1687 llvm_unreachable(nullptr);
1688
1692 assign(rhs);
1693 [[fallthrough]];
1698 if (isSignaling()) {
1699 makeQuiet();
1700 return opInvalidOp;
1701 }
1702 return rhs.isSignaling() ? opInvalidOp : opOK;
1703
1707 return opOK;
1708
1711 category = fcInfinity;
1712 sign = rhs.sign ^ subtract;
1713 return opOK;
1714
1716 assign(rhs);
1717 sign = rhs.sign ^ subtract;
1718 return opOK;
1719
1721 /* Sign depends on rounding mode; handled by caller. */
1722 return opOK;
1723
1725 /* Differently signed infinities can only be validly
1726 subtracted. */
1727 if (((sign ^ rhs.sign)!=0) != subtract) {
1728 makeNaN();
1729 return opInvalidOp;
1730 }
1731
1732 return opOK;
1733
1735 return opDivByZero;
1736 }
1737}
1738
1739/* Add or subtract two normal numbers. */
1740lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
1741 bool subtract) {
1742 [[maybe_unused]] integerPart carry = 0;
1743 lostFraction lost_fraction;
1744
1745 /* Determine if the operation on the absolute values is effectively
1746 an addition or subtraction. */
1747 subtract ^= static_cast<bool>(sign ^ rhs.sign);
1748
1749 /* Are we bigger exponent-wise than the RHS? */
1750 int bits = exponent - rhs.exponent;
1751
1752 /* Subtraction is more subtle than one might naively expect. */
1753 if (subtract) {
1754 if ((bits < 0) && !semantics->hasSignedRepr)
1756 "This floating point format does not support signed values");
1757
1758 IEEEFloat temp_rhs(rhs);
1759 bool lost_fraction_is_from_rhs = false;
1760
1761 if (bits == 0)
1762 lost_fraction = lfExactlyZero;
1763 else if (bits > 0) {
1764 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1765 lost_fraction_is_from_rhs = true;
1766 shiftSignificandLeft(1);
1767 } else {
1768 lost_fraction = shiftSignificandRight(-bits - 1);
1769 temp_rhs.shiftSignificandLeft(1);
1770 }
1771
1772 // Should we reverse the subtraction.
1773 cmpResult cmp_result = compareAbsoluteValue(temp_rhs);
1774 if (cmp_result == cmpLessThan) {
1775 bool borrow =
1776 lost_fraction != lfExactlyZero && !lost_fraction_is_from_rhs;
1777 if (borrow) {
1778 // The lost fraction is being subtracted, borrow from the significand
1779 // and invert `lost_fraction`.
1780 if (lost_fraction == lfLessThanHalf)
1781 lost_fraction = lfMoreThanHalf;
1782 else if (lost_fraction == lfMoreThanHalf)
1783 lost_fraction = lfLessThanHalf;
1784 }
1785 carry = temp_rhs.subtractSignificand(*this, borrow);
1786 copySignificand(temp_rhs);
1787 sign = !sign;
1788 } else if (cmp_result == cmpGreaterThan) {
1789 bool borrow = lost_fraction != lfExactlyZero && lost_fraction_is_from_rhs;
1790 if (borrow) {
1791 // The lost fraction is being subtracted, borrow from the significand
1792 // and invert `lost_fraction`.
1793 if (lost_fraction == lfLessThanHalf)
1794 lost_fraction = lfMoreThanHalf;
1795 else if (lost_fraction == lfMoreThanHalf)
1796 lost_fraction = lfLessThanHalf;
1797 }
1798 carry = subtractSignificand(temp_rhs, borrow);
1799 } else { // cmpEqual
1800 zeroSignificand();
1801 if (lost_fraction != lfExactlyZero && lost_fraction_is_from_rhs) {
1802 // rhs is slightly larger due to the lost fraction, flip the sign.
1803 sign = !sign;
1804 }
1805 }
1806
1807 /* The code above is intended to ensure that no borrow is
1808 necessary. */
1809 assert(!carry);
1810 } else {
1811 if (bits > 0) {
1812 IEEEFloat temp_rhs(rhs);
1813
1814 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1815 carry = addSignificand(temp_rhs);
1816 } else {
1817 lost_fraction = shiftSignificandRight(-bits);
1818 carry = addSignificand(rhs);
1819 }
1820
1821 /* We have a guard bit; generating a carry cannot happen. */
1822 assert(!carry);
1823 }
1824
1825 return lost_fraction;
1826}
1827
1828APFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
1829 switch (PackCategoriesIntoKey(category, rhs.category)) {
1830 default:
1831 llvm_unreachable(nullptr);
1832
1836 assign(rhs);
1837 sign = false;
1838 [[fallthrough]];
1843 sign ^= rhs.sign; // restore the original sign
1844 if (isSignaling()) {
1845 makeQuiet();
1846 return opInvalidOp;
1847 }
1848 return rhs.isSignaling() ? opInvalidOp : opOK;
1849
1853 category = fcInfinity;
1854 return opOK;
1855
1859 category = fcZero;
1860 return opOK;
1861
1864 makeNaN();
1865 return opInvalidOp;
1866
1868 return opOK;
1869 }
1870}
1871
1872APFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
1873 switch (PackCategoriesIntoKey(category, rhs.category)) {
1874 default:
1875 llvm_unreachable(nullptr);
1876
1880 assign(rhs);
1881 sign = false;
1882 [[fallthrough]];
1887 sign ^= rhs.sign; // restore the original sign
1888 if (isSignaling()) {
1889 makeQuiet();
1890 return opInvalidOp;
1891 }
1892 return rhs.isSignaling() ? opInvalidOp : opOK;
1893
1898 return opOK;
1899
1901 category = fcZero;
1902 return opOK;
1903
1905 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
1906 makeNaN(false, sign);
1907 else
1908 category = fcInfinity;
1909 return opDivByZero;
1910
1913 makeNaN();
1914 return opInvalidOp;
1915
1917 return opOK;
1918 }
1919}
1920
1921APFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
1922 switch (PackCategoriesIntoKey(category, rhs.category)) {
1923 default:
1924 llvm_unreachable(nullptr);
1925
1929 assign(rhs);
1930 [[fallthrough]];
1935 if (isSignaling()) {
1936 makeQuiet();
1937 return opInvalidOp;
1938 }
1939 return rhs.isSignaling() ? opInvalidOp : opOK;
1940
1944 return opOK;
1945
1951 makeNaN();
1952 return opInvalidOp;
1953
1955 return opOK;
1956 }
1957}
1958
1959APFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) {
1960 switch (PackCategoriesIntoKey(category, rhs.category)) {
1961 default:
1962 llvm_unreachable(nullptr);
1963
1967 assign(rhs);
1968 [[fallthrough]];
1973 if (isSignaling()) {
1974 makeQuiet();
1975 return opInvalidOp;
1976 }
1977 return rhs.isSignaling() ? opInvalidOp : opOK;
1978
1982 return opOK;
1983
1989 makeNaN();
1990 return opInvalidOp;
1991
1993 return opDivByZero; // fake status, indicating this is not a special case
1994 }
1995}
1996
1997/* Change sign. */
1999 // With NaN-as-negative-zero, neither NaN or negative zero can change
2000 // their signs.
2001 if (semantics->nanEncoding == fltNanEncoding::NegativeZero &&
2002 (isZero() || isNaN()))
2003 return;
2004 /* Look mummy, this one's easy. */
2005 sign = !sign;
2006}
2007
2008/* Normalized addition or subtraction. */
2009APFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
2010 roundingMode rounding_mode,
2011 bool subtract) {
2012 opStatus fs = addOrSubtractSpecials(rhs, subtract);
2013
2014 /* This return code means it was not a simple case. */
2015 if (fs == opDivByZero) {
2016 lostFraction lost_fraction;
2017
2018 lost_fraction = addOrSubtractSignificand(rhs, subtract);
2019 fs = normalize(rounding_mode, lost_fraction);
2020
2021 /* Can only be zero if we lost no fraction. */
2022 assert(category != fcZero || lost_fraction == lfExactlyZero);
2023 }
2024
2025 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2026 positive zero unless rounding to minus infinity, except that
2027 adding two like-signed zeroes gives that zero. */
2028 if (category == fcZero) {
2029 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
2030 sign = (rounding_mode == rmTowardNegative);
2031 // NaN-in-negative-zero means zeros need to be normalized to +0.
2032 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2033 sign = false;
2034 }
2035
2036 return fs;
2037}
2038
2039/* Normalized addition. */
2041 roundingMode rounding_mode) {
2042 return addOrSubtract(rhs, rounding_mode, false);
2043}
2044
2045/* Normalized subtraction. */
2047 roundingMode rounding_mode) {
2048 return addOrSubtract(rhs, rounding_mode, true);
2049}
2050
2051/* Normalized multiply. */
2053 roundingMode rounding_mode) {
2054 sign ^= rhs.sign;
2055 opStatus fs = multiplySpecials(rhs);
2056
2057 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2058 sign = false;
2059 if (isFiniteNonZero()) {
2060 lostFraction lost_fraction = multiplySignificand(rhs);
2061 fs = normalize(rounding_mode, lost_fraction);
2062 if (lost_fraction != lfExactlyZero)
2063 fs = (opStatus) (fs | opInexact);
2064 }
2065
2066 return fs;
2067}
2068
2069/* Normalized divide. */
2071 roundingMode rounding_mode) {
2072 sign ^= rhs.sign;
2073 opStatus fs = divideSpecials(rhs);
2074
2075 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2076 sign = false;
2077 if (isFiniteNonZero()) {
2078 lostFraction lost_fraction = divideSignificand(rhs);
2079 fs = normalize(rounding_mode, lost_fraction);
2080 if (lost_fraction != lfExactlyZero)
2081 fs = (opStatus) (fs | opInexact);
2082 }
2083
2084 return fs;
2085}
2086
2087/* Normalized remainder. */
2089 unsigned int origSign = sign;
2090
2091 // First handle the special cases.
2092 opStatus fs = remainderSpecials(rhs);
2093 if (fs != opDivByZero)
2094 return fs;
2095
2096 fs = opOK;
2097
2098 // Make sure the current value is less than twice the denom. If the addition
2099 // did not succeed (an overflow has happened), which means that the finite
2100 // value we currently posses must be less than twice the denom (as we are
2101 // using the same semantics).
2102 IEEEFloat P2 = rhs;
2103 if (P2.add(rhs, rmNearestTiesToEven) == opOK) {
2104 fs = mod(P2);
2105 assert(fs == opOK);
2106 }
2107
2108 // Lets work with absolute numbers.
2109 IEEEFloat P = rhs;
2110 P.sign = false;
2111 sign = false;
2112
2113 //
2114 // To calculate the remainder we use the following scheme.
2115 //
2116 // The remainder is defained as follows:
2117 //
2118 // remainder = numer - rquot * denom = x - r * p
2119 //
2120 // Where r is the result of: x/p, rounded toward the nearest integral value
2121 // (with halfway cases rounded toward the even number).
2122 //
2123 // Currently, (after x mod 2p):
2124 // r is the number of 2p's present inside x, which is inherently, an even
2125 // number of p's.
2126 //
2127 // We may split the remaining calculation into 4 options:
2128 // - if x < 0.5p then we round to the nearest number with is 0, and are done.
2129 // - if x == 0.5p then we round to the nearest even number which is 0, and we
2130 // are done as well.
2131 // - if 0.5p < x < p then we round to nearest number which is 1, and we have
2132 // to subtract 1p at least once.
2133 // - if x >= p then we must subtract p at least once, as x must be a
2134 // remainder.
2135 //
2136 // By now, we were done, or we added 1 to r, which in turn, now an odd number.
2137 //
2138 // We can now split the remaining calculation to the following 3 options:
2139 // - if x < 0.5p then we round to the nearest number with is 0, and are done.
2140 // - if x == 0.5p then we round to the nearest even number. As r is odd, we
2141 // must round up to the next even number. so we must subtract p once more.
2142 // - if x > 0.5p (and inherently x < p) then we must round r up to the next
2143 // integral, and subtract p once more.
2144 //
2145
2146 // Extend the semantics to prevent an overflow/underflow or inexact result.
2147 bool losesInfo;
2148 fltSemantics extendedSemantics = *semantics;
2149 extendedSemantics.maxExponent++;
2150 extendedSemantics.minExponent--;
2151 extendedSemantics.precision += 2;
2152
2153 IEEEFloat VEx = *this;
2154 fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2155 assert(fs == opOK && !losesInfo);
2156 IEEEFloat PEx = P;
2157 fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2158 assert(fs == opOK && !losesInfo);
2159
2160 // It is simpler to work with 2x instead of 0.5p, and we do not need to lose
2161 // any fraction.
2162 fs = VEx.add(VEx, rmNearestTiesToEven);
2163 assert(fs == opOK);
2164
2165 if (VEx.compare(PEx) == cmpGreaterThan) {
2167 assert(fs == opOK);
2168
2169 // Make VEx = this.add(this), but because we have different semantics, we do
2170 // not want to `convert` again, so we just subtract PEx twice (which equals
2171 // to the desired value).
2172 fs = VEx.subtract(PEx, rmNearestTiesToEven);
2173 assert(fs == opOK);
2174 fs = VEx.subtract(PEx, rmNearestTiesToEven);
2175 assert(fs == opOK);
2176
2177 cmpResult result = VEx.compare(PEx);
2178 if (result == cmpGreaterThan || result == cmpEqual) {
2180 assert(fs == opOK);
2181 }
2182 }
2183
2184 if (isZero()) {
2185 sign = origSign; // IEEE754 requires this
2186 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2187 // But some 8-bit floats only have positive 0.
2188 sign = false;
2189 } else {
2190 sign ^= origSign;
2191 }
2192 return fs;
2193}
2194
2195/* Normalized llvm frem (C fmod). */
2197 opStatus fs = modSpecials(rhs);
2198 unsigned int origSign = sign;
2199
2200 while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
2202 int Exp = ilogb(*this) - ilogb(rhs);
2203 IEEEFloat V = scalbn(rhs, Exp, rmNearestTiesToEven);
2204 // V can overflow to NaN with fltNonfiniteBehavior::NanOnly, so explicitly
2205 // check for it.
2206 if (V.isNaN() || compareAbsoluteValue(V) == cmpLessThan)
2207 V = scalbn(rhs, Exp - 1, rmNearestTiesToEven);
2208 V.sign = sign;
2209
2211
2212 // When the semantics supports zero, this loop's
2213 // exit-condition is handled by the 'isFiniteNonZero'
2214 // category check above. However, when the semantics
2215 // does not have 'fcZero' and we have reached the
2216 // minimum possible value, (and any further subtract
2217 // will underflow to the same value) explicitly
2218 // provide an exit-path here.
2219 if (!semantics->hasZero && this->isSmallest())
2220 break;
2221
2222 assert(fs==opOK);
2223 }
2224 if (isZero()) {
2225 sign = origSign; // fmod requires this
2226 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2227 sign = false;
2228 }
2229 return fs;
2230}
2231
2232/* Normalized fused-multiply-add. */
2234 const IEEEFloat &addend,
2235 roundingMode rounding_mode) {
2236 opStatus fs;
2237
2238 /* Post-multiplication sign, before addition. */
2239 sign ^= multiplicand.sign;
2240
2241 /* If and only if all arguments are normal do we need to do an
2242 extended-precision calculation. */
2243 if (isFiniteNonZero() &&
2244 multiplicand.isFiniteNonZero() &&
2245 addend.isFinite()) {
2246 lostFraction lost_fraction;
2247
2248 lost_fraction = multiplySignificand(multiplicand, addend);
2249 fs = normalize(rounding_mode, lost_fraction);
2250 if (lost_fraction != lfExactlyZero)
2251 fs = (opStatus) (fs | opInexact);
2252
2253 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2254 positive zero unless rounding to minus infinity, except that
2255 adding two like-signed zeroes gives that zero. */
2256 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) {
2257 sign = (rounding_mode == rmTowardNegative);
2258 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2259 sign = false;
2260 }
2261 } else {
2262 fs = multiplySpecials(multiplicand);
2263
2264 /* FS can only be opOK or opInvalidOp. There is no more work
2265 to do in the latter case. The IEEE-754R standard says it is
2266 implementation-defined in this case whether, if ADDEND is a
2267 quiet NaN, we raise invalid op; this implementation does so.
2268
2269 If we need to do the addition we can do so with normal
2270 precision. */
2271 if (fs == opOK)
2272 fs = addOrSubtract(addend, rounding_mode, false);
2273 }
2274
2275 return fs;
2276}
2277
2278/* Rounding-mode correct round to integral value. */
2280 if (isInfinity())
2281 // [IEEE Std 754-2008 6.1]:
2282 // The behavior of infinity in floating-point arithmetic is derived from the
2283 // limiting cases of real arithmetic with operands of arbitrarily
2284 // large magnitude, when such a limit exists.
2285 // ...
2286 // Operations on infinite operands are usually exact and therefore signal no
2287 // exceptions ...
2288 return opOK;
2289
2290 if (isNaN()) {
2291 if (isSignaling()) {
2292 // [IEEE Std 754-2008 6.2]:
2293 // Under default exception handling, any operation signaling an invalid
2294 // operation exception and for which a floating-point result is to be
2295 // delivered shall deliver a quiet NaN.
2296 makeQuiet();
2297 // [IEEE Std 754-2008 6.2]:
2298 // Signaling NaNs shall be reserved operands that, under default exception
2299 // handling, signal the invalid operation exception(see 7.2) for every
2300 // general-computational and signaling-computational operation except for
2301 // the conversions described in 5.12.
2302 return opInvalidOp;
2303 } else {
2304 // [IEEE Std 754-2008 6.2]:
2305 // For an operation with quiet NaN inputs, other than maximum and minimum
2306 // operations, if a floating-point result is to be delivered the result
2307 // shall be a quiet NaN which should be one of the input NaNs.
2308 // ...
2309 // Every general-computational and quiet-computational operation involving
2310 // one or more input NaNs, none of them signaling, shall signal no
2311 // exception, except fusedMultiplyAdd might signal the invalid operation
2312 // exception(see 7.2).
2313 return opOK;
2314 }
2315 }
2316
2317 if (isZero()) {
2318 // [IEEE Std 754-2008 6.3]:
2319 // ... the sign of the result of conversions, the quantize operation, the
2320 // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is
2321 // the sign of the first or only operand.
2322 return opOK;
2323 }
2324
2325 // If the exponent is large enough, we know that this value is already
2326 // integral, and the arithmetic below would potentially cause it to saturate
2327 // to +/-Inf. Bail out early instead.
2328 if (exponent + 1 >= (int)APFloat::semanticsPrecision(*semantics))
2329 return opOK;
2330
2331 // The algorithm here is quite simple: we add 2^(p-1), where p is the
2332 // precision of our format, and then subtract it back off again. The choice
2333 // of rounding modes for the addition/subtraction determines the rounding mode
2334 // for our integral rounding as well.
2335 // NOTE: When the input value is negative, we do subtraction followed by
2336 // addition instead.
2337 APInt IntegerConstant(NextPowerOf2(APFloat::semanticsPrecision(*semantics)),
2338 1);
2339 IntegerConstant <<= APFloat::semanticsPrecision(*semantics) - 1;
2340 IEEEFloat MagicConstant(*semantics);
2341 opStatus fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
2343 assert(fs == opOK);
2344 MagicConstant.sign = sign;
2345
2346 // Preserve the input sign so that we can handle the case of zero result
2347 // correctly.
2348 bool inputSign = isNegative();
2349
2350 fs = add(MagicConstant, rounding_mode);
2351
2352 // Current value and 'MagicConstant' are both integers, so the result of the
2353 // subtraction is always exact according to Sterbenz' lemma.
2354 subtract(MagicConstant, rounding_mode);
2355
2356 // Restore the input sign.
2357 if (inputSign != isNegative())
2358 changeSign();
2359
2360 return fs;
2361}
2362
2363/* Comparison requires normalized numbers. */
2365 assert(semantics == rhs.semantics);
2366
2367 switch (PackCategoriesIntoKey(category, rhs.category)) {
2368 default:
2369 llvm_unreachable(nullptr);
2370
2378 return cmpUnordered;
2379
2383 if (sign)
2384 return cmpLessThan;
2385 else
2386 return cmpGreaterThan;
2387
2391 if (rhs.sign)
2392 return cmpGreaterThan;
2393 else
2394 return cmpLessThan;
2395
2397 if (sign == rhs.sign)
2398 return cmpEqual;
2399 else if (sign)
2400 return cmpLessThan;
2401 else
2402 return cmpGreaterThan;
2403
2405 return cmpEqual;
2406
2408 break;
2409 }
2410
2411 cmpResult result;
2412 /* Two normal numbers. Do they have the same sign? */
2413 if (sign != rhs.sign) {
2414 if (sign)
2415 result = cmpLessThan;
2416 else
2417 result = cmpGreaterThan;
2418 } else {
2419 /* Compare absolute values; invert result if negative. */
2420 result = compareAbsoluteValue(rhs);
2421
2422 if (sign) {
2423 if (result == cmpLessThan)
2424 result = cmpGreaterThan;
2425 else if (result == cmpGreaterThan)
2426 result = cmpLessThan;
2427 }
2428 }
2429
2430 return result;
2431}
2432
2433/// IEEEFloat::convert - convert a value of one floating point type to another.
2434/// The return value corresponds to the IEEE754 exceptions. *losesInfo
2435/// records whether the transformation lost information, i.e. whether
2436/// converting the result back to the original type will produce the
2437/// original value (this is almost the same as return value==fsOK, but there
2438/// are edge cases where this is not so).
2439
2441 roundingMode rounding_mode,
2442 bool *losesInfo) {
2443 opStatus fs;
2444 const fltSemantics &fromSemantics = *semantics;
2445 bool is_signaling = isSignaling();
2446
2448 unsigned newPartCount = partCountForBits(toSemantics.precision + 1);
2449 unsigned oldPartCount = partCount();
2450 int shift = toSemantics.precision - fromSemantics.precision;
2451
2452 bool X86SpecialNan = false;
2453 if (&fromSemantics == &APFloatBase::semX87DoubleExtended &&
2454 &toSemantics != &APFloatBase::semX87DoubleExtended && category == fcNaN &&
2455 (!(*significandParts() & 0x8000000000000000ULL) ||
2456 !(*significandParts() & 0x4000000000000000ULL))) {
2457 // x86 has some unusual NaNs which cannot be represented in any other
2458 // format; note them here.
2459 X86SpecialNan = true;
2460 }
2461
2462 // If this is a truncation of a denormal number, and the target semantics
2463 // has larger exponent range than the source semantics (this can happen
2464 // when truncating from PowerPC double-double to double format), the
2465 // right shift could lose result mantissa bits. Adjust exponent instead
2466 // of performing excessive shift.
2467 // Also do a similar trick in case shifting denormal would produce zero
2468 // significand as this case isn't handled correctly by normalize.
2469 if (shift < 0 && isFiniteNonZero()) {
2470 int omsb = significandMSB() + 1;
2471 int exponentChange = omsb - fromSemantics.precision;
2472 if (exponent + exponentChange < toSemantics.minExponent)
2473 exponentChange = toSemantics.minExponent - exponent;
2474 exponentChange = std::max(exponentChange, shift);
2475 if (exponentChange < 0) {
2476 shift -= exponentChange;
2477 exponent += exponentChange;
2478 } else if (omsb <= -shift) {
2479 exponentChange = omsb + shift - 1; // leave at least one bit set
2480 shift -= exponentChange;
2481 exponent += exponentChange;
2482 }
2483 }
2484
2485 // If this is a truncation, perform the shift before we narrow the storage.
2486 if (shift < 0 && (isFiniteNonZero() ||
2487 (category == fcNaN && semantics->nonFiniteBehavior !=
2489 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
2490
2491 // Fix the storage so it can hold to new value.
2492 if (newPartCount > oldPartCount) {
2493 // The new type requires more storage; make it available.
2494 integerPart *newParts;
2495 newParts = new integerPart[newPartCount];
2496 APInt::tcSet(newParts, 0, newPartCount);
2497 if (isFiniteNonZero() || category==fcNaN)
2498 APInt::tcAssign(newParts, significandParts(), oldPartCount);
2499 freeSignificand();
2500 significand.parts = newParts;
2501 } else if (newPartCount == 1 && oldPartCount != 1) {
2502 // Switch to built-in storage for a single part.
2503 integerPart newPart = 0;
2504 if (isFiniteNonZero() || category==fcNaN)
2505 newPart = significandParts()[0];
2506 freeSignificand();
2507 significand.part = newPart;
2508 }
2509
2510 // Now that we have the right storage, switch the semantics.
2511 semantics = &toSemantics;
2512
2513 // If this is an extension, perform the shift now that the storage is
2514 // available.
2515 if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
2516 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2517
2518 if (isFiniteNonZero()) {
2519 fs = normalize(rounding_mode, lostFraction);
2520 *losesInfo = (fs != opOK);
2521 } else if (category == fcNaN) {
2522 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
2523 *losesInfo =
2525 makeNaN(false, sign);
2526 return is_signaling ? opInvalidOp : opOK;
2527 }
2528
2529 // If NaN is negative zero, we need to create a new NaN to avoid converting
2530 // NaN to -Inf.
2531 if (fromSemantics.nanEncoding == fltNanEncoding::NegativeZero &&
2532 semantics->nanEncoding != fltNanEncoding::NegativeZero)
2533 makeNaN(false, false);
2534
2535 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2536
2537 // For x87 extended precision, we want to make a NaN, not a special NaN if
2538 // the input wasn't special either.
2539 if (!X86SpecialNan && semantics == &APFloatBase::semX87DoubleExtended)
2540 APInt::tcSetBit(significandParts(), semantics->precision - 1);
2541
2542 // Convert of sNaN creates qNaN and raises an exception (invalid op).
2543 // This also guarantees that a sNaN does not become Inf on a truncation
2544 // that loses all payload bits.
2545 if (is_signaling) {
2546 makeQuiet();
2547 fs = opInvalidOp;
2548 } else {
2549 fs = opOK;
2550 }
2551 } else if (category == fcInfinity &&
2552 semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
2553 makeNaN(false, sign);
2554 *losesInfo = true;
2555 fs = opInexact;
2556 } else if (category == fcZero &&
2557 semantics->nanEncoding == fltNanEncoding::NegativeZero) {
2558 // Negative zero loses info, but positive zero doesn't.
2559 *losesInfo =
2560 fromSemantics.nanEncoding != fltNanEncoding::NegativeZero && sign;
2561 fs = *losesInfo ? opInexact : opOK;
2562 // NaN is negative zero means -0 -> +0, which can lose information
2563 sign = false;
2564 } else {
2565 *losesInfo = false;
2566 fs = opOK;
2567 }
2568
2569 if (category == fcZero && !semantics->hasZero)
2571 return fs;
2572}
2573
2574/* Convert a floating point number to an integer according to the
2575 rounding mode. If the rounded integer value is out of range this
2576 returns an invalid operation exception and the contents of the
2577 destination parts are unspecified. If the rounded value is in
2578 range but the floating point number is not the exact integer, the C
2579 standard doesn't require an inexact exception to be raised. IEEE
2580 854 does require it so we do that.
2581
2582 Note that for conversions to integer type the C standard requires
2583 round-to-zero to always be used. */
2584APFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
2585 MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
2586 roundingMode rounding_mode, bool *isExact) const {
2587 *isExact = false;
2588
2589 /* Handle the three special cases first. */
2590 if (category == fcInfinity || category == fcNaN)
2591 return opInvalidOp;
2592
2593 unsigned dstPartsCount = partCountForBits(width);
2594 assert(dstPartsCount <= parts.size() && "Integer too big");
2595
2596 if (category == fcZero) {
2597 APInt::tcSet(parts.data(), 0, dstPartsCount);
2598 // Negative zero can't be represented as an int.
2599 *isExact = !sign;
2600 return opOK;
2601 }
2602
2603 const integerPart *src = significandParts();
2604
2605 unsigned truncatedBits;
2606 /* Step 1: place our absolute value, with any fraction truncated, in
2607 the destination. */
2608 if (exponent < 0) {
2609 /* Our absolute value is less than one; truncate everything. */
2610 APInt::tcSet(parts.data(), 0, dstPartsCount);
2611 /* For exponent -1 the integer bit represents .5, look at that.
2612 For smaller exponents leftmost truncated bit is 0. */
2613 truncatedBits = semantics->precision -1U - exponent;
2614 } else {
2615 /* We want the most significant (exponent + 1) bits; the rest are
2616 truncated. */
2617 unsigned int bits = exponent + 1U;
2618
2619 /* Hopelessly large in magnitude? */
2620 if (bits > width)
2621 return opInvalidOp;
2622
2623 if (bits < semantics->precision) {
2624 /* We truncate (semantics->precision - bits) bits. */
2625 truncatedBits = semantics->precision - bits;
2626 APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
2627 } else {
2628 /* We want at least as many bits as are available. */
2629 APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
2630 0);
2631 APInt::tcShiftLeft(parts.data(), dstPartsCount,
2632 bits - semantics->precision);
2633 truncatedBits = 0;
2634 }
2635 }
2636
2637 /* Step 2: work out any lost fraction, and increment the absolute
2638 value if we would round away from zero. */
2639 lostFraction lost_fraction;
2640 if (truncatedBits) {
2641 lost_fraction = lostFractionThroughTruncation(src, partCount(),
2642 truncatedBits);
2643 if (lost_fraction != lfExactlyZero &&
2644 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2645 if (APInt::tcIncrement(parts.data(), dstPartsCount))
2646 return opInvalidOp; /* Overflow. */
2647 }
2648 } else {
2649 lost_fraction = lfExactlyZero;
2650 }
2651
2652 /* Step 3: check if we fit in the destination. */
2653 unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;
2654
2655 if (sign) {
2656 if (!isSigned) {
2657 /* Negative numbers cannot be represented as unsigned. */
2658 if (omsb != 0)
2659 return opInvalidOp;
2660 } else {
2661 /* It takes omsb bits to represent the unsigned integer value.
2662 We lose a bit for the sign, but care is needed as the
2663 maximally negative integer is a special case. */
2664 if (omsb == width &&
2665 APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
2666 return opInvalidOp;
2667
2668 /* This case can happen because of rounding. */
2669 if (omsb > width)
2670 return opInvalidOp;
2671 }
2672
2673 APInt::tcNegate (parts.data(), dstPartsCount);
2674 } else {
2675 if (omsb >= width + !isSigned)
2676 return opInvalidOp;
2677 }
2678
2679 if (lost_fraction == lfExactlyZero) {
2680 *isExact = true;
2681 return opOK;
2682 }
2683 return opInexact;
2684}
2685
2686/* Same as convertToSignExtendedInteger, except we provide
2687 deterministic values in case of an invalid operation exception,
2688 namely zero for NaNs and the minimal or maximal value respectively
2689 for underflow or overflow.
2690 The *isExact output tells whether the result is exact, in the sense
2691 that converting it back to the original floating point type produces
2692 the original value. This is almost equivalent to result==opOK,
2693 except for negative zeroes.
2694*/
2697 unsigned int width, bool isSigned,
2698 roundingMode rounding_mode, bool *isExact) const {
2699 opStatus fs = convertToSignExtendedInteger(parts, width, isSigned,
2700 rounding_mode, isExact);
2701
2702 if (fs == opInvalidOp) {
2703 unsigned int bits, dstPartsCount;
2704
2705 dstPartsCount = partCountForBits(width);
2706 assert(dstPartsCount <= parts.size() && "Integer too big");
2707
2708 if (category == fcNaN)
2709 bits = 0;
2710 else if (sign)
2711 bits = isSigned;
2712 else
2713 bits = width - isSigned;
2714
2715 tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
2716 if (sign && isSigned)
2717 APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
2718 }
2719
2720 return fs;
2721}
2722
2723/* Convert an unsigned integer SRC to a floating point number,
2724 rounding according to ROUNDING_MODE. The sign of the floating
2725 point number is not modified. */
2726APFloat::opStatus IEEEFloat::convertFromUnsignedParts(
2727 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
2728 category = fcNormal;
2729 unsigned omsb = APInt::tcMSB(src, srcCount) + 1;
2730 integerPart *dst = significandParts();
2731 unsigned dstCount = partCount();
2732 unsigned precision = semantics->precision;
2733
2734 /* We want the most significant PRECISION bits of SRC. There may not
2735 be that many; extract what we can. */
2736 lostFraction lost_fraction;
2737 if (precision <= omsb) {
2738 exponent = omsb - 1;
2739 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2740 omsb - precision);
2741 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2742 } else {
2743 exponent = precision - 1;
2744 lost_fraction = lfExactlyZero;
2745 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2746 }
2747
2748 return normalize(rounding_mode, lost_fraction);
2749}
2750
2752 roundingMode rounding_mode) {
2753 unsigned int partCount = Val.getNumWords();
2754 APInt api = Val;
2755
2756 sign = false;
2757 if (isSigned && api.isNegative()) {
2758 sign = true;
2759 api = -api;
2760 }
2761
2762 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2763}
2764
2766IEEEFloat::convertFromHexadecimalString(StringRef s,
2767 roundingMode rounding_mode) {
2768 lostFraction lost_fraction = lfExactlyZero;
2769
2770 category = fcNormal;
2771 zeroSignificand();
2772 exponent = 0;
2773
2774 integerPart *significand = significandParts();
2775 unsigned partsCount = partCount();
2776 unsigned bitPos = partsCount * integerPartWidth;
2777 bool computedTrailingFraction = false;
2778
2779 // Skip leading zeroes and any (hexa)decimal point.
2780 StringRef::iterator begin = s.begin();
2781 StringRef::iterator end = s.end();
2783 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2784 if (!PtrOrErr)
2785 return PtrOrErr.takeError();
2786 StringRef::iterator p = *PtrOrErr;
2787 StringRef::iterator firstSignificantDigit = p;
2788
2789 while (p != end) {
2790 integerPart hex_value;
2791
2792 if (*p == '.') {
2793 if (dot != end)
2794 return createError("String contains multiple dots");
2795 dot = p++;
2796 continue;
2797 }
2798
2799 hex_value = hexDigitValue(*p);
2800 if (hex_value == UINT_MAX)
2801 break;
2802
2803 p++;
2804
2805 // Store the number while we have space.
2806 if (bitPos) {
2807 bitPos -= 4;
2808 hex_value <<= bitPos % integerPartWidth;
2809 significand[bitPos / integerPartWidth] |= hex_value;
2810 } else if (!computedTrailingFraction) {
2811 auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value);
2812 if (!FractOrErr)
2813 return FractOrErr.takeError();
2814 lost_fraction = *FractOrErr;
2815 computedTrailingFraction = true;
2816 }
2817 }
2818
2819 /* Hex floats require an exponent but not a hexadecimal point. */
2820 if (p == end)
2821 return createError("Hex strings require an exponent");
2822 if (*p != 'p' && *p != 'P')
2823 return createError("Invalid character in significand");
2824 if (p == begin)
2825 return createError("Significand has no digits");
2826 if (dot != end && p - begin == 1)
2827 return createError("Significand has no digits");
2828
2829 /* Ignore the exponent if we are zero. */
2830 if (p != firstSignificantDigit) {
2831 int expAdjustment;
2832
2833 /* Implicit hexadecimal point? */
2834 if (dot == end)
2835 dot = p;
2836
2837 /* Calculate the exponent adjustment implicit in the number of
2838 significant digits. */
2839 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2840 if (expAdjustment < 0)
2841 expAdjustment++;
2842 expAdjustment = expAdjustment * 4 - 1;
2843
2844 /* Adjust for writing the significand starting at the most
2845 significant nibble. */
2846 expAdjustment += semantics->precision;
2847 expAdjustment -= partsCount * integerPartWidth;
2848
2849 /* Adjust for the given exponent. */
2850 auto ExpOrErr = totalExponent(p + 1, end, expAdjustment);
2851 if (!ExpOrErr)
2852 return ExpOrErr.takeError();
2853 exponent = *ExpOrErr;
2854 }
2855
2856 return normalize(rounding_mode, lost_fraction);
2857}
2858
2860IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2861 unsigned sigPartCount, int exp,
2862 roundingMode rounding_mode) {
2863 fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
2865
2866 bool isNearest = rounding_mode == rmNearestTiesToEven ||
2867 rounding_mode == rmNearestTiesToAway;
2868
2869 unsigned parts = partCountForBits(semantics->precision + 11);
2870
2871 /* Calculate pow(5, abs(exp)). */
2872 unsigned pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp : -exp);
2873
2874 for (;; parts *= 2) {
2875 unsigned int excessPrecision, truncatedBits;
2876
2877 calcSemantics.precision = parts * integerPartWidth - 1;
2878 excessPrecision = calcSemantics.precision - semantics->precision;
2879 truncatedBits = excessPrecision;
2880
2881 IEEEFloat decSig(calcSemantics, uninitialized);
2882 decSig.makeZero(sign);
2883 IEEEFloat pow5(calcSemantics);
2884
2885 opStatus sigStatus = decSig.convertFromUnsignedParts(
2886 decSigParts, sigPartCount, rmNearestTiesToEven);
2887 opStatus powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2889 /* Add exp, as 10^n = 5^n * 2^n. */
2890 decSig.exponent += exp;
2891
2892 lostFraction calcLostFraction;
2893 integerPart HUerr, HUdistance;
2894 unsigned int powHUerr;
2895
2896 if (exp >= 0) {
2897 /* multiplySignificand leaves the precision-th bit set to 1. */
2898 calcLostFraction = decSig.multiplySignificand(pow5);
2899 powHUerr = powStatus != opOK;
2900 } else {
2901 calcLostFraction = decSig.divideSignificand(pow5);
2902 /* Denormal numbers have less precision. */
2903 if (decSig.exponent < semantics->minExponent) {
2904 excessPrecision += (semantics->minExponent - decSig.exponent);
2905 truncatedBits = excessPrecision;
2906 excessPrecision = std::min(excessPrecision, calcSemantics.precision);
2907 }
2908 /* Extra half-ulp lost in reciprocal of exponent. */
2909 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2910 }
2911
2912 /* Both multiplySignificand and divideSignificand return the
2913 result with the integer bit set. */
2915 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2916
2917 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2918 powHUerr);
2919 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2920 excessPrecision, isNearest);
2921
2922 /* Are we guaranteed to round correctly if we truncate? */
2923 if (HUdistance >= HUerr) {
2924 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2925 calcSemantics.precision - excessPrecision,
2926 excessPrecision);
2927 /* Take the exponent of decSig. If we tcExtract-ed less bits
2928 above we must adjust our exponent to compensate for the
2929 implicit right shift. */
2930 exponent = (decSig.exponent + semantics->precision
2931 - (calcSemantics.precision - excessPrecision));
2932 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2933 decSig.partCount(),
2934 truncatedBits);
2935 return static_cast<opStatus>(normalize(rounding_mode, calcLostFraction) |
2936 ((sigStatus | powStatus) & opInexact));
2937 }
2938 }
2939}
2940
2941Expected<APFloat::opStatus>
2942IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
2943 decimalInfo D;
2944 opStatus fs;
2945
2946 /* Scan the text. */
2947 StringRef::iterator p = str.begin();
2948 if (Error Err = interpretDecimal(p, str.end(), &D))
2949 return std::move(Err);
2950
2951 /* Handle the quick cases. First the case of no significant digits,
2952 i.e. zero, and then exponents that are obviously too large or too
2953 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2954 definitely overflows if
2955
2956 (exp - 1) * L >= maxExponent
2957
2958 and definitely underflows to zero where
2959
2960 (exp + 1) * L <= minExponent - precision
2961
2962 With integer arithmetic the tightest bounds for L are
2963
2964 93/28 < L < 196/59 [ numerator <= 256 ]
2965 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2966 */
2967
2968 // Test if we have a zero number allowing for strings with no null terminators
2969 // and zero decimals with non-zero exponents.
2970 //
2971 // We computed firstSigDigit by ignoring all zeros and dots. Thus if
2972 // D->firstSigDigit equals str.end(), every digit must be a zero and there can
2973 // be at most one dot. On the other hand, if we have a zero with a non-zero
2974 // exponent, then we know that D.firstSigDigit will be non-numeric.
2975 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
2976 category = fcZero;
2977 fs = opOK;
2978 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2979 sign = false;
2980 if (!semantics->hasZero)
2982
2983 /* Check whether the normalized exponent is high enough to overflow
2984 max during the log-rebasing in the max-exponent check below. */
2985 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2986 fs = handleOverflow(rounding_mode);
2987
2988 /* If it wasn't, then it also wasn't high enough to overflow max
2989 during the log-rebasing in the min-exponent check. Check that it
2990 won't overflow min in either check, then perform the min-exponent
2991 check. */
2992 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2993 (D.normalizedExponent + 1) * 28738 <=
2994 8651 * (semantics->minExponent - (int) semantics->precision)) {
2995 /* Underflow to zero and round. */
2996 category = fcNormal;
2997 zeroSignificand();
2998 fs = normalize(rounding_mode, lfLessThanHalf);
2999
3000 /* We can finally safely perform the max-exponent check. */
3001 } else if ((D.normalizedExponent - 1) * 42039
3002 >= 12655 * semantics->maxExponent) {
3003 /* Overflow and round. */
3004 fs = handleOverflow(rounding_mode);
3005 } else {
3006 integerPart *decSignificand;
3007 unsigned int partCount;
3008
3009 /* A tight upper bound on number of bits required to hold an
3010 N-digit decimal integer is N * 196 / 59. Allocate enough space
3011 to hold the full significand, and an extra part required by
3012 tcMultiplyPart. */
3013 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
3014 partCount = partCountForBits(1 + 196 * partCount / 59);
3015 decSignificand = new integerPart[partCount + 1];
3016 partCount = 0;
3017
3018 /* Convert to binary efficiently - we do almost all multiplication
3019 in an integerPart. When this would overflow do we do a single
3020 bignum multiplication, and then revert again to multiplication
3021 in an integerPart. */
3022 do {
3023 integerPart decValue, val, multiplier;
3024
3025 val = 0;
3026 multiplier = 1;
3027
3028 do {
3029 if (*p == '.') {
3030 p++;
3031 if (p == str.end()) {
3032 break;
3033 }
3034 }
3035 decValue = decDigitValue(*p++);
3036 if (decValue >= 10U) {
3037 delete[] decSignificand;
3038 return createError("Invalid character in significand");
3039 }
3040 multiplier *= 10;
3041 val = val * 10 + decValue;
3042 /* The maximum number that can be multiplied by ten with any
3043 digit added without overflowing an integerPart. */
3044 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
3045
3046 /* Multiply out the current part. */
3047 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
3048 partCount, partCount + 1, false);
3049
3050 /* If we used another part (likely but not guaranteed), increase
3051 the count. */
3052 if (decSignificand[partCount])
3053 partCount++;
3054 } while (p <= D.lastSigDigit);
3055
3056 category = fcNormal;
3057 fs = roundSignificandWithExponent(decSignificand, partCount,
3058 D.exponent, rounding_mode);
3059
3060 delete [] decSignificand;
3061 }
3062
3063 return fs;
3064}
3065
3066bool IEEEFloat::convertFromStringSpecials(StringRef str) {
3067 const size_t MIN_NAME_SIZE = 3;
3068
3069 if (str.size() < MIN_NAME_SIZE)
3070 return false;
3071
3072 if (str == "inf" || str == "INFINITY" || str == "+Inf" || str == "+inf") {
3073 makeInf(false);
3074 return true;
3075 }
3076
3077 bool IsNegative = str.consume_front("-");
3078 if (IsNegative) {
3079 if (str.size() < MIN_NAME_SIZE)
3080 return false;
3081
3082 if (str == "inf" || str == "INFINITY" || str == "Inf") {
3083 makeInf(true);
3084 return true;
3085 }
3086 }
3087
3088 // If we have a 's' (or 'S') prefix, then this is a Signaling NaN.
3089 bool IsSignaling = str.consume_front_insensitive("s");
3090 if (IsSignaling) {
3091 if (str.size() < MIN_NAME_SIZE)
3092 return false;
3093 }
3094
3095 if (str.consume_front("nan") || str.consume_front("NaN")) {
3096 // A NaN without payload.
3097 if (str.empty()) {
3098 makeNaN(IsSignaling, IsNegative);
3099 return true;
3100 }
3101
3102 // Allow the payload to be inside parentheses.
3103 if (str.front() == '(') {
3104 // Parentheses should be balanced (and not empty).
3105 if (str.size() <= 2 || str.back() != ')')
3106 return false;
3107
3108 str = str.slice(1, str.size() - 1);
3109 }
3110
3111 // Determine the payload number's radix.
3112 unsigned Radix = 10;
3113 if (str[0] == '0') {
3114 if (str.size() > 1 && tolower(str[1]) == 'x') {
3115 str = str.drop_front(2);
3116 Radix = 16;
3117 } else {
3118 Radix = 8;
3119 }
3120 }
3121
3122 // Parse the payload and make the NaN.
3123 APInt Payload;
3124 if (!str.getAsInteger(Radix, Payload)) {
3125 makeNaN(IsSignaling, IsNegative, &Payload);
3126 return true;
3127 }
3128 }
3129
3130 return false;
3131}
3132
3133Expected<APFloat::opStatus>
3135 if (str.empty())
3136 return createError("Invalid string length");
3137
3138 // Handle special cases.
3139 if (convertFromStringSpecials(str))
3140 return opOK;
3141
3142 /* Handle a leading minus sign. */
3143 StringRef::iterator p = str.begin();
3144 size_t slen = str.size();
3145 sign = *p == '-' ? 1 : 0;
3146 if (sign && !semantics->hasSignedRepr)
3148 "This floating point format does not support signed values");
3149
3150 if (*p == '-' || *p == '+') {
3151 p++;
3152 slen--;
3153 if (!slen)
3154 return createError("String has no digits");
3155 }
3156
3157 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
3158 if (slen == 2)
3159 return createError("Invalid string");
3160 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
3161 rounding_mode);
3162 }
3163
3164 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
3165}
3166
3167/* Write out a hexadecimal representation of the floating point value
3168 to DST, which must be of sufficient size, in the C99 form
3169 [-]0xh.hhhhp[+-]d. Return the number of characters written,
3170 excluding the terminating NUL.
3171
3172 If UPPERCASE, the output is in upper case, otherwise in lower case.
3173
3174 HEXDIGITS digits appear altogether, rounding the value if
3175 necessary. If HEXDIGITS is 0, the minimal precision to display the
3176 number precisely is used instead. If nothing would appear after
3177 the decimal point it is suppressed.
3178
3179 The decimal exponent is always printed and has at least one digit.
3180 Zero values display an exponent of zero. Infinities and NaNs
3181 appear as "infinity" or "nan" respectively.
3182
3183 The above rules are as specified by C99. There is ambiguity about
3184 what the leading hexadecimal digit should be. This implementation
3185 uses whatever is necessary so that the exponent is displayed as
3186 stored. This implies the exponent will fall within the IEEE format
3187 range, and the leading hexadecimal digit will be 0 (for denormals),
3188 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
3189 any other digits zero).
3190*/
3191unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
3192 bool upperCase,
3193 roundingMode rounding_mode) const {
3194 char *p = dst;
3195 if (sign)
3196 *dst++ = '-';
3197
3198 switch (category) {
3199 case fcInfinity:
3200 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
3201 dst += sizeof infinityL - 1;
3202 break;
3203
3204 case fcNaN:
3205 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
3206 dst += sizeof NaNU - 1;
3207 break;
3208
3209 case fcZero:
3210 *dst++ = '0';
3211 *dst++ = upperCase ? 'X': 'x';
3212 *dst++ = '0';
3213 if (hexDigits > 1) {
3214 *dst++ = '.';
3215 memset (dst, '0', hexDigits - 1);
3216 dst += hexDigits - 1;
3217 }
3218 *dst++ = upperCase ? 'P': 'p';
3219 *dst++ = '0';
3220 break;
3221
3222 case fcNormal:
3223 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
3224 break;
3225 }
3226
3227 *dst = 0;
3228
3229 return static_cast<unsigned int>(dst - p);
3230}
3231
3232/* Does the hard work of outputting the correctly rounded hexadecimal
3233 form of a normal floating point number with the specified number of
3234 hexadecimal digits. If HEXDIGITS is zero the minimum number of
3235 digits necessary to print the value precisely is output. */
3236char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
3237 bool upperCase,
3238 roundingMode rounding_mode) const {
3239 *dst++ = '0';
3240 *dst++ = upperCase ? 'X': 'x';
3241
3242 bool roundUp = false;
3243 const char *hexDigitChars = upperCase ? hexDigitsUpper : hexDigitsLower;
3244
3245 const integerPart *significand = significandParts();
3246 unsigned partsCount = partCount();
3247
3248 /* +3 because the first digit only uses the single integer bit, so
3249 we have 3 virtual zero most-significant-bits. */
3250 unsigned valueBits = semantics->precision + 3;
3251 unsigned shift = integerPartWidth - valueBits % integerPartWidth;
3252
3253 /* The natural number of digits required ignoring trailing
3254 insignificant zeroes. */
3255 unsigned outputDigits = (valueBits - significandLSB() + 3) / 4;
3256
3257 /* hexDigits of zero means use the required number for the
3258 precision. Otherwise, see if we are truncating. If we are,
3259 find out if we need to round away from zero. */
3260 if (hexDigits) {
3261 if (hexDigits < outputDigits) {
3262 /* We are dropping non-zero bits, so need to check how to round.
3263 "bits" is the number of dropped bits. */
3264 unsigned int bits;
3265 lostFraction fraction;
3266
3267 bits = valueBits - hexDigits * 4;
3268 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
3269 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
3270 }
3271 outputDigits = hexDigits;
3272 }
3273
3274 /* Write the digits consecutively, and start writing in the location
3275 of the hexadecimal point. We move the most significant digit
3276 left and add the hexadecimal point later. */
3277 char *p = ++dst;
3278
3279 unsigned count = (valueBits + integerPartWidth - 1) / integerPartWidth;
3280
3281 while (outputDigits && count) {
3282 integerPart part;
3283
3284 /* Put the most significant integerPartWidth bits in "part". */
3285 if (--count == partsCount)
3286 part = 0; /* An imaginary higher zero part. */
3287 else
3288 part = significand[count] << shift;
3289
3290 if (count && shift)
3291 part |= significand[count - 1] >> (integerPartWidth - shift);
3292
3293 /* Convert as much of "part" to hexdigits as we can. */
3294 unsigned int curDigits = integerPartWidth / 4;
3295
3296 curDigits = std::min(curDigits, outputDigits);
3297 dst += partAsHex (dst, part, curDigits, hexDigitChars);
3298 outputDigits -= curDigits;
3299 }
3300
3301 if (roundUp) {
3302 char *q = dst;
3303
3304 /* Note that hexDigitChars has a trailing '0'. */
3305 do {
3306 q--;
3307 *q = hexDigitChars[hexDigitValue (*q) + 1];
3308 } while (*q == '0');
3309 assert(q >= p);
3310 } else {
3311 /* Add trailing zeroes. */
3312 memset (dst, '0', outputDigits);
3313 dst += outputDigits;
3314 }
3315
3316 /* Move the most significant digit to before the point, and if there
3317 is something after the decimal point add it. This must come
3318 after rounding above. */
3319 p[-1] = p[0];
3320 if (dst -1 == p)
3321 dst--;
3322 else
3323 p[0] = '.';
3324
3325 /* Finally output the exponent. */
3326 *dst++ = upperCase ? 'P': 'p';
3327
3328 return writeSignedDecimal (dst, exponent);
3329}
3330
3332 if (!Arg.isFiniteNonZero())
3333 return hash_combine((uint8_t)Arg.category,
3334 // NaN has no sign, fix it at zero.
3335 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
3336 Arg.semantics->precision);
3337
3338 // Normal floats need their exponent and significand hashed.
3339 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
3340 Arg.semantics->precision, Arg.exponent,
3342 Arg.significandParts(),
3343 Arg.significandParts() + Arg.partCount()));
3344}
3345
3346// Conversion from APFloat to/from host float/double. It may eventually be
3347// possible to eliminate these and have everybody deal with APFloats, but that
3348// will take a while. This approach will not easily extend to long double.
3349// Current implementation requires integerPartWidth==64, which is correct at
3350// the moment but could be made more general.
3351
3352// Denormals have exponent minExponent in APFloat, but minExponent-1 in
3353// the actual IEEE respresentations. We compensate for that here.
3354
3355APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
3356 assert(semantics ==
3357 (const llvm::fltSemantics *)&APFloatBase::semX87DoubleExtended);
3358 assert(partCount()==2);
3359
3360 uint64_t myexponent, mysignificand;
3361
3362 if (isFiniteNonZero()) {
3363 myexponent = exponent+16383; //bias
3364 mysignificand = significandParts()[0];
3365 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
3366 myexponent = 0; // denormal
3367 } else if (category==fcZero) {
3368 myexponent = 0;
3369 mysignificand = 0;
3370 } else if (category==fcInfinity) {
3371 myexponent = 0x7fff;
3372 mysignificand = 0x8000000000000000ULL;
3373 } else {
3374 assert(category == fcNaN && "Unknown category");
3375 myexponent = 0x7fff;
3376 mysignificand = significandParts()[0];
3377 }
3378
3379 uint64_t words[2];
3380 words[0] = mysignificand;
3381 words[1] = ((uint64_t)(sign & 1) << 15) |
3382 (myexponent & 0x7fffLL);
3383 return APInt(80, words);
3384}
3385
3386APInt IEEEFloat::convertPPCDoubleDoubleLegacyAPFloatToAPInt() const {
3387 assert(semantics ==
3388 (const llvm::fltSemantics *)&APFloatBase::semPPCDoubleDoubleLegacy);
3389 assert(partCount()==2);
3390
3391 uint64_t words[2];
3392 bool losesInfo;
3393
3394 // Convert number to double. To avoid spurious underflows, we re-
3395 // normalize against the "double" minExponent first, and only *then*
3396 // truncate the mantissa. The result of that second conversion
3397 // may be inexact, but should never underflow.
3398 // Declare fltSemantics before APFloat that uses it (and
3399 // saves pointer to it) to ensure correct destruction order.
3400 fltSemantics extendedSemantics = *semantics;
3401 extendedSemantics.minExponent = APFloatBase::semIEEEdouble.minExponent;
3402 IEEEFloat extended(*this);
3403 [[maybe_unused]] opStatus fs =
3404 extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3405 assert(fs == opOK && !losesInfo);
3406
3407 IEEEFloat u(extended);
3408 fs = u.convert(APFloatBase::semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3409 assert(fs == opOK || fs == opInexact);
3410 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
3411
3412 // If conversion was exact or resulted in a special case, we're done;
3413 // just set the second double to zero. Otherwise, re-convert back to
3414 // the extended format and compute the difference. This now should
3415 // convert exactly to double.
3416 if (u.isFiniteNonZero() && losesInfo) {
3417 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3418 assert(fs == opOK && !losesInfo);
3419
3420 IEEEFloat v(extended);
3421 v.subtract(u, rmNearestTiesToEven);
3422 fs = v.convert(APFloatBase::semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3423 assert(fs == opOK && !losesInfo);
3424 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
3425 } else {
3426 words[1] = 0;
3427 }
3428
3429 return APInt(128, words);
3430}
3431
3432template <const fltSemantics &S>
3433APInt IEEEFloat::convertIEEEFloatToAPInt() const {
3434 assert(semantics == &S);
3435 constexpr unsigned int trailing_significand_bits = S.precision - 1;
3436 constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth;
3437 constexpr integerPart integer_bit =
3438 integerPart{1} << (trailing_significand_bits % integerPartWidth);
3439 constexpr uint64_t significand_mask = integer_bit - 1;
3440 constexpr unsigned int exponent_bits =
3441 S.sizeInBits - (S.hasSignedRepr ? 1 : 0) - trailing_significand_bits;
3442 static_assert(exponent_bits < 64);
3443 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3444 constexpr bool is_zero_exp_reserved = S.hasDenormals || S.hasZero;
3445 constexpr int bias = -(S.minExponent - (is_zero_exp_reserved ? 1 : 0));
3446
3447 uint64_t myexponent;
3448 std::array<integerPart, partCountForBits(trailing_significand_bits)>
3449 mysignificand;
3450
3451 if (isFiniteNonZero()) {
3452 myexponent = exponent + bias;
3453 std::copy_n(significandParts(), mysignificand.size(),
3454 mysignificand.begin());
3455 if (myexponent == 1 &&
3456 !(significandParts()[integer_bit_part] & integer_bit))
3457 myexponent = 0; // denormal
3458 } else if (category == fcZero) {
3459 if (!S.hasZero)
3460 llvm_unreachable("semantics does not support zero!");
3461 myexponent = ::exponentZero(S) + bias;
3462 mysignificand.fill(0);
3463 } else if (category == fcInfinity) {
3464 if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
3465 S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
3466 llvm_unreachable("semantics don't support inf!");
3467 myexponent = ::exponentInf(S) + bias;
3468 mysignificand.fill(0);
3469 } else {
3470 assert(category == fcNaN && "Unknown category!");
3471 if (S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
3472 llvm_unreachable("semantics don't support NaN!");
3473 myexponent = ::exponentNaN(S) + bias;
3474 std::copy_n(significandParts(), mysignificand.size(),
3475 mysignificand.begin());
3476 }
3477 std::array<uint64_t, (S.sizeInBits + 63) / 64> words;
3478 auto words_iter =
3479 std::copy_n(mysignificand.begin(), mysignificand.size(), words.begin());
3480 if constexpr (significand_mask != 0 || trailing_significand_bits == 0) {
3481 // Clear the integer bit.
3482 words[mysignificand.size() - 1] &= significand_mask;
3483 }
3484 std::fill(words_iter, words.end(), uint64_t{0});
3485 constexpr size_t last_word = words.size() - 1;
3486 uint64_t shifted_sign = static_cast<uint64_t>(sign & 1)
3487 << ((S.sizeInBits - 1) % 64);
3488 words[last_word] |= shifted_sign;
3489 uint64_t shifted_exponent = (myexponent & exponent_mask)
3490 << (trailing_significand_bits % 64);
3491 words[last_word] |= shifted_exponent;
3492 if constexpr (last_word == 0) {
3493 return APInt(S.sizeInBits, words[0]);
3494 }
3495 return APInt(S.sizeInBits, words);
3496}
3497
3498APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
3499 assert(partCount() == 2);
3500 return convertIEEEFloatToAPInt<APFloatBase::semIEEEquad>();
3501}
3502
3503APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
3504 assert(partCount()==1);
3505 return convertIEEEFloatToAPInt<APFloatBase::semIEEEdouble>();
3506}
3507
3508APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
3509 assert(partCount()==1);
3510 return convertIEEEFloatToAPInt<APFloatBase::semIEEEsingle>();
3511}
3512
3513APInt IEEEFloat::convertBFloatAPFloatToAPInt() const {
3514 assert(partCount() == 1);
3515 return convertIEEEFloatToAPInt<APFloatBase::semBFloat>();
3516}
3517
3518APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
3519 assert(partCount()==1);
3520 return convertIEEEFloatToAPInt<APFloatBase::APFloatBase::semIEEEhalf>();
3521}
3522
3523APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const {
3524 assert(partCount() == 1);
3525 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E5M2>();
3526}
3527
3528APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const {
3529 assert(partCount() == 1);
3530 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E5M2FNUZ>();
3531}
3532
3533APInt IEEEFloat::convertFloat8E4M3APFloatToAPInt() const {
3534 assert(partCount() == 1);
3535 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3>();
3536}
3537
3538APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {
3539 assert(partCount() == 1);
3540 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3FN>();
3541}
3542
3543APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const {
3544 assert(partCount() == 1);
3545 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3FNUZ>();
3546}
3547
3548APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const {
3549 assert(partCount() == 1);
3550 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3B11FNUZ>();
3551}
3552
3553APInt IEEEFloat::convertFloat8E3M4APFloatToAPInt() const {
3554 assert(partCount() == 1);
3555 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E3M4>();
3556}
3557
3558APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const {
3559 assert(partCount() == 1);
3560 return convertIEEEFloatToAPInt<APFloatBase::semFloatTF32>();
3561}
3562
3563APInt IEEEFloat::convertFloat8E8M0FNUAPFloatToAPInt() const {
3564 assert(partCount() == 1);
3565 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E8M0FNU>();
3566}
3567
3568APInt IEEEFloat::convertFloat6E3M2FNAPFloatToAPInt() const {
3569 assert(partCount() == 1);
3570 return convertIEEEFloatToAPInt<APFloatBase::semFloat6E3M2FN>();
3571}
3572
3573APInt IEEEFloat::convertFloat6E2M3FNAPFloatToAPInt() const {
3574 assert(partCount() == 1);
3575 return convertIEEEFloatToAPInt<APFloatBase::semFloat6E2M3FN>();
3576}
3577
3578APInt IEEEFloat::convertFloat4E2M1FNAPFloatToAPInt() const {
3579 assert(partCount() == 1);
3580 return convertIEEEFloatToAPInt<APFloatBase::semFloat4E2M1FN>();
3581}
3582
3583// This function creates an APInt that is just a bit map of the floating
3584// point constant as it would appear in memory. It is not a conversion,
3585// and treating the result as a normal integer is unlikely to be useful.
3586
3588 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEhalf)
3589 return convertHalfAPFloatToAPInt();
3590
3591 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semBFloat)
3592 return convertBFloatAPFloatToAPInt();
3593
3594 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEsingle)
3595 return convertFloatAPFloatToAPInt();
3596
3597 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEdouble)
3598 return convertDoubleAPFloatToAPInt();
3599
3600 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEquad)
3601 return convertQuadrupleAPFloatToAPInt();
3602
3603 if (semantics ==
3604 (const llvm::fltSemantics *)&APFloatBase::semPPCDoubleDoubleLegacy)
3605 return convertPPCDoubleDoubleLegacyAPFloatToAPInt();
3606
3607 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E5M2)
3608 return convertFloat8E5M2APFloatToAPInt();
3609
3610 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E5M2FNUZ)
3611 return convertFloat8E5M2FNUZAPFloatToAPInt();
3612
3613 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3)
3614 return convertFloat8E4M3APFloatToAPInt();
3615
3616 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3FN)
3617 return convertFloat8E4M3FNAPFloatToAPInt();
3618
3619 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3FNUZ)
3620 return convertFloat8E4M3FNUZAPFloatToAPInt();
3621
3622 if (semantics ==
3623 (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3B11FNUZ)
3624 return convertFloat8E4M3B11FNUZAPFloatToAPInt();
3625
3626 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E3M4)
3627 return convertFloat8E3M4APFloatToAPInt();
3628
3629 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloatTF32)
3630 return convertFloatTF32APFloatToAPInt();
3631
3632 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E8M0FNU)
3633 return convertFloat8E8M0FNUAPFloatToAPInt();
3634
3635 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat6E3M2FN)
3636 return convertFloat6E3M2FNAPFloatToAPInt();
3637
3638 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat6E2M3FN)
3639 return convertFloat6E2M3FNAPFloatToAPInt();
3640
3641 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat4E2M1FN)
3642 return convertFloat4E2M1FNAPFloatToAPInt();
3643
3644 assert(semantics ==
3645 (const llvm::fltSemantics *)&APFloatBase::semX87DoubleExtended &&
3646 "unknown format!");
3647 return convertF80LongDoubleAPFloatToAPInt();
3648}
3649
3651 assert(semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEsingle &&
3652 "Float semantics are not IEEEsingle");
3653 APInt api = bitcastToAPInt();
3654 return api.bitsToFloat();
3655}
3656
3658 assert(semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEdouble &&
3659 "Float semantics are not IEEEdouble");
3660 APInt api = bitcastToAPInt();
3661 return api.bitsToDouble();
3662}
3663
3664#ifdef HAS_IEE754_FLOAT128
3665float128 IEEEFloat::convertToQuad() const {
3666 assert(semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEquad &&
3667 "Float semantics are not IEEEquads");
3668 APInt api = bitcastToAPInt();
3669 return api.bitsToQuad();
3670}
3671#endif
3672
3673/// Integer bit is explicit in this format. Intel hardware (387 and later)
3674/// does not support these bit patterns:
3675/// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3676/// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3677/// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3678/// exponent = 0, integer bit 1 ("pseudodenormal")
3679/// At the moment, the first three are treated as NaNs, the last one as Normal.
3680void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
3681 uint64_t i1 = api.getRawData()[0];
3682 uint64_t i2 = api.getRawData()[1];
3683 uint64_t myexponent = (i2 & 0x7fff);
3684 uint64_t mysignificand = i1;
3685 uint8_t myintegerbit = mysignificand >> 63;
3686
3687 initialize(&APFloatBase::semX87DoubleExtended);
3688 assert(partCount()==2);
3689
3690 sign = static_cast<unsigned int>(i2>>15);
3691 if (myexponent == 0 && mysignificand == 0) {
3692 makeZero(sign);
3693 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3694 makeInf(sign);
3695 } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) ||
3696 (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) {
3697 category = fcNaN;
3698 exponent = exponentNaN();
3699 significandParts()[0] = mysignificand;
3700 significandParts()[1] = 0;
3701 } else {
3702 category = fcNormal;
3703 exponent = myexponent - 16383;
3704 significandParts()[0] = mysignificand;
3705 significandParts()[1] = 0;
3706 if (myexponent==0) // denormal
3707 exponent = -16382;
3708 }
3709}
3710
3711void IEEEFloat::initFromPPCDoubleDoubleLegacyAPInt(const APInt &api) {
3712 uint64_t i1 = api.getRawData()[0];
3713 uint64_t i2 = api.getRawData()[1];
3714 bool losesInfo;
3715
3716 // Get the first double and convert to our format.
3717 initFromDoubleAPInt(APInt(64, i1));
3718 [[maybe_unused]] opStatus fs = convert(APFloatBase::semPPCDoubleDoubleLegacy,
3719 rmNearestTiesToEven, &losesInfo);
3720 // (convert may return opInvalidOp if i1 is an sNaN).
3721 assert((fs == opOK || fs == opInvalidOp) && !losesInfo);
3722
3723 // Unless we have a special case, add in second double.
3724 if (isFiniteNonZero()) {
3725 IEEEFloat v(APFloatBase::semIEEEdouble, APInt(64, i2));
3726 fs = v.convert(APFloatBase::semPPCDoubleDoubleLegacy, rmNearestTiesToEven,
3727 &losesInfo);
3728 assert(fs == opOK && !losesInfo);
3729
3731 }
3732}
3733
3734// The E8M0 format has the following characteristics:
3735// It is an 8-bit unsigned format with only exponents (no actual significand).
3736// No encodings for {zero, infinities or denorms}.
3737// NaN is represented by all 1's.
3738// Bias is 127.
3739void IEEEFloat::initFromFloat8E8M0FNUAPInt(const APInt &api) {
3740 initFromIEEEAPInt<APFloatBase::semFloat8E8M0FNU>(api);
3741}
3742
3743template <const fltSemantics &S>
3744void IEEEFloat::initFromIEEEAPInt(const APInt &api) {
3745 assert(api.getBitWidth() == S.sizeInBits);
3746
3747 constexpr unsigned int trailing_significand_bits = S.precision - 1;
3748 constexpr integerPart integer_bit =
3749 integerPart{1} << (trailing_significand_bits % integerPartWidth);
3750 constexpr uint64_t significand_mask = integer_bit - 1;
3751 constexpr unsigned int exponent_bits =
3752 S.sizeInBits - (S.hasSignedRepr ? 1 : 0) - trailing_significand_bits;
3753 constexpr unsigned int stored_significand_parts =
3754 partCountForBits(trailing_significand_bits);
3755 static_assert(exponent_bits < 64);
3756 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3757 constexpr bool is_zero_exp_reserved = S.hasDenormals || S.hasZero;
3758 constexpr int bias = -(S.minExponent - (is_zero_exp_reserved ? 1 : 0));
3759 constexpr bool has_significand = trailing_significand_bits > 0;
3760
3761 // Copy the bits of the significand. We need to clear out the exponent and
3762 // sign bit in the last word.
3763 std::array<integerPart, stored_significand_parts> mysignificand;
3764 if constexpr (has_significand) {
3765 std::copy_n(api.getRawData(), mysignificand.size(), mysignificand.begin());
3766 if constexpr (significand_mask != 0) {
3767 mysignificand[mysignificand.size() - 1] &= significand_mask;
3768 }
3769 } else {
3770 std::fill_n(mysignificand.begin(), mysignificand.size(), 0);
3771 // Always set integer bit to 1 for consistency in APFloat's internal
3772 // representation.
3773 mysignificand[0] = 1;
3774 }
3775
3776 // We assume the last word holds the sign bit, the exponent, and potentially
3777 // some of the trailing significand field.
3778 uint64_t last_word = api.getRawData()[api.getNumWords() - 1];
3779 uint64_t myexponent =
3780 (last_word >> (trailing_significand_bits % 64)) & exponent_mask;
3781
3782 initialize(&S);
3783 assert(partCount() == mysignificand.size());
3784
3785 sign = S.hasSignedRepr
3786 ? static_cast<unsigned int>(last_word >> ((S.sizeInBits - 1) % 64))
3787 : 0;
3788
3789 bool all_zero_significand =
3790 has_significand && llvm::all_of(mysignificand, equal_to(0));
3791
3792 bool is_zero = myexponent == 0 && all_zero_significand && S.hasZero;
3793
3794 if constexpr (S.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754) {
3795 if (myexponent - bias == ::exponentInf(S) && all_zero_significand) {
3796 makeInf(sign);
3797 return;
3798 }
3799 }
3800
3801 bool is_nan = false;
3802
3803 if constexpr (S.nanEncoding == fltNanEncoding::IEEE) {
3804 is_nan = myexponent - bias == ::exponentNaN(S) && !all_zero_significand;
3805 } else if constexpr (S.nanEncoding == fltNanEncoding::AllOnes) {
3806 bool all_ones_significand =
3807 std::all_of(mysignificand.begin(), mysignificand.end() - 1,
3808 [](integerPart bits) { return bits == ~integerPart{0}; }) &&
3809 (!significand_mask ||
3810 mysignificand[mysignificand.size() - 1] == significand_mask);
3811 is_nan = myexponent - bias == ::exponentNaN(S) && all_ones_significand;
3812 } else if constexpr (S.nanEncoding == fltNanEncoding::NegativeZero) {
3813 is_nan = is_zero && sign;
3814 }
3815
3816 if (is_nan) {
3817 category = fcNaN;
3818 exponent = ::exponentNaN(S);
3819 std::copy_n(mysignificand.begin(), mysignificand.size(),
3820 significandParts());
3821 return;
3822 }
3823
3824 if (is_zero) {
3825 makeZero(sign);
3826 return;
3827 }
3828
3829 category = fcNormal;
3830 exponent = myexponent - bias;
3831 std::copy_n(mysignificand.begin(), mysignificand.size(), significandParts());
3832 if (myexponent == 0 && S.hasDenormals) // denormal
3833 exponent = S.minExponent;
3834 else
3835 significandParts()[mysignificand.size()-1] |= integer_bit; // integer bit
3836}
3837
3838void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
3839 initFromIEEEAPInt<APFloatBase::semIEEEquad>(api);
3840}
3841
3842void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
3843 initFromIEEEAPInt<APFloatBase::semIEEEdouble>(api);
3844}
3845
3846void IEEEFloat::initFromFloatAPInt(const APInt &api) {
3847 initFromIEEEAPInt<APFloatBase::semIEEEsingle>(api);
3848}
3849
3850void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
3851 initFromIEEEAPInt<APFloatBase::semBFloat>(api);
3852}
3853
3854void IEEEFloat::initFromHalfAPInt(const APInt &api) {
3855 initFromIEEEAPInt<APFloatBase::semIEEEhalf>(api);
3856}
3857
3858void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) {
3859 initFromIEEEAPInt<APFloatBase::semFloat8E5M2>(api);
3860}
3861
3862void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) {
3863 initFromIEEEAPInt<APFloatBase::semFloat8E5M2FNUZ>(api);
3864}
3865
3866void IEEEFloat::initFromFloat8E4M3APInt(const APInt &api) {
3867 initFromIEEEAPInt<APFloatBase::semFloat8E4M3>(api);
3868}
3869
3870void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {
3871 initFromIEEEAPInt<APFloatBase::semFloat8E4M3FN>(api);
3872}
3873
3874void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) {
3875 initFromIEEEAPInt<APFloatBase::semFloat8E4M3FNUZ>(api);
3876}
3877
3878void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) {
3879 initFromIEEEAPInt<APFloatBase::semFloat8E4M3B11FNUZ>(api);
3880}
3881
3882void IEEEFloat::initFromFloat8E3M4APInt(const APInt &api) {
3883 initFromIEEEAPInt<APFloatBase::semFloat8E3M4>(api);
3884}
3885
3886void IEEEFloat::initFromFloatTF32APInt(const APInt &api) {
3887 initFromIEEEAPInt<APFloatBase::semFloatTF32>(api);
3888}
3889
3890void IEEEFloat::initFromFloat6E3M2FNAPInt(const APInt &api) {
3891 initFromIEEEAPInt<APFloatBase::semFloat6E3M2FN>(api);
3892}
3893
3894void IEEEFloat::initFromFloat6E2M3FNAPInt(const APInt &api) {
3895 initFromIEEEAPInt<APFloatBase::semFloat6E2M3FN>(api);
3896}
3897
3898void IEEEFloat::initFromFloat4E2M1FNAPInt(const APInt &api) {
3899 initFromIEEEAPInt<APFloatBase::semFloat4E2M1FN>(api);
3900}
3901
3902/// Treat api as containing the bits of a floating point number.
3903void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
3904 assert(api.getBitWidth() == Sem->sizeInBits);
3905 if (Sem == &APFloatBase::semIEEEhalf)
3906 return initFromHalfAPInt(api);
3907 if (Sem == &APFloatBase::semBFloat)
3908 return initFromBFloatAPInt(api);
3909 if (Sem == &APFloatBase::semIEEEsingle)
3910 return initFromFloatAPInt(api);
3911 if (Sem == &APFloatBase::semIEEEdouble)
3912 return initFromDoubleAPInt(api);
3913 if (Sem == &APFloatBase::semX87DoubleExtended)
3914 return initFromF80LongDoubleAPInt(api);
3915 if (Sem == &APFloatBase::semIEEEquad)
3916 return initFromQuadrupleAPInt(api);
3917 if (Sem == &APFloatBase::semPPCDoubleDoubleLegacy)
3918 return initFromPPCDoubleDoubleLegacyAPInt(api);
3919 if (Sem == &APFloatBase::semFloat8E5M2)
3920 return initFromFloat8E5M2APInt(api);
3921 if (Sem == &APFloatBase::semFloat8E5M2FNUZ)
3922 return initFromFloat8E5M2FNUZAPInt(api);
3923 if (Sem == &APFloatBase::semFloat8E4M3)
3924 return initFromFloat8E4M3APInt(api);
3925 if (Sem == &APFloatBase::semFloat8E4M3FN)
3926 return initFromFloat8E4M3FNAPInt(api);
3927 if (Sem == &APFloatBase::semFloat8E4M3FNUZ)
3928 return initFromFloat8E4M3FNUZAPInt(api);
3929 if (Sem == &APFloatBase::semFloat8E4M3B11FNUZ)
3930 return initFromFloat8E4M3B11FNUZAPInt(api);
3931 if (Sem == &APFloatBase::semFloat8E3M4)
3932 return initFromFloat8E3M4APInt(api);
3933 if (Sem == &APFloatBase::semFloatTF32)
3934 return initFromFloatTF32APInt(api);
3935 if (Sem == &APFloatBase::semFloat8E8M0FNU)
3936 return initFromFloat8E8M0FNUAPInt(api);
3937 if (Sem == &APFloatBase::semFloat6E3M2FN)
3938 return initFromFloat6E3M2FNAPInt(api);
3939 if (Sem == &APFloatBase::semFloat6E2M3FN)
3940 return initFromFloat6E2M3FNAPInt(api);
3941 if (Sem == &APFloatBase::semFloat4E2M1FN)
3942 return initFromFloat4E2M1FNAPInt(api);
3943
3944 llvm_unreachable("unsupported semantics");
3945}
3946
3947/// Make this number the largest magnitude normal number in the given
3948/// semantics.
3949void IEEEFloat::makeLargest(bool Negative) {
3950 if (Negative && !semantics->hasSignedRepr)
3952 "This floating point format does not support signed values");
3953 // We want (in interchange format):
3954 // sign = {Negative}
3955 // exponent = 1..10
3956 // significand = 1..1
3957 category = fcNormal;
3958 sign = Negative;
3959 exponent = semantics->maxExponent;
3960
3961 // Use memset to set all but the highest integerPart to all ones.
3962 integerPart *significand = significandParts();
3963 unsigned PartCount = partCount();
3964 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
3965
3966 // Set the high integerPart especially setting all unused top bits for
3967 // internal consistency.
3968 const unsigned NumUnusedHighBits =
3969 PartCount*integerPartWidth - semantics->precision;
3970 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
3971 ? (~integerPart(0) >> NumUnusedHighBits)
3972 : 0;
3973 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
3974 semantics->nanEncoding == fltNanEncoding::AllOnes &&
3975 (semantics->precision > 1))
3976 significand[0] &= ~integerPart(1);
3977}
3978
3979/// Make this number the smallest magnitude denormal number in the given
3980/// semantics.
3981void IEEEFloat::makeSmallest(bool Negative) {
3982 if (Negative && !semantics->hasSignedRepr)
3984 "This floating point format does not support signed values");
3985 // We want (in interchange format):
3986 // sign = {Negative}
3987 // exponent = 0..0
3988 // significand = 0..01
3989 category = fcNormal;
3990 sign = Negative;
3991 exponent = semantics->minExponent;
3992 APInt::tcSet(significandParts(), 1, partCount());
3993}
3994
3996 if (Negative && !semantics->hasSignedRepr)
3998 "This floating point format does not support signed values");
3999 // We want (in interchange format):
4000 // sign = {Negative}
4001 // exponent = 0..0
4002 // significand = 10..0
4003
4004 category = fcNormal;
4005 zeroSignificand();
4006 sign = Negative;
4007 exponent = semantics->minExponent;
4008 APInt::tcSetBit(significandParts(), semantics->precision - 1);
4009}
4010
4011IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
4012 initFromAPInt(&Sem, API);
4013}
4014
4016 initFromAPInt(&APFloatBase::semIEEEsingle, APInt::floatToBits(f));
4017}
4018
4020 initFromAPInt(&APFloatBase::semIEEEdouble, APInt::doubleToBits(d));
4021}
4022
4023namespace {
4024 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
4025 Buffer.append(Str.begin(), Str.end());
4026 }
4027
4028 /// Removes data from the given significand until it is no more
4029 /// precise than is required for the desired precision.
4030 void AdjustToPrecision(APInt &significand,
4031 int &exp, unsigned FormatPrecision) {
4032 unsigned bits = significand.getActiveBits();
4033
4034 // 196/59 is a very slight overestimate of lg_2(10).
4035 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
4036
4037 if (bits <= bitsRequired) return;
4038
4039 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
4040 if (!tensRemovable) return;
4041
4042 exp += tensRemovable;
4043
4044 APInt divisor(significand.getBitWidth(), 1);
4045 APInt powten(significand.getBitWidth(), 10);
4046 while (true) {
4047 if (tensRemovable & 1)
4048 divisor *= powten;
4049 tensRemovable >>= 1;
4050 if (!tensRemovable) break;
4051 powten *= powten;
4052 }
4053
4054 significand = significand.udiv(divisor);
4055
4056 // Truncate the significand down to its active bit count.
4057 significand = significand.trunc(significand.getActiveBits());
4058 }
4059
4060
4061 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
4062 int &exp, unsigned FormatPrecision) {
4063 unsigned N = buffer.size();
4064 if (N <= FormatPrecision) return;
4065
4066 // The most significant figures are the last ones in the buffer.
4067 unsigned FirstSignificant = N - FormatPrecision;
4068
4069 // Round.
4070 // FIXME: this probably shouldn't use 'round half up'.
4071
4072 // Rounding down is just a truncation, except we also want to drop
4073 // trailing zeros from the new result.
4074 if (buffer[FirstSignificant - 1] < '5') {
4075 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
4076 FirstSignificant++;
4077
4078 exp += FirstSignificant;
4079 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4080 return;
4081 }
4082
4083 // Rounding up requires a decimal add-with-carry. If we continue
4084 // the carry, the newly-introduced zeros will just be truncated.
4085 for (unsigned I = FirstSignificant; I != N; ++I) {
4086 if (buffer[I] == '9') {
4087 FirstSignificant++;
4088 } else {
4089 buffer[I]++;
4090 break;
4091 }
4092 }
4093
4094 // If we carried through, we have exactly one digit of precision.
4095 if (FirstSignificant == N) {
4096 exp += FirstSignificant;
4097 buffer.clear();
4098 buffer.push_back('1');
4099 return;
4100 }
4101
4102 exp += FirstSignificant;
4103 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4104 }
4105
4106 void toStringImpl(SmallVectorImpl<char> &Str, const bool isNeg, int exp,
4107 APInt significand, unsigned FormatPrecision,
4108 unsigned FormatMaxPadding, bool TruncateZero) {
4109 const int semanticsPrecision = significand.getBitWidth();
4110
4111 if (isNeg)
4112 Str.push_back('-');
4113
4114 // Set FormatPrecision if zero. We want to do this before we
4115 // truncate trailing zeros, as those are part of the precision.
4116 if (!FormatPrecision) {
4117 // We use enough digits so the number can be round-tripped back to an
4118 // APFloat. The formula comes from "How to Print Floating-Point Numbers
4119 // Accurately" by Steele and White.
4120 // FIXME: Using a formula based purely on the precision is conservative;
4121 // we can print fewer digits depending on the actual value being printed.
4122
4123 // FormatPrecision = 2 + floor(significandBits / lg_2(10))
4124 FormatPrecision = 2 + semanticsPrecision * 59 / 196;
4125 }
4126
4127 // Ignore trailing binary zeros.
4128 int trailingZeros = significand.countr_zero();
4129 exp += trailingZeros;
4130 significand.lshrInPlace(trailingZeros);
4131
4132 // Change the exponent from 2^e to 10^e.
4133 if (exp == 0) {
4134 // Nothing to do.
4135 } else if (exp > 0) {
4136 // Just shift left.
4137 significand = significand.zext(semanticsPrecision + exp);
4138 significand <<= exp;
4139 exp = 0;
4140 } else { /* exp < 0 */
4141 int texp = -exp;
4142
4143 // We transform this using the identity:
4144 // (N)(2^-e) == (N)(5^e)(10^-e)
4145 // This means we have to multiply N (the significand) by 5^e.
4146 // To avoid overflow, we have to operate on numbers large
4147 // enough to store N * 5^e:
4148 // log2(N * 5^e) == log2(N) + e * log2(5)
4149 // <= semantics->precision + e * 137 / 59
4150 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
4151
4152 unsigned precision = semanticsPrecision + (137 * texp + 136) / 59;
4153
4154 // Multiply significand by 5^e.
4155 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
4156 significand = significand.zext(precision);
4157 APInt five_to_the_i(precision, 5);
4158 while (true) {
4159 if (texp & 1)
4160 significand *= five_to_the_i;
4161
4162 texp >>= 1;
4163 if (!texp)
4164 break;
4165 five_to_the_i *= five_to_the_i;
4166 }
4167 }
4168
4169 AdjustToPrecision(significand, exp, FormatPrecision);
4170
4172
4173 // Fill the buffer.
4174 unsigned precision = significand.getBitWidth();
4175 if (precision < 4) {
4176 // We need enough precision to store the value 10.
4177 precision = 4;
4178 significand = significand.zext(precision);
4179 }
4180 APInt ten(precision, 10);
4181 APInt digit(precision, 0);
4182
4183 bool inTrail = true;
4184 while (significand != 0) {
4185 // digit <- significand % 10
4186 // significand <- significand / 10
4187 APInt::udivrem(significand, ten, significand, digit);
4188
4189 unsigned d = digit.getZExtValue();
4190
4191 // Drop trailing zeros.
4192 if (inTrail && !d)
4193 exp++;
4194 else {
4195 buffer.push_back((char) ('0' + d));
4196 inTrail = false;
4197 }
4198 }
4199
4200 assert(!buffer.empty() && "no characters in buffer!");
4201
4202 // Drop down to FormatPrecision.
4203 // TODO: don't do more precise calculations above than are required.
4204 AdjustToPrecision(buffer, exp, FormatPrecision);
4205
4206 unsigned NDigits = buffer.size();
4207
4208 // Check whether we should use scientific notation.
4209 bool FormatScientific;
4210 if (!FormatMaxPadding) {
4211 FormatScientific = true;
4212 } else {
4213 if (exp >= 0) {
4214 // 765e3 --> 765000
4215 // ^^^
4216 // But we shouldn't make the number look more precise than it is.
4217 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
4218 NDigits + (unsigned) exp > FormatPrecision);
4219 } else {
4220 // Power of the most significant digit.
4221 int MSD = exp + (int) (NDigits - 1);
4222 if (MSD >= 0) {
4223 // 765e-2 == 7.65
4224 FormatScientific = false;
4225 } else {
4226 // 765e-5 == 0.00765
4227 // ^ ^^
4228 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
4229 }
4230 }
4231 }
4232
4233 // Scientific formatting is pretty straightforward.
4234 if (FormatScientific) {
4235 exp += (NDigits - 1);
4236
4237 Str.push_back(buffer[NDigits-1]);
4238 Str.push_back('.');
4239 if (NDigits == 1 && TruncateZero)
4240 Str.push_back('0');
4241 else
4242 for (unsigned I = 1; I != NDigits; ++I)
4243 Str.push_back(buffer[NDigits-1-I]);
4244 // Fill with zeros up to FormatPrecision.
4245 if (!TruncateZero && FormatPrecision > NDigits - 1)
4246 Str.append(FormatPrecision - NDigits + 1, '0');
4247 // For !TruncateZero we use lower 'e'.
4248 Str.push_back(TruncateZero ? 'E' : 'e');
4249
4250 Str.push_back(exp >= 0 ? '+' : '-');
4251 if (exp < 0)
4252 exp = -exp;
4253 SmallVector<char, 6> expbuf;
4254 do {
4255 expbuf.push_back((char) ('0' + (exp % 10)));
4256 exp /= 10;
4257 } while (exp);
4258 // Exponent always at least two digits if we do not truncate zeros.
4259 if (!TruncateZero && expbuf.size() < 2)
4260 expbuf.push_back('0');
4261 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
4262 Str.push_back(expbuf[E-1-I]);
4263 return;
4264 }
4265
4266 // Non-scientific, positive exponents.
4267 if (exp >= 0) {
4268 for (unsigned I = 0; I != NDigits; ++I)
4269 Str.push_back(buffer[NDigits-1-I]);
4270 for (unsigned I = 0; I != (unsigned) exp; ++I)
4271 Str.push_back('0');
4272 return;
4273 }
4274
4275 // Non-scientific, negative exponents.
4276
4277 // The number of digits to the left of the decimal point.
4278 int NWholeDigits = exp + (int) NDigits;
4279
4280 unsigned I = 0;
4281 if (NWholeDigits > 0) {
4282 for (; I != (unsigned) NWholeDigits; ++I)
4283 Str.push_back(buffer[NDigits-I-1]);
4284 Str.push_back('.');
4285 } else {
4286 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
4287
4288 Str.push_back('0');
4289 Str.push_back('.');
4290 for (unsigned Z = 1; Z != NZeros; ++Z)
4291 Str.push_back('0');
4292 }
4293
4294 for (; I != NDigits; ++I)
4295 Str.push_back(buffer[NDigits-I-1]);
4296
4297 }
4298} // namespace
4299
4300void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
4301 unsigned FormatMaxPadding, bool TruncateZero) const {
4302 switch (category) {
4303 case fcInfinity:
4304 if (isNegative())
4305 return append(Str, "-Inf");
4306 else
4307 return append(Str, "+Inf");
4308
4309 case fcNaN: return append(Str, "NaN");
4310
4311 case fcZero:
4312 if (isNegative())
4313 Str.push_back('-');
4314
4315 if (!FormatMaxPadding) {
4316 if (TruncateZero)
4317 append(Str, "0.0E+0");
4318 else {
4319 append(Str, "0.0");
4320 if (FormatPrecision > 1)
4321 Str.append(FormatPrecision - 1, '0');
4322 append(Str, "e+00");
4323 }
4324 } else {
4325 Str.push_back('0');
4326 }
4327 return;
4328
4329 case fcNormal:
4330 break;
4331 }
4332
4333 // Decompose the number into an APInt and an exponent.
4334 int exp = exponent - ((int) semantics->precision - 1);
4335 APInt significand(
4336 semantics->precision,
4337 ArrayRef(significandParts(), partCountForBits(semantics->precision)));
4338
4339 toStringImpl(Str, isNegative(), exp, significand, FormatPrecision,
4340 FormatMaxPadding, TruncateZero);
4341
4342}
4343
4345 if (!isFinite() || isZero())
4346 return INT_MIN;
4347
4348 const integerPart *Parts = significandParts();
4349 const int PartCount = partCountForBits(semantics->precision);
4350
4351 int PopCount = 0;
4352 for (int i = 0; i < PartCount; ++i) {
4353 PopCount += llvm::popcount(Parts[i]);
4354 if (PopCount > 1)
4355 return INT_MIN;
4356 }
4357
4358 if (exponent != semantics->minExponent)
4359 return exponent;
4360
4361 int CountrParts = 0;
4362 for (int i = 0; i < PartCount;
4363 ++i, CountrParts += APInt::APINT_BITS_PER_WORD) {
4364 if (Parts[i] != 0) {
4365 return exponent - semantics->precision + CountrParts +
4366 llvm::countr_zero(Parts[i]) + 1;
4367 }
4368 }
4369
4370 llvm_unreachable("didn't find the set bit");
4371}
4372
4374 if (!isNaN())
4375 return false;
4376 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
4377 semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
4378 return false;
4379
4380 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
4381 // first bit of the trailing significand being 0.
4382 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
4383}
4384
4385/// IEEE-754R 2008 5.3.1: nextUp/nextDown.
4386///
4387/// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
4388/// appropriate sign switching before/after the computation.
4390 // If we are performing nextDown, swap sign so we have -x.
4391 if (nextDown)
4392 changeSign();
4393
4394 // Compute nextUp(x)
4395 opStatus result = opOK;
4396
4397 // Handle each float category separately.
4398 switch (category) {
4399 case fcInfinity:
4400 // nextUp(+inf) = +inf
4401 if (!isNegative())
4402 break;
4403 // nextUp(-inf) = -getLargest()
4404 makeLargest(true);
4405 break;
4406 case fcNaN:
4407 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
4408 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
4409 // change the payload.
4410 if (isSignaling()) {
4411 result = opInvalidOp;
4412 // For consistency, propagate the sign of the sNaN to the qNaN.
4413 makeNaN(false, isNegative(), nullptr);
4414 }
4415 break;
4416 case fcZero:
4417 // nextUp(pm 0) = +getSmallest()
4418 makeSmallest(false);
4419 break;
4420 case fcNormal:
4421 // nextUp(-getSmallest()) = -0
4422 if (isSmallest() && isNegative()) {
4423 APInt::tcSet(significandParts(), 0, partCount());
4424 category = fcZero;
4425 exponent = 0;
4426 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
4427 sign = false;
4428 if (!semantics->hasZero)
4430 break;
4431 }
4432
4433 if (isLargest() && !isNegative()) {
4434 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4435 // nextUp(getLargest()) == NAN
4436 makeNaN();
4437 break;
4438 } else if (semantics->nonFiniteBehavior ==
4440 // nextUp(getLargest()) == getLargest()
4441 break;
4442 } else {
4443 // nextUp(getLargest()) == INFINITY
4444 APInt::tcSet(significandParts(), 0, partCount());
4445 category = fcInfinity;
4446 exponent = semantics->maxExponent + 1;
4447 break;
4448 }
4449 }
4450
4451 // nextUp(normal) == normal + inc.
4452 if (isNegative()) {
4453 // If we are negative, we need to decrement the significand.
4454
4455 // We only cross a binade boundary that requires adjusting the exponent
4456 // if:
4457 // 1. exponent != semantics->minExponent. This implies we are not in the
4458 // smallest binade or are dealing with denormals.
4459 // 2. Our significand excluding the integral bit is all zeros.
4460 bool WillCrossBinadeBoundary =
4461 exponent != semantics->minExponent && isSignificandAllZeros();
4462
4463 // Decrement the significand.
4464 //
4465 // We always do this since:
4466 // 1. If we are dealing with a non-binade decrement, by definition we
4467 // just decrement the significand.
4468 // 2. If we are dealing with a normal -> normal binade decrement, since
4469 // we have an explicit integral bit the fact that all bits but the
4470 // integral bit are zero implies that subtracting one will yield a
4471 // significand with 0 integral bit and 1 in all other spots. Thus we
4472 // must just adjust the exponent and set the integral bit to 1.
4473 // 3. If we are dealing with a normal -> denormal binade decrement,
4474 // since we set the integral bit to 0 when we represent denormals, we
4475 // just decrement the significand.
4476 integerPart *Parts = significandParts();
4477 APInt::tcDecrement(Parts, partCount());
4478
4479 if (WillCrossBinadeBoundary) {
4480 // Our result is a normal number. Do the following:
4481 // 1. Set the integral bit to 1.
4482 // 2. Decrement the exponent.
4483 APInt::tcSetBit(Parts, semantics->precision - 1);
4484 exponent--;
4485 }
4486 } else {
4487 // If we are positive, we need to increment the significand.
4488
4489 // We only cross a binade boundary that requires adjusting the exponent if
4490 // the input is not a denormal and all of said input's significand bits
4491 // are set. If all of said conditions are true: clear the significand, set
4492 // the integral bit to 1, and increment the exponent. If we have a
4493 // denormal always increment since moving denormals and the numbers in the
4494 // smallest normal binade have the same exponent in our representation.
4495 // If there are only exponents, any increment always crosses the
4496 // BinadeBoundary.
4497 bool WillCrossBinadeBoundary = !APFloat::hasSignificand(*semantics) ||
4498 (!isDenormal() && isSignificandAllOnes());
4499
4500 if (WillCrossBinadeBoundary) {
4501 integerPart *Parts = significandParts();
4502 APInt::tcSet(Parts, 0, partCount());
4503 APInt::tcSetBit(Parts, semantics->precision - 1);
4504 assert(exponent != semantics->maxExponent &&
4505 "We can not increment an exponent beyond the maxExponent allowed"
4506 " by the given floating point semantics.");
4507 exponent++;
4508 } else {
4509 incrementSignificand();
4510 }
4511 }
4512 break;
4513 }
4514
4515 // If we are performing nextDown, swap sign so we have -nextUp(-x)
4516 if (nextDown)
4517 changeSign();
4518
4519 return result;
4520}
4521
4523 assert(isNaN() && "Can only be called on NaN values");
4524 // Number of bits in the payload, excluding the (maybe implied) integer bit.
4525 unsigned Bits = semantics->precision - 1;
4526 return APInt(Bits, ArrayRef(significandParts(), partCountForBits(Bits)));
4527}
4528
4529APFloatBase::ExponentType IEEEFloat::exponentNaN() const {
4530 return ::exponentNaN(*semantics);
4531}
4532
4533APFloatBase::ExponentType IEEEFloat::exponentInf() const {
4534 return ::exponentInf(*semantics);
4535}
4536
4537APFloatBase::ExponentType IEEEFloat::exponentZero() const {
4538 return ::exponentZero(*semantics);
4539}
4540
4541void IEEEFloat::makeInf(bool Negative) {
4542 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
4543 llvm_unreachable("This floating point format does not support Inf");
4544
4545 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4546 // There is no Inf, so make NaN instead.
4547 makeNaN(false, Negative);
4548 return;
4549 }
4550 category = fcInfinity;
4551 sign = Negative;
4552 exponent = exponentInf();
4553 APInt::tcSet(significandParts(), 0, partCount());
4554}
4555
4556void IEEEFloat::makeZero(bool Negative) {
4557 if (!semantics->hasZero)
4558 llvm_unreachable("This floating point format does not support Zero");
4559
4560 category = fcZero;
4561 sign = Negative;
4562 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
4563 // Merge negative zero to positive because 0b10000...000 is used for NaN
4564 sign = false;
4565 }
4566 exponent = exponentZero();
4567 APInt::tcSet(significandParts(), 0, partCount());
4568}
4569
4571 assert(isNaN());
4572 if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::NanOnly)
4573 APInt::tcSetBit(significandParts(), semantics->precision - 2);
4574}
4575
4576int ilogb(const IEEEFloat &Arg) {
4577 if (Arg.isNaN())
4578 return APFloat::IEK_NaN;
4579 if (Arg.isZero())
4580 return APFloat::IEK_Zero;
4581 if (Arg.isInfinity())
4582 return APFloat::IEK_Inf;
4583 if (!Arg.isDenormal())
4584 return Arg.exponent;
4585
4586 IEEEFloat Normalized(Arg);
4587 int SignificandBits = Arg.getSemantics().precision - 1;
4588
4589 Normalized.exponent += SignificandBits;
4590 Normalized.normalize(APFloat::rmNearestTiesToEven, lfExactlyZero);
4591 return Normalized.exponent - SignificandBits;
4592}
4593
4595 auto MaxExp = X.getSemantics().maxExponent;
4596 auto MinExp = X.getSemantics().minExponent;
4597
4598 // If Exp is wildly out-of-scale, simply adding it to X.exponent will
4599 // overflow; clamp it to a safe range before adding, but ensure that the range
4600 // is large enough that the clamp does not change the result. The range we
4601 // need to support is the difference between the largest possible exponent and
4602 // the normalized exponent of half the smallest denormal.
4603
4604 int SignificandBits = X.getSemantics().precision - 1;
4605 int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
4606
4607 // Clamp to one past the range ends to let normalize handle overlflow.
4608 X.exponent += std::clamp(Exp, -MaxIncrement - 1, MaxIncrement);
4609 X.normalize(RoundingMode, lfExactlyZero);
4610 if (X.isNaN())
4611 X.makeQuiet();
4612 return X;
4613}
4614
4615IEEEFloat frexp(const IEEEFloat &Val, int &Exp, roundingMode RM) {
4616 Exp = ilogb(Val);
4617
4618 // Quiet signalling nans.
4619 if (Exp == APFloat::IEK_NaN) {
4620 IEEEFloat Quiet(Val);
4621 Quiet.makeQuiet();
4622 return Quiet;
4623 }
4624
4625 if (Exp == APFloat::IEK_Inf)
4626 return Val;
4627
4628 // 1 is added because frexp is defined to return a normalized fraction in
4629 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
4630 Exp = Exp == APFloat::IEK_Zero ? 0 : Exp + 1;
4631 return scalbn(Val, -Exp, RM);
4632}
4633
4635 : Semantics(&S),
4636 Floats(new APFloat[2]{APFloat(APFloatBase::semIEEEdouble),
4637 APFloat(APFloatBase::semIEEEdouble)}) {
4638 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4639}
4640
4642 : Semantics(&S), Floats(new APFloat[2]{
4643 APFloat(APFloatBase::semIEEEdouble, uninitialized),
4644 APFloat(APFloatBase::semIEEEdouble, uninitialized)}) {
4645 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4646}
4647
4649 : Semantics(&S),
4650 Floats(new APFloat[2]{APFloat(APFloatBase::semIEEEdouble, I),
4651 APFloat(APFloatBase::semIEEEdouble)}) {
4652 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4653}
4654
4656 : Semantics(&S),
4657 Floats(new APFloat[2]{
4658 APFloat(APFloatBase::semIEEEdouble, APInt(64, I.getRawData()[0])),
4659 APFloat(APFloatBase::semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
4660 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4661}
4662
4664 APFloat &&Second)
4665 : Semantics(&S),
4666 Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
4667 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4668 assert(&Floats[0].getSemantics() == &APFloatBase::semIEEEdouble);
4669 assert(&Floats[1].getSemantics() == &APFloatBase::semIEEEdouble);
4670}
4671
4673 : Semantics(RHS.Semantics),
4674 Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
4675 APFloat(RHS.Floats[1])}
4676 : nullptr) {
4677 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4678}
4679
4681 : Semantics(RHS.Semantics), Floats(RHS.Floats) {
4682 RHS.Semantics = &APFloatBase::semBogus;
4683 RHS.Floats = nullptr;
4684 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4685}
4686
4688 if (Semantics == RHS.Semantics && RHS.Floats) {
4689 Floats[0] = RHS.Floats[0];
4690 Floats[1] = RHS.Floats[1];
4691 } else if (this != &RHS) {
4692 this->~DoubleAPFloat();
4693 new (this) DoubleAPFloat(RHS);
4694 }
4695 return *this;
4696}
4697
4698// Returns a result such that:
4699// 1. abs(Lo) <= ulp(Hi)/2
4700// 2. Hi == RTNE(Hi + Lo)
4701// 3. Hi + Lo == X + Y
4702//
4703// Requires that log2(X) >= log2(Y).
4704static std::pair<APFloat, APFloat> fastTwoSum(APFloat X, APFloat Y) {
4705 if (!X.isFinite())
4706 return {X, APFloat::getZero(X.getSemantics(), /*Negative=*/false)};
4707 APFloat Hi = X + Y;
4708 APFloat Delta = Hi - X;
4709 APFloat Lo = Y - Delta;
4710 return {Hi, Lo};
4711}
4712
4713// Implement addition, subtraction, multiplication and division based on:
4714// "Software for Doubled-Precision Floating-Point Computations",
4715// by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
4716APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
4717 const APFloat &c, const APFloat &cc,
4718 roundingMode RM) {
4719 int Status = opOK;
4720 APFloat z = a;
4721 Status |= z.add(c, RM);
4722 if (!z.isFinite()) {
4723 if (!z.isInfinity()) {
4724 Floats[0] = std::move(z);
4725 Floats[1].makeZero(/* Neg = */ false);
4726 return (opStatus)Status;
4727 }
4728 Status = opOK;
4729 auto AComparedToC = a.compareAbsoluteValue(c);
4730 z = cc;
4731 Status |= z.add(aa, RM);
4732 if (AComparedToC == APFloat::cmpGreaterThan) {
4733 // z = cc + aa + c + a;
4734 Status |= z.add(c, RM);
4735 Status |= z.add(a, RM);
4736 } else {
4737 // z = cc + aa + a + c;
4738 Status |= z.add(a, RM);
4739 Status |= z.add(c, RM);
4740 }
4741 if (!z.isFinite()) {
4742 Floats[0] = std::move(z);
4743 Floats[1].makeZero(/* Neg = */ false);
4744 return (opStatus)Status;
4745 }
4746 Floats[0] = z;
4747 APFloat zz = aa;
4748 Status |= zz.add(cc, RM);
4749 if (AComparedToC == APFloat::cmpGreaterThan) {
4750 // Floats[1] = a - z + c + zz;
4751 Floats[1] = a;
4752 Status |= Floats[1].subtract(z, RM);
4753 Status |= Floats[1].add(c, RM);
4754 Status |= Floats[1].add(zz, RM);
4755 } else {
4756 // Floats[1] = c - z + a + zz;
4757 Floats[1] = c;
4758 Status |= Floats[1].subtract(z, RM);
4759 Status |= Floats[1].add(a, RM);
4760 Status |= Floats[1].add(zz, RM);
4761 }
4762 } else {
4763 // q = a - z;
4764 APFloat q = a;
4765 Status |= q.subtract(z, RM);
4766
4767 // zz = q + c + (a - (q + z)) + aa + cc;
4768 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
4769 auto zz = q;
4770 Status |= zz.add(c, RM);
4771 Status |= q.add(z, RM);
4772 Status |= q.subtract(a, RM);
4773 q.changeSign();
4774 Status |= zz.add(q, RM);
4775 Status |= zz.add(aa, RM);
4776 Status |= zz.add(cc, RM);
4777 if (zz.isZero() && !zz.isNegative()) {
4778 Floats[0] = std::move(z);
4779 Floats[1].makeZero(/* Neg = */ false);
4780 return opOK;
4781 }
4782 Floats[0] = z;
4783 Status |= Floats[0].add(zz, RM);
4784 if (!Floats[0].isFinite()) {
4785 Floats[1].makeZero(/* Neg = */ false);
4786 return (opStatus)Status;
4787 }
4788 Floats[1] = std::move(z);
4789 Status |= Floats[1].subtract(Floats[0], RM);
4790 Status |= Floats[1].add(zz, RM);
4791 }
4792 return (opStatus)Status;
4793}
4794
4795APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
4796 const DoubleAPFloat &RHS,
4797 DoubleAPFloat &Out,
4798 roundingMode RM) {
4799 if (LHS.getCategory() == fcNaN) {
4800 Out = LHS;
4801 return opOK;
4802 }
4803 if (RHS.getCategory() == fcNaN) {
4804 Out = RHS;
4805 return opOK;
4806 }
4807 if (LHS.getCategory() == fcZero) {
4808 Out = RHS;
4809 return opOK;
4810 }
4811 if (RHS.getCategory() == fcZero) {
4812 Out = LHS;
4813 return opOK;
4814 }
4815 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity &&
4816 LHS.isNegative() != RHS.isNegative()) {
4817 Out.makeNaN(false, Out.isNegative(), nullptr);
4818 return opInvalidOp;
4819 }
4820 if (LHS.getCategory() == fcInfinity) {
4821 Out = LHS;
4822 return opOK;
4823 }
4824 if (RHS.getCategory() == fcInfinity) {
4825 Out = RHS;
4826 return opOK;
4827 }
4828 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
4829
4830 APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
4831 CC(RHS.Floats[1]);
4832 assert(&A.getSemantics() == &APFloatBase::semIEEEdouble);
4833 assert(&AA.getSemantics() == &APFloatBase::semIEEEdouble);
4834 assert(&C.getSemantics() == &APFloatBase::semIEEEdouble);
4835 assert(&CC.getSemantics() == &APFloatBase::semIEEEdouble);
4836 assert(&Out.Floats[0].getSemantics() == &APFloatBase::semIEEEdouble);
4837 assert(&Out.Floats[1].getSemantics() == &APFloatBase::semIEEEdouble);
4838 return Out.addImpl(A, AA, C, CC, RM);
4839}
4840
4842 roundingMode RM) {
4843 return addWithSpecial(*this, RHS, *this, RM);
4844}
4845
4847 roundingMode RM) {
4848 changeSign();
4849 auto Ret = add(RHS, RM);
4850 changeSign();
4851 return Ret;
4852}
4853
4856 const auto &LHS = *this;
4857 auto &Out = *this;
4858 /* Interesting observation: For special categories, finding the lowest
4859 common ancestor of the following layered graph gives the correct
4860 return category:
4861
4862 NaN
4863 / \
4864 Zero Inf
4865 \ /
4866 Normal
4867
4868 e.g. NaN * NaN = NaN
4869 Zero * Inf = NaN
4870 Normal * Zero = Zero
4871 Normal * Inf = Inf
4872 */
4873 if (LHS.getCategory() == fcNaN) {
4874 Out = LHS;
4875 return opOK;
4876 }
4877 if (RHS.getCategory() == fcNaN) {
4878 Out = RHS;
4879 return opOK;
4880 }
4881 if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) ||
4882 (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) {
4883 Out.makeNaN(false, false, nullptr);
4884 return opOK;
4885 }
4886 if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) {
4887 Out = LHS;
4888 return opOK;
4889 }
4890 if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) {
4891 Out = RHS;
4892 return opOK;
4893 }
4894 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal &&
4895 "Special cases not handled exhaustively");
4896
4897 int Status = opOK;
4898 APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
4899 // t = a * c
4900 APFloat T = A;
4901 Status |= T.multiply(C, RM);
4902 if (!T.isFiniteNonZero()) {
4903 Floats[0] = std::move(T);
4904 Floats[1].makeZero(/* Neg = */ false);
4905 return (opStatus)Status;
4906 }
4907
4908 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
4909 APFloat Tau = A;
4910 T.changeSign();
4911 Status |= Tau.fusedMultiplyAdd(C, T, RM);
4912 T.changeSign();
4913 {
4914 // v = a * d
4915 APFloat V = A;
4916 Status |= V.multiply(D, RM);
4917 // w = b * c
4918 APFloat W = B;
4919 Status |= W.multiply(C, RM);
4920 Status |= V.add(W, RM);
4921 // tau += v + w
4922 Status |= Tau.add(V, RM);
4923 }
4924 // u = t + tau
4925 APFloat U = T;
4926 Status |= U.add(Tau, RM);
4927
4928 Floats[0] = U;
4929 if (!U.isFinite()) {
4930 Floats[1].makeZero(/* Neg = */ false);
4931 } else {
4932 // Floats[1] = (t - u) + tau
4933 Status |= T.subtract(U, RM);
4934 Status |= T.add(Tau, RM);
4935 Floats[1] = std::move(T);
4936 }
4937 return (opStatus)Status;
4938}
4939
4942 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
4943 "Unexpected Semantics");
4944 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
4945 auto Ret = Tmp.divide(
4946 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
4947 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
4948 return Ret;
4949}
4950
4952 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
4953 "Unexpected Semantics");
4954 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
4955 auto Ret = Tmp.remainder(
4956 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4957 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
4958 return Ret;
4959}
4960
4962 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
4963 "Unexpected Semantics");
4964 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
4965 auto Ret = Tmp.mod(
4966 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4967 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
4968 return Ret;
4969}
4970
4973 const DoubleAPFloat &Addend,
4975 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
4976 "Unexpected Semantics");
4977 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
4978 auto Ret = Tmp.fusedMultiplyAdd(
4979 APFloat(APFloatBase::semPPCDoubleDoubleLegacy,
4980 Multiplicand.bitcastToAPInt()),
4981 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()),
4982 RM);
4983 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
4984 return Ret;
4985}
4986
4988 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
4989 "Unexpected Semantics");
4990 const APFloat &Hi = getFirst();
4991 const APFloat &Lo = getSecond();
4992
4993 APFloat RoundedHi = Hi;
4994 const opStatus HiStatus = RoundedHi.roundToIntegral(RM);
4995
4996 // We can reduce the problem to just the high part if the input:
4997 // 1. Represents a non-finite value.
4998 // 2. Has a component which is zero.
4999 if (!Hi.isFiniteNonZero() || Lo.isZero()) {
5000 Floats[0] = std::move(RoundedHi);
5001 Floats[1].makeZero(/*Neg=*/false);
5002 return HiStatus;
5003 }
5004
5005 // Adjust `Rounded` in the direction of `TieBreaker` if `ToRound` was at a
5006 // halfway point.
5007 auto RoundToNearestHelper = [](APFloat ToRound, APFloat Rounded,
5008 APFloat TieBreaker) {
5009 // RoundingError tells us which direction we rounded:
5010 // - RoundingError > 0: we rounded up.
5011 // - RoundingError < 0: we rounded down.
5012 // Sterbenz' lemma ensures that RoundingError is exact.
5013 const APFloat RoundingError = Rounded - ToRound;
5014 if (TieBreaker.isNonZero() &&
5015 TieBreaker.isNegative() != RoundingError.isNegative() &&
5016 abs(RoundingError).isExactlyValue(0.5))
5017 Rounded.add(
5018 APFloat::getOne(Rounded.getSemantics(), TieBreaker.isNegative()),
5020 return Rounded;
5021 };
5022
5023 // Case 1: Hi is not an integer.
5024 // Special cases are for rounding modes that are sensitive to ties.
5025 if (RoundedHi != Hi) {
5026 // We need to consider the case where Hi was between two integers and the
5027 // rounding mode broke the tie when, in fact, Lo may have had a different
5028 // sign than Hi.
5029 if (RM == rmNearestTiesToAway || RM == rmNearestTiesToEven)
5030 RoundedHi = RoundToNearestHelper(Hi, RoundedHi, Lo);
5031
5032 Floats[0] = std::move(RoundedHi);
5033 Floats[1].makeZero(/*Neg=*/false);
5034 return HiStatus;
5035 }
5036
5037 // Case 2: Hi is an integer.
5038 // Special cases are for rounding modes which are rounding towards or away from zero.
5039 RoundingMode LoRoundingMode;
5040 if (RM == rmTowardZero)
5041 // When our input is positive, we want the Lo component rounded toward
5042 // negative infinity to get the smallest result magnitude. Likewise,
5043 // negative inputs want the Lo component rounded toward positive infinity.
5044 LoRoundingMode = isNegative() ? rmTowardPositive : rmTowardNegative;
5045 else
5046 LoRoundingMode = RM;
5047
5048 APFloat RoundedLo = Lo;
5049 const opStatus LoStatus = RoundedLo.roundToIntegral(LoRoundingMode);
5050 if (LoRoundingMode == rmNearestTiesToAway)
5051 // We need to consider the case where Lo was between two integers and the
5052 // rounding mode broke the tie when, in fact, Hi may have had a different
5053 // sign than Lo.
5054 RoundedLo = RoundToNearestHelper(Lo, RoundedLo, Hi);
5055
5056 // We must ensure that the final result has no overlap between the two APFloat values.
5057 std::tie(RoundedHi, RoundedLo) = fastTwoSum(RoundedHi, RoundedLo);
5058
5059 Floats[0] = std::move(RoundedHi);
5060 Floats[1] = std::move(RoundedLo);
5061 return LoStatus;
5062}
5063
5065 Floats[0].changeSign();
5066 Floats[1].changeSign();
5067}
5068
5071 // Compare absolute values of the high parts.
5072 const cmpResult HiPartCmp = Floats[0].compareAbsoluteValue(RHS.Floats[0]);
5073 if (HiPartCmp != cmpEqual)
5074 return HiPartCmp;
5075
5076 // Zero, regardless of sign, is equal.
5077 if (Floats[1].isZero() && RHS.Floats[1].isZero())
5078 return cmpEqual;
5079
5080 // At this point, |this->Hi| == |RHS.Hi|.
5081 // The magnitude is |Hi+Lo| which is Hi+|Lo| if signs of Hi and Lo are the
5082 // same, and Hi-|Lo| if signs are different.
5083 const bool ThisIsSubtractive =
5084 Floats[0].isNegative() != Floats[1].isNegative();
5085 const bool RHSIsSubtractive =
5086 RHS.Floats[0].isNegative() != RHS.Floats[1].isNegative();
5087
5088 // Case 1: The low part of 'this' is zero.
5089 if (Floats[1].isZero())
5090 // We are comparing |Hi| vs. |Hi| ± |RHS.Lo|.
5091 // If RHS is subtractive, its magnitude is smaller.
5092 // If RHS is additive, its magnitude is larger.
5093 return RHSIsSubtractive ? cmpGreaterThan : cmpLessThan;
5094
5095 // Case 2: The low part of 'RHS' is zero (and we know 'this' is not).
5096 if (RHS.Floats[1].isZero())
5097 // We are comparing |Hi| ± |This.Lo| vs. |Hi|.
5098 // If 'this' is subtractive, its magnitude is smaller.
5099 // If 'this' is additive, its magnitude is larger.
5100 return ThisIsSubtractive ? cmpLessThan : cmpGreaterThan;
5101
5102 // If their natures differ, the additive one is larger.
5103 if (ThisIsSubtractive != RHSIsSubtractive)
5104 return ThisIsSubtractive ? cmpLessThan : cmpGreaterThan;
5105
5106 // Case 3: Both are additive (Hi+|Lo|) or both are subtractive (Hi-|Lo|).
5107 // The comparison now depends on the magnitude of the low parts.
5108 const cmpResult LoPartCmp = Floats[1].compareAbsoluteValue(RHS.Floats[1]);
5109
5110 if (ThisIsSubtractive) {
5111 // Both are subtractive (Hi-|Lo|), so the comparison of |Lo| is inverted.
5112 if (LoPartCmp == cmpLessThan)
5113 return cmpGreaterThan;
5114 if (LoPartCmp == cmpGreaterThan)
5115 return cmpLessThan;
5116 }
5117
5118 // If additive, the comparison of |Lo| is direct.
5119 // If equal, they are equal.
5120 return LoPartCmp;
5121}
5122
5124 return Floats[0].getCategory();
5125}
5126
5127bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
5128
5130 Floats[0].makeInf(Neg);
5131 Floats[1].makeZero(/* Neg = */ false);
5132}
5133
5135 Floats[0].makeZero(Neg);
5136 Floats[1].makeZero(/* Neg = */ false);
5137}
5138
5140 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5141 "Unexpected Semantics");
5142 Floats[0] =
5143 APFloat(APFloatBase::semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
5144 Floats[1] =
5145 APFloat(APFloatBase::semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
5146 if (Neg)
5147 changeSign();
5148}
5149
5151 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5152 "Unexpected Semantics");
5153 Floats[0].makeSmallest(Neg);
5154 Floats[1].makeZero(/* Neg = */ false);
5155}
5156
5158 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5159 "Unexpected Semantics");
5160 Floats[0] =
5161 APFloat(APFloatBase::semIEEEdouble, APInt(64, 0x0360000000000000ull));
5162 if (Neg)
5163 Floats[0].changeSign();
5164 Floats[1].makeZero(/* Neg = */ false);
5165}
5166
5167void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
5168 Floats[0].makeNaN(SNaN, Neg, fill);
5169 Floats[1].makeZero(/* Neg = */ false);
5170}
5171
5173 auto Result = Floats[0].compare(RHS.Floats[0]);
5174 // |Float[0]| > |Float[1]|
5175 if (Result == APFloat::cmpEqual)
5176 return Floats[1].compare(RHS.Floats[1]);
5177 return Result;
5178}
5179
5181 return Floats[0].bitwiseIsEqual(RHS.Floats[0]) &&
5182 Floats[1].bitwiseIsEqual(RHS.Floats[1]);
5183}
5184
5186 if (Arg.Floats)
5187 return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1]));
5188 return hash_combine(Arg.Semantics);
5189}
5190
5192 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5193 "Unexpected Semantics");
5194 uint64_t Data[] = {
5195 Floats[0].bitcastToAPInt().getRawData()[0],
5196 Floats[1].bitcastToAPInt().getRawData()[0],
5197 };
5198 return APInt(128, Data);
5199}
5200
5202 roundingMode RM) {
5203 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5204 "Unexpected Semantics");
5205 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy);
5206 auto Ret = Tmp.convertFromString(S, RM);
5207 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
5208 return Ret;
5209}
5210
5211// The double-double lattice of values corresponds to numbers which obey:
5212// - abs(lo) <= 1/2 * ulp(hi)
5213// - roundTiesToEven(hi + lo) == hi
5214//
5215// nextUp must choose the smallest output > input that follows these rules.
5216// nexDown must choose the largest output < input that follows these rules.
5218 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5219 "Unexpected Semantics");
5220 // nextDown(x) = -nextUp(-x)
5221 if (nextDown) {
5222 changeSign();
5223 APFloat::opStatus Result = next(/*nextDown=*/false);
5224 changeSign();
5225 return Result;
5226 }
5227 switch (getCategory()) {
5228 case fcInfinity:
5229 // nextUp(+inf) = +inf
5230 // nextUp(-inf) = -getLargest()
5231 if (isNegative())
5232 makeLargest(true);
5233 return opOK;
5234
5235 case fcNaN:
5236 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
5237 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
5238 // change the payload.
5239 if (getFirst().isSignaling()) {
5240 // For consistency, propagate the sign of the sNaN to the qNaN.
5241 makeNaN(false, isNegative(), nullptr);
5242 return opInvalidOp;
5243 }
5244 return opOK;
5245
5246 case fcZero:
5247 // nextUp(pm 0) = +getSmallest()
5248 makeSmallest(false);
5249 return opOK;
5250
5251 case fcNormal:
5252 break;
5253 }
5254
5255 const APFloat &HiOld = getFirst();
5256 const APFloat &LoOld = getSecond();
5257
5258 APFloat NextLo = LoOld;
5259 NextLo.next(/*nextDown=*/false);
5260
5261 // We want to admit values where:
5262 // 1. abs(Lo) <= ulp(Hi)/2
5263 // 2. Hi == RTNE(Hi + lo)
5264 auto InLattice = [](const APFloat &Hi, const APFloat &Lo) {
5265 return Hi + Lo == Hi;
5266 };
5267
5268 // Check if (HiOld, nextUp(LoOld) is in the lattice.
5269 if (InLattice(HiOld, NextLo)) {
5270 // Yes, the result is (HiOld, nextUp(LoOld)).
5271 Floats[1] = std::move(NextLo);
5272
5273 // TODO: Because we currently rely on semPPCDoubleDoubleLegacy, our maximum
5274 // value is defined to have exactly 106 bits of precision. This limitation
5275 // results in semPPCDoubleDouble being unable to reach its maximum canonical
5276 // value.
5277 DoubleAPFloat Largest{*Semantics, uninitialized};
5278 Largest.makeLargest(/*Neg=*/false);
5279 if (compare(Largest) == cmpGreaterThan)
5280 makeInf(/*Neg=*/false);
5281
5282 return opOK;
5283 }
5284
5285 // Now we need to handle the cases where (HiOld, nextUp(LoOld)) is not the
5286 // correct result. We know the new hi component will be nextUp(HiOld) but our
5287 // lattice rules make it a little ambiguous what the correct NextLo must be.
5288 APFloat NextHi = HiOld;
5289 NextHi.next(/*nextDown=*/false);
5290
5291 // nextUp(getLargest()) == INFINITY
5292 if (NextHi.isInfinity()) {
5293 makeInf(/*Neg=*/false);
5294 return opOK;
5295 }
5296
5297 // IEEE 754-2019 5.3.1:
5298 // "If x is the negative number of least magnitude in x's format, nextUp(x) is
5299 // -0."
5300 if (NextHi.isZero()) {
5301 makeZero(/*Neg=*/true);
5302 return opOK;
5303 }
5304
5305 // abs(NextLo) must be <= ulp(NextHi)/2. We want NextLo to be as close to
5306 // negative infinity as possible.
5307 NextLo = neg(scalbn(harrisonUlp(NextHi), -1, rmTowardZero));
5308 if (!InLattice(NextHi, NextLo))
5309 // RTNE may mean that Lo must be < ulp(NextHi) / 2 so we bump NextLo.
5310 NextLo.next(/*nextDown=*/false);
5311
5312 Floats[0] = std::move(NextHi);
5313 Floats[1] = std::move(NextLo);
5314
5315 return opOK;
5316}
5317
5318APFloat::opStatus DoubleAPFloat::convertToSignExtendedInteger(
5319 MutableArrayRef<integerPart> Input, unsigned int Width, bool IsSigned,
5320 roundingMode RM, bool *IsExact) const {
5321 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5322 "Unexpected Semantics");
5323
5324 // If Hi is not finite, or Lo is zero, the value is entirely represented
5325 // by Hi. Delegate to the simpler single-APFloat conversion.
5326 if (!getFirst().isFiniteNonZero() || getSecond().isZero())
5327 return getFirst().convertToInteger(Input, Width, IsSigned, RM, IsExact);
5328
5329 // First, round the full double-double value to an integral value. This
5330 // simplifies the rest of the function, as we no longer need to consider
5331 // fractional parts.
5332 *IsExact = false;
5333 DoubleAPFloat Integral = *this;
5334 const opStatus RoundStatus = Integral.roundToIntegral(RM);
5335 if (RoundStatus == opInvalidOp)
5336 return opInvalidOp;
5337 const APFloat &IntegralHi = Integral.getFirst();
5338 const APFloat &IntegralLo = Integral.getSecond();
5339
5340 // If rounding results in either component being zero, the sum is trivial.
5341 // Delegate to the simpler single-APFloat conversion.
5342 bool HiIsExact;
5343 if (IntegralHi.isZero() || IntegralLo.isZero()) {
5344 const opStatus HiStatus =
5345 IntegralHi.convertToInteger(Input, Width, IsSigned, RM, &HiIsExact);
5346 // The conversion from an integer-valued float to an APInt may fail if the
5347 // result would be out of range. Regardless, taking this path is only
5348 // possible if rounding occurred during the initial `roundToIntegral`.
5349 return HiStatus == opOK ? opInexact : HiStatus;
5350 }
5351
5352 // A negative number cannot be represented by an unsigned integer.
5353 // Since a double-double is canonical, if Hi is negative, the sum is negative.
5354 if (!IsSigned && IntegralHi.isNegative())
5355 return opInvalidOp;
5356
5357 // Handle the special boundary case where |Hi| is exactly the power of two
5358 // that marks the edge of the integer's range (e.g., 2^63 for int64_t). In
5359 // this situation, Hi itself won't fit, but the sum Hi + Lo might.
5360 // `PositiveOverflowWidth` is the bit number for this boundary (N-1 for
5361 // signed, N for unsigned).
5362 bool LoIsExact;
5363 const int HiExactLog2 = IntegralHi.getExactLog2Abs();
5364 const unsigned PositiveOverflowWidth = IsSigned ? Width - 1 : Width;
5365 if (HiExactLog2 >= 0 &&
5366 static_cast<unsigned>(HiExactLog2) == PositiveOverflowWidth) {
5367 // If Hi and Lo have the same sign, |Hi + Lo| > |Hi|, so the sum is
5368 // guaranteed to overflow. E.g., for uint128_t, (2^128, 1) overflows.
5369 if (IntegralHi.isNegative() == IntegralLo.isNegative())
5370 return opInvalidOp;
5371
5372 // If the signs differ, the sum will fit. We can compute the result using
5373 // properties of two's complement arithmetic without a wide intermediate
5374 // integer. E.g., for uint128_t, (2^128, -1) should be 2^128 - 1.
5375 const opStatus LoStatus = IntegralLo.convertToInteger(
5376 Input, Width, /*IsSigned=*/true, RM, &LoIsExact);
5377 if (LoStatus == opInvalidOp)
5378 return opInvalidOp;
5379
5380 // Adjust the bit pattern of Lo to account for Hi's value:
5381 // - For unsigned (Hi=2^Width): `2^Width + Lo` in `Width`-bit
5382 // arithmetic is equivalent to just `Lo`. The conversion of `Lo` above
5383 // already produced the correct final bit pattern.
5384 // - For signed (Hi=2^(Width-1)): The sum `2^(Width-1) + Lo` (where Lo<0)
5385 // can be computed by taking the two's complement pattern for `Lo` and
5386 // clearing the sign bit.
5387 if (IsSigned && !IntegralHi.isNegative())
5388 APInt::tcClearBit(Input.data(), PositiveOverflowWidth);
5389 *IsExact = RoundStatus == opOK;
5390 return RoundStatus;
5391 }
5392
5393 // Convert Hi into an integer. This may not fit but that is OK: we know that
5394 // Hi + Lo would not fit either in this situation.
5395 const opStatus HiStatus = IntegralHi.convertToInteger(
5396 Input, Width, IsSigned, rmTowardZero, &HiIsExact);
5397 if (HiStatus == opInvalidOp)
5398 return HiStatus;
5399
5400 // Convert Lo into a temporary integer of the same width.
5401 APSInt LoResult{Width, /*isUnsigned=*/!IsSigned};
5402 const opStatus LoStatus =
5403 IntegralLo.convertToInteger(LoResult, rmTowardZero, &LoIsExact);
5404 if (LoStatus == opInvalidOp)
5405 return LoStatus;
5406
5407 // Add Lo to Hi. This addition is guaranteed not to overflow because of the
5408 // double-double canonicalization rule (`|Lo| <= ulp(Hi)/2`). The only case
5409 // where the sum could cross the integer type's boundary is when Hi is a
5410 // power of two, which is handled by the special case block above.
5411 APInt::tcAdd(Input.data(), LoResult.getRawData(), /*carry=*/0, Input.size());
5412
5413 *IsExact = RoundStatus == opOK;
5414 return RoundStatus;
5415}
5416
5419 unsigned int Width, bool IsSigned,
5420 roundingMode RM, bool *IsExact) const {
5421 opStatus FS =
5422 convertToSignExtendedInteger(Input, Width, IsSigned, RM, IsExact);
5423
5424 if (FS == opInvalidOp) {
5425 const unsigned DstPartsCount = partCountForBits(Width);
5426 assert(DstPartsCount <= Input.size() && "Integer too big");
5427
5428 unsigned Bits;
5429 if (getCategory() == fcNaN)
5430 Bits = 0;
5431 else if (isNegative())
5432 Bits = IsSigned;
5433 else
5434 Bits = Width - IsSigned;
5435
5436 tcSetLeastSignificantBits(Input.data(), DstPartsCount, Bits);
5437 if (isNegative() && IsSigned)
5438 APInt::tcShiftLeft(Input.data(), DstPartsCount, Width - 1);
5439 }
5440
5441 return FS;
5442}
5443
5444APFloat::opStatus DoubleAPFloat::handleOverflow(roundingMode RM) {
5445 switch (RM) {
5447 makeLargest(/*Neg=*/isNegative());
5448 break;
5450 if (isNegative())
5451 makeInf(/*Neg=*/true);
5452 else
5453 makeLargest(/*Neg=*/false);
5454 break;
5456 if (isNegative())
5457 makeLargest(/*Neg=*/true);
5458 else
5459 makeInf(/*Neg=*/false);
5460 break;
5463 makeInf(/*Neg=*/isNegative());
5464 break;
5465 default:
5466 llvm_unreachable("Invalid rounding mode found");
5467 }
5468 opStatus S = opInexact;
5469 if (!getFirst().isFinite())
5470 S = static_cast<opStatus>(S | opOverflow);
5471 return S;
5472}
5473
5474APFloat::opStatus DoubleAPFloat::convertFromUnsignedParts(
5475 const integerPart *Src, unsigned int SrcCount, roundingMode RM) {
5476 // Find the most significant bit of the source integer. APInt::tcMSB returns
5477 // UINT_MAX for a zero value.
5478 const unsigned SrcMSB = APInt::tcMSB(Src, SrcCount);
5479 if (SrcMSB == UINT_MAX) {
5480 // The source integer is 0.
5481 makeZero(/*Neg=*/false);
5482 return opOK;
5483 }
5484
5485 // Create a minimally-sized APInt to represent the source value.
5486 const unsigned SrcBitWidth = SrcMSB + 1;
5487 APSInt SrcInt{APInt{/*numBits=*/SrcBitWidth, ArrayRef(Src, SrcCount)},
5488 /*isUnsigned=*/true};
5489
5490 // Stage 1: Initial Approximation.
5491 // Convert the source integer SrcInt to the Hi part of the DoubleAPFloat.
5492 // We use round-to-nearest because it minimizes the initial error, which is
5493 // crucial for the subsequent steps.
5495 Hi.convertFromAPInt(SrcInt, /*IsSigned=*/false, rmNearestTiesToEven);
5496
5497 // If the first approximation already overflows, the number is too large.
5498 // NOTE: The underlying semantics are *more* conservative when choosing to
5499 // overflow because their notion of ULP is much larger. As such, it is always
5500 // safe to overflow at the DoubleAPFloat level if the APFloat overflows.
5501 if (!Hi.isFinite())
5502 return handleOverflow(RM);
5503
5504 // Stage 2: Exact Error Calculation.
5505 // Calculate the exact error of the first approximation: Error = SrcInt - Hi.
5506 // This is done by converting Hi back to an integer and subtracting it from
5507 // the original source.
5508 bool HiAsIntIsExact;
5509 // Create an integer representation of Hi. Its width is determined by the
5510 // exponent of Hi, ensuring it's just large enough. This width can exceed
5511 // SrcBitWidth if the conversion to Hi rounded up to a power of two.
5512 // accurately when converted back to an integer.
5513 APSInt HiAsInt{static_cast<uint32_t>(ilogb(Hi) + 1), /*isUnsigned=*/true};
5514 Hi.convertToInteger(HiAsInt, rmNearestTiesToEven, &HiAsIntIsExact);
5515 const APInt Error = SrcInt.zext(HiAsInt.getBitWidth()) - HiAsInt;
5516
5517 // Stage 3: Error Approximation and Rounding.
5518 // Convert the integer error into the Lo part of the DoubleAPFloat. This step
5519 // captures the remainder of the original number. The rounding mode for this
5520 // conversion (LoRM) may need to be adjusted from the user-requested RM to
5521 // ensure the final sum (Hi + Lo) rounds correctly.
5522 roundingMode LoRM = RM;
5523 // Adjustments are only necessary when the initial approximation Hi was an
5524 // overestimate, making the Error negative.
5525 if (Error.isNegative()) {
5526 if (RM == rmNearestTiesToAway) {
5527 // For rmNearestTiesToAway, a tie should round away from zero. Since
5528 // SrcInt is positive, this means rounding toward +infinity.
5529 // A standard conversion of a negative Error would round ties toward
5530 // -infinity, causing the final sum Hi + Lo to be smaller. To
5531 // counteract this, we detect the tie case and override the rounding
5532 // mode for Lo to rmTowardPositive.
5533 const unsigned ErrorActiveBits = Error.getSignificantBits() - 1;
5534 const unsigned LoPrecision = getSecond().getSemantics().precision;
5535 if (ErrorActiveBits > LoPrecision) {
5536 const unsigned RoundingBoundary = ErrorActiveBits - LoPrecision;
5537 // A tie occurs when the bits to be truncated are of the form 100...0.
5538 // This is detected by checking if the number of trailing zeros is
5539 // exactly one less than the number of bits being truncated.
5540 if (Error.countTrailingZeros() == RoundingBoundary - 1)
5541 LoRM = rmTowardPositive;
5542 }
5543 } else if (RM == rmTowardZero) {
5544 // For rmTowardZero, the final positive result must be truncated (rounded
5545 // down). When Hi is an overestimate, Error is negative. A standard
5546 // rmTowardZero conversion of Error would make it *less* negative,
5547 // effectively rounding the final sum Hi + Lo *up*. To ensure the sum
5548 // rounds down correctly, we force Lo to round toward -infinity.
5549 LoRM = rmTowardNegative;
5550 }
5551 }
5552
5554 opStatus Status = Lo.convertFromAPInt(Error, /*IsSigned=*/true, LoRM);
5555
5556 // Renormalize the pair (Hi, Lo) into a canonical DoubleAPFloat form where the
5557 // components do not overlap. fastTwoSum performs this operation.
5558 std::tie(Hi, Lo) = fastTwoSum(Hi, Lo);
5559 Floats[0] = std::move(Hi);
5560 Floats[1] = std::move(Lo);
5561
5562 // A final check for overflow is needed because fastTwoSum can cause a
5563 // carry-out from Lo that pushes Hi to infinity.
5564 if (!getFirst().isFinite())
5565 return handleOverflow(RM);
5566
5567 // The largest DoubleAPFloat must be canonical. Values which are larger are
5568 // not canonical and are equivalent to overflow.
5569 if (getFirst().isFiniteNonZero() && Floats[0].isLargest()) {
5570 DoubleAPFloat Largest{*Semantics};
5571 Largest.makeLargest(/*Neg=*/false);
5572 if (compare(Largest) == APFloat::cmpGreaterThan)
5573 return handleOverflow(RM);
5574 }
5575
5576 // The final status of the operation is determined by the conversion of the
5577 // error term. If Lo could represent Error exactly, the entire conversion
5578 // is exact. Otherwise, it's inexact.
5579 return Status;
5580}
5581
5583 bool IsSigned,
5584 roundingMode RM) {
5585 const bool NegateInput = IsSigned && Input.isNegative();
5586 APInt API = Input;
5587 if (NegateInput)
5588 API.negate();
5589
5591 convertFromUnsignedParts(API.getRawData(), API.getNumWords(), RM);
5592 if (NegateInput)
5593 changeSign();
5594 return Status;
5595}
5596
5598 unsigned int HexDigits,
5599 bool UpperCase,
5600 roundingMode RM) const {
5601 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5602 "Unexpected Semantics");
5603 return APFloat(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt())
5604 .convertToHexString(DST, HexDigits, UpperCase, RM);
5605}
5606
5608 return getCategory() == fcNormal &&
5609 (Floats[0].isDenormal() || Floats[1].isDenormal() ||
5610 // (double)(Hi + Lo) == Hi defines a normal number.
5611 Floats[0] != Floats[0] + Floats[1]);
5612}
5613
5615 if (getCategory() != fcNormal)
5616 return false;
5617 DoubleAPFloat Tmp(*this);
5618 Tmp.makeSmallest(this->isNegative());
5619 return Tmp.compare(*this) == cmpEqual;
5620}
5621
5623 if (getCategory() != fcNormal)
5624 return false;
5625
5626 DoubleAPFloat Tmp(*this);
5628 return Tmp.compare(*this) == cmpEqual;
5629}
5630
5632 if (getCategory() != fcNormal)
5633 return false;
5634 DoubleAPFloat Tmp(*this);
5635 Tmp.makeLargest(this->isNegative());
5636 return Tmp.compare(*this) == cmpEqual;
5637}
5638
5640 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5641 "Unexpected Semantics");
5642 return Floats[0].isInteger() && Floats[1].isInteger();
5643}
5644
5646 unsigned FormatPrecision,
5647 unsigned FormatMaxPadding,
5648 bool TruncateZero) const {
5649 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5650 "Unexpected Semantics");
5651 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt())
5652 .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
5653}
5654
5656 // In order for Hi + Lo to be a power of two, the following must be true:
5657 // 1. Hi must be a power of two.
5658 // 2. Lo must be zero.
5659 if (getSecond().isNonZero())
5660 return INT_MIN;
5661 return getFirst().getExactLog2Abs();
5662}
5663
5664int ilogb(const DoubleAPFloat &Arg) {
5665 const APFloat &Hi = Arg.getFirst();
5666 const APFloat &Lo = Arg.getSecond();
5667 int IlogbResult = ilogb(Hi);
5668 // Zero and non-finite values can delegate to ilogb(Hi).
5669 if (Arg.getCategory() != fcNormal)
5670 return IlogbResult;
5671 // If Lo can't change the binade, we can delegate to ilogb(Hi).
5672 if (Lo.isZero() || Hi.isNegative() == Lo.isNegative())
5673 return IlogbResult;
5674 if (Hi.getExactLog2Abs() == INT_MIN)
5675 return IlogbResult;
5676 // Numbers of the form 2^a - 2^b or -2^a + 2^b are almost powers of two but
5677 // get nudged out of the binade by the low component.
5678 return IlogbResult - 1;
5679}
5680
5683 assert(Arg.Semantics == &APFloatBase::PPCDoubleDouble() &&
5684 "Unexpected Semantics");
5686 scalbn(Arg.Floats[0], Exp, RM),
5687 scalbn(Arg.Floats[1], Exp, RM));
5688}
5689
5690DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
5692 assert(Arg.Semantics == &APFloatBase::PPCDoubleDouble() &&
5693 "Unexpected Semantics");
5694
5695 // Get the unbiased exponent e of the number, where |Arg| = m * 2^e for m in
5696 // [1.0, 2.0).
5697 Exp = ilogb(Arg);
5698
5699 // For NaNs, quiet any signaling NaN and return the result, as per standard
5700 // practice.
5701 if (Exp == APFloat::IEK_NaN) {
5702 DoubleAPFloat Quiet{Arg};
5703 Quiet.getFirst() = Quiet.getFirst().makeQuiet();
5704 return Quiet;
5705 }
5706
5707 // For infinity, return it unchanged. The exponent remains IEK_Inf.
5708 if (Exp == APFloat::IEK_Inf)
5709 return Arg;
5710
5711 // For zero, the fraction is zero and the standard requires the exponent be 0.
5712 if (Exp == APFloat::IEK_Zero) {
5713 Exp = 0;
5714 return Arg;
5715 }
5716
5717 const APFloat &Hi = Arg.getFirst();
5718 const APFloat &Lo = Arg.getSecond();
5719
5720 // frexp requires the fraction's absolute value to be in [0.5, 1.0).
5721 // ilogb provides an exponent for an absolute value in [1.0, 2.0).
5722 // Increment the exponent to ensure the fraction is in the correct range.
5723 ++Exp;
5724
5725 const bool SignsDisagree = Hi.isNegative() != Lo.isNegative();
5726 APFloat Second = Lo;
5727 if (Arg.getCategory() == APFloat::fcNormal && Lo.isFiniteNonZero()) {
5728 roundingMode LoRoundingMode;
5729 // The interpretation of rmTowardZero depends on the sign of the combined
5730 // Arg rather than the sign of the component.
5731 if (RM == rmTowardZero)
5732 LoRoundingMode = Arg.isNegative() ? rmTowardPositive : rmTowardNegative;
5733 // For rmNearestTiesToAway, we face a similar problem. If signs disagree,
5734 // Lo is a correction *toward* zero relative to Hi. Rounding Lo
5735 // "away from zero" based on its own sign would move the value in the
5736 // wrong direction. As a safe proxy, we use rmNearestTiesToEven, which is
5737 // direction-agnostic. We only need to bother with this if Lo is scaled
5738 // down.
5739 else if (RM == rmNearestTiesToAway && SignsDisagree && Exp > 0)
5740 LoRoundingMode = rmNearestTiesToEven;
5741 else
5742 LoRoundingMode = RM;
5743 Second = scalbn(Lo, -Exp, LoRoundingMode);
5744 // The rmNearestTiesToEven proxy is correct most of the time, but it
5745 // differs from rmNearestTiesToAway when the scaled value of Lo is an
5746 // exact midpoint.
5747 // NOTE: This is morally equivalent to roundTiesTowardZero.
5748 if (RM == rmNearestTiesToAway && LoRoundingMode == rmNearestTiesToEven) {
5749 // Re-scale the result back to check if rounding occurred.
5750 const APFloat RecomposedLo = scalbn(Second, Exp, rmNearestTiesToEven);
5751 if (RecomposedLo != Lo) {
5752 // RoundingError tells us which direction we rounded:
5753 // - RoundingError > 0: we rounded up.
5754 // - RoundingError < 0: we down up.
5755 const APFloat RoundingError = RecomposedLo - Lo;
5756 // Determine if scalbn(Lo, -Exp) landed exactly on a midpoint.
5757 // We do this by checking if the absolute rounding error is exactly
5758 // half a ULP of the result.
5759 const APFloat UlpOfSecond = harrisonUlp(Second);
5760 const APFloat ScaledUlpOfSecond =
5761 scalbn(UlpOfSecond, Exp - 1, rmNearestTiesToEven);
5762 const bool IsMidpoint = abs(RoundingError) == ScaledUlpOfSecond;
5763 const bool RoundedLoAway =
5764 Second.isNegative() == RoundingError.isNegative();
5765 // The sign of Hi and Lo disagree and we rounded Lo away: we must
5766 // decrease the magnitude of Second to increase the magnitude
5767 // First+Second.
5768 if (IsMidpoint && RoundedLoAway)
5769 Second.next(/*nextDown=*/!Second.isNegative());
5770 }
5771 }
5772 // Handle a tricky edge case where Arg is slightly less than a power of two
5773 // (e.g., Arg = 2^k - epsilon). In this situation:
5774 // 1. Hi is 2^k, and Lo is a small negative value -epsilon.
5775 // 2. ilogb(Arg) correctly returns k-1.
5776 // 3. Our initial Exp becomes (k-1) + 1 = k.
5777 // 4. Scaling Hi (2^k) by 2^-k would yield a magnitude of 1.0 and
5778 // scaling Lo by 2^-k would yield zero. This would make the result 1.0
5779 // which is an invalid fraction, as the required interval is [0.5, 1.0).
5780 // We detect this specific case by checking if Hi is a power of two and if
5781 // the scaled Lo underflowed to zero. The fix: Increment Exp to k+1. This
5782 // adjusts the scale factor, causing Hi to be scaled to 0.5, which is a
5783 // valid fraction.
5784 if (Second.isZero() && SignsDisagree && Hi.getExactLog2Abs() != INT_MIN)
5785 ++Exp;
5786 }
5787
5788 APFloat First = scalbn(Hi, -Exp, RM);
5790 std::move(Second));
5791}
5792
5793APInt DoubleAPFloat::getNaNPayload() const { return Floats[0].getNaNPayload(); }
5794} // namespace detail
5795
5796APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
5797 if (usesLayout<IEEEFloat>(Semantics)) {
5798 new (&IEEE) IEEEFloat(std::move(F));
5799 return;
5800 }
5801 if (usesLayout<DoubleAPFloat>(Semantics)) {
5802 const fltSemantics& S = F.getSemantics();
5803 new (&Double) DoubleAPFloat(Semantics, APFloat(std::move(F), S),
5805 return;
5806 }
5807 llvm_unreachable("Unexpected semantics");
5808}
5809
5814
5815hash_code hash_value(const APFloat &Arg) {
5816 if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics()))
5817 return hash_value(Arg.U.IEEE);
5818 if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics()))
5819 return hash_value(Arg.U.Double);
5820 llvm_unreachable("Unexpected semantics");
5821}
5822
5824 : APFloat(Semantics) {
5825 auto StatusOrErr = convertFromString(S, rmNearestTiesToEven);
5826 assert(StatusOrErr && "Invalid floating point representation");
5827 consumeError(StatusOrErr.takeError());
5828}
5829
5831 if (isZero())
5832 return isNegative() ? fcNegZero : fcPosZero;
5833 if (isNormal())
5834 return isNegative() ? fcNegNormal : fcPosNormal;
5835 if (isDenormal())
5837 if (isInfinity())
5838 return isNegative() ? fcNegInf : fcPosInf;
5839 assert(isNaN() && "Other class of FP constant");
5840 return isSignaling() ? fcSNan : fcQNan;
5841}
5842
5843bool APFloat::getExactInverse(APFloat *Inv) const {
5844 // Only finite, non-zero numbers can have a useful, representable inverse.
5845 // This check filters out +/- zero, +/- infinity, and NaN.
5846 if (!isFiniteNonZero())
5847 return false;
5848
5849 // Historically, this function rejects subnormal inputs. One reason why this
5850 // might be important is that subnormals may behave differently under FTZ/DAZ
5851 // runtime behavior.
5852 if (isDenormal())
5853 return false;
5854
5855 // A number has an exact, representable inverse if and only if it is a power
5856 // of two.
5857 //
5858 // Mathematical Rationale:
5859 // 1. A binary floating-point number x is a dyadic rational, meaning it can
5860 // be written as x = M / 2^k for integers M (the significand) and k.
5861 // 2. The inverse is 1/x = 2^k / M.
5862 // 3. For 1/x to also be a dyadic rational (and thus exactly representable
5863 // in binary), its denominator M must also be a power of two.
5864 // Let's say M = 2^m.
5865 // 4. Substituting this back into the formula for x, we get
5866 // x = (2^m) / (2^k) = 2^(m-k).
5867 //
5868 // This proves that x must be a power of two.
5869
5870 // getExactLog2Abs() returns the integer exponent if the number is a power of
5871 // two or INT_MIN if it is not.
5872 const int Exp = getExactLog2Abs();
5873 if (Exp == INT_MIN)
5874 return false;
5875
5876 // The inverse of +/- 2^Exp is +/- 2^(-Exp). We can compute this by
5877 // scaling 1.0 by the negated exponent.
5878 APFloat Reciprocal =
5879 scalbn(APFloat::getOne(getSemantics(), /*Negative=*/isNegative()), -Exp,
5880 rmTowardZero);
5881
5882 // scalbn might round if the resulting exponent -Exp is outside the
5883 // representable range, causing overflow (to infinity) or underflow. We
5884 // must verify that the result is still the exact power of two we expect.
5885 if (Reciprocal.getExactLog2Abs() != -Exp)
5886 return false;
5887
5888 // Avoid multiplication with a subnormal, it is not safe on all platforms and
5889 // may be slower than a normal division.
5890 if (Reciprocal.isDenormal())
5891 return false;
5892
5893 assert(Reciprocal.isFiniteNonZero());
5894
5895 if (Inv)
5896 *Inv = std::move(Reciprocal);
5897
5898 return true;
5899}
5900
5902 roundingMode RM, bool *losesInfo) {
5903 if (&getSemantics() == &ToSemantics) {
5904 *losesInfo = false;
5905 return opOK;
5906 }
5907 if (usesLayout<IEEEFloat>(getSemantics()) &&
5908 usesLayout<IEEEFloat>(ToSemantics))
5909 return U.IEEE.convert(ToSemantics, RM, losesInfo);
5910 if (usesLayout<IEEEFloat>(getSemantics()) &&
5911 usesLayout<DoubleAPFloat>(ToSemantics)) {
5912 assert(&ToSemantics == &APFloatBase::semPPCDoubleDouble);
5913 auto Ret =
5914 U.IEEE.convert(APFloatBase::semPPCDoubleDoubleLegacy, RM, losesInfo);
5915 *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
5916 return Ret;
5917 }
5918 if (usesLayout<DoubleAPFloat>(getSemantics()) &&
5919 usesLayout<IEEEFloat>(ToSemantics)) {
5920 auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo);
5921 *this = APFloat(std::move(getIEEE()), ToSemantics);
5922 return Ret;
5923 }
5924 llvm_unreachable("Unexpected semantics");
5925}
5926
5930
5932 SmallVector<char, 16> Buffer;
5933 toString(Buffer);
5934 OS << Buffer;
5935}
5936
5937#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
5939 print(dbgs());
5940 dbgs() << '\n';
5941}
5942#endif
5943
5945 NID.Add(bitcastToAPInt());
5946}
5947
5949 roundingMode rounding_mode,
5950 bool *isExact) const {
5951 unsigned bitWidth = result.getBitWidth();
5952 SmallVector<uint64_t, 4> parts(result.getNumWords());
5953 opStatus status = convertToInteger(parts, bitWidth, result.isSigned(),
5954 rounding_mode, isExact);
5955 // Keeps the original signed-ness.
5956 result = APInt(bitWidth, parts);
5957 return status;
5958}
5959
5961 if (&getSemantics() == &APFloatBase::semIEEEdouble)
5962 return getIEEE().convertToDouble();
5963 assert(isRepresentableBy(getSemantics(), semIEEEdouble) &&
5964 "Float semantics is not representable by IEEEdouble");
5965 APFloat Temp = *this;
5966 bool LosesInfo;
5967 [[maybe_unused]] opStatus St =
5968 Temp.convert(APFloatBase::semIEEEdouble, rmNearestTiesToEven, &LosesInfo);
5969 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
5970 return Temp.getIEEE().convertToDouble();
5971}
5972
5973#ifdef HAS_IEE754_FLOAT128
5974float128 APFloat::convertToQuad() const {
5975 if (&getSemantics() == &APFloatBase::semIEEEquad)
5976 return getIEEE().convertToQuad();
5977 assert(isRepresentableBy(getSemantics(), semIEEEquad) &&
5978 "Float semantics is not representable by IEEEquad");
5979 APFloat Temp = *this;
5980 bool LosesInfo;
5981 [[maybe_unused]] opStatus St =
5982 Temp.convert(APFloatBase::semIEEEquad, rmNearestTiesToEven, &LosesInfo);
5983 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
5984 return Temp.getIEEE().convertToQuad();
5985}
5986#endif
5987
5989 if (&getSemantics() == &APFloatBase::semIEEEsingle)
5990 return getIEEE().convertToFloat();
5991 assert(isRepresentableBy(getSemantics(), semIEEEsingle) &&
5992 "Float semantics is not representable by IEEEsingle");
5993 APFloat Temp = *this;
5994 bool LosesInfo;
5995 [[maybe_unused]] opStatus St =
5996 Temp.convert(APFloatBase::semIEEEsingle, rmNearestTiesToEven, &LosesInfo);
5997 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
5998 return Temp.getIEEE().convertToFloat();
5999}
6000
6002 static constexpr StringLiteral ValidFormats[] = {
6003 "Float8E5M2", "Float8E5M2FNUZ", "Float8E4M3", "Float8E4M3FN",
6004 "Float8E4M3FNUZ", "Float8E4M3B11FNUZ", "Float8E3M4", "Float8E8M0FNU",
6005 "Float6E3M2FN", "Float6E2M3FN", "Float4E2M1FN"};
6006 return llvm::is_contained(ValidFormats, Format);
6007}
6008
6010 // TODO: extend to remaining arbitrary FP types: Float8E4M3, Float8E3M4,
6011 // Float8E5M2FNUZ, Float8E4M3FNUZ, Float8E4M3B11FNUZ, Float8E8M0FNU.
6013 .Case("Float8E5M2", &semFloat8E5M2)
6014 .Case("Float8E4M3FN", &semFloat8E4M3FN)
6015 .Case("Float4E2M1FN", &semFloat4E2M1FN)
6016 .Case("Float6E3M2FN", &semFloat6E3M2FN)
6017 .Case("Float6E2M3FN", &semFloat6E2M3FN)
6018 .Default(nullptr);
6019}
6020
6021APFloat::Storage::~Storage() {
6022 if (usesLayout<IEEEFloat>(*semantics)) {
6023 IEEE.~IEEEFloat();
6024 return;
6025 }
6026 if (usesLayout<DoubleAPFloat>(*semantics)) {
6027 Double.~DoubleAPFloat();
6028 return;
6029 }
6030 llvm_unreachable("Unexpected semantics");
6031}
6032
6033APFloat::Storage::Storage(const APFloat::Storage &RHS) {
6034 if (usesLayout<IEEEFloat>(*RHS.semantics)) {
6035 new (this) IEEEFloat(RHS.IEEE);
6036 return;
6037 }
6038 if (usesLayout<DoubleAPFloat>(*RHS.semantics)) {
6039 new (this) DoubleAPFloat(RHS.Double);
6040 return;
6041 }
6042 llvm_unreachable("Unexpected semantics");
6043}
6044
6045APFloat::Storage::Storage(APFloat::Storage &&RHS) {
6046 if (usesLayout<IEEEFloat>(*RHS.semantics)) {
6047 new (this) IEEEFloat(std::move(RHS.IEEE));
6048 return;
6049 }
6050 if (usesLayout<DoubleAPFloat>(*RHS.semantics)) {
6051 new (this) DoubleAPFloat(std::move(RHS.Double));
6052 return;
6053 }
6054 llvm_unreachable("Unexpected semantics");
6055}
6056
6057APFloat::Storage &APFloat::Storage::operator=(const APFloat::Storage &RHS) {
6058 if (usesLayout<IEEEFloat>(*semantics) &&
6059 usesLayout<IEEEFloat>(*RHS.semantics)) {
6060 IEEE = RHS.IEEE;
6061 } else if (usesLayout<DoubleAPFloat>(*semantics) &&
6062 usesLayout<DoubleAPFloat>(*RHS.semantics)) {
6063 Double = RHS.Double;
6064 } else if (this != &RHS) {
6065 this->~Storage();
6066 new (this) Storage(RHS);
6067 }
6068 return *this;
6069}
6070
6071APFloat::Storage &APFloat::Storage::operator=(APFloat::Storage &&RHS) {
6072 if (usesLayout<IEEEFloat>(*semantics) &&
6073 usesLayout<IEEEFloat>(*RHS.semantics)) {
6074 IEEE = std::move(RHS.IEEE);
6075 } else if (usesLayout<DoubleAPFloat>(*semantics) &&
6076 usesLayout<DoubleAPFloat>(*RHS.semantics)) {
6077 Double = std::move(RHS.Double);
6078 } else if (this != &RHS) {
6079 this->~Storage();
6080 new (this) Storage(std::move(RHS));
6081 }
6082 return *this;
6083}
6084
6085namespace {
6086
6087APFloat::opStatus getOpStatusFromLibc(int libc_exceptions) {
6089 if (libc_exceptions & FE_INVALID)
6091 if (libc_exceptions & FE_DIVBYZERO)
6093 if (libc_exceptions & FE_OVERFLOW)
6095 if (libc_exceptions & FE_UNDERFLOW)
6097 if (libc_exceptions & FE_INEXACT)
6099 return status;
6100}
6101
6102} // namespace
6103
6104// TODO: Support other rounding modes when LLVM libc math implement static
6105// roundings.
6106std::optional<APFloat> exp(const APFloat &x, RoundingMode rounding_mode,
6107 APFloat::opStatus *status) {
6108
6109 if (rounding_mode == APFloatBase::rmNearestTiesToEven) {
6110 if (APFloat::SemanticsToEnum(x.getSemantics()) ==
6112 float x_val = x.convertToFloat();
6113 int exc =
6114 LIBC_NAMESPACE::shared::check::exp_exceptions(x_val, FE_TONEAREST);
6115 if (status) {
6116 *status = getOpStatusFromLibc(exc);
6117 if (x.isSignaling()) {
6118 // 32-bit x86 will silence sNaN when loading floats, so we explicitly
6119 // add the INVALID exception here.
6120 *status =
6121 static_cast<APFloat::opStatus>(*status | APFloat::opInvalidOp);
6122 }
6123 }
6124 float result = LIBC_NAMESPACE::shared::expf(x_val);
6125 return APFloat(result);
6126 }
6127 if (APFloat::SemanticsToEnum(x.getSemantics()) ==
6129 double x_val = x.convertToDouble();
6130 int exc =
6131 LIBC_NAMESPACE::shared::check::exp_exceptions(x_val, FE_TONEAREST);
6132 if (status) {
6133 *status = getOpStatusFromLibc(exc);
6134 if (x.isSignaling()) {
6135 // 32-bit x86 will silence sNaN when loading floats, so we explicitly
6136 // add the INVALID exception here.
6137 *status =
6138 static_cast<APFloat::opStatus>(*status | APFloat::opInvalidOp);
6139 }
6140 }
6141 double result = LIBC_NAMESPACE::shared::exp(x_val);
6142 return APFloat(result);
6143 }
6144 }
6145 return std::nullopt;
6146}
6147
6148} // namespace llvm
6149
6150#undef APFLOAT_DISPATCH_ON_SEMANTICS
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define PackCategoriesIntoKey(_lhs, _rhs)
A macro used to combine two fcCategory enums into one key which can be used in a switch statement to ...
Definition APFloat.cpp:63
This file declares a class to represent arbitrary precision floating point values and provide a varie...
#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL)
Definition APFloat.h:27
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
Function Alias Analysis false
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:663
static bool isNeg(Value *V)
Returns true if the operation is a negation of V, and it works for both integers and floats.
static bool isSigned(unsigned Opcode)
Utilities for dealing with flags related to floating point properties and mode controls.
This file defines a hash set that can be used to remove duplication of nodes in a graph.
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define T
#define P(N)
if(PassOpts->AAPipeline)
This file contains some templates that are useful if you are working with the STL at all.
This file contains some functions that are useful when dealing with strings.
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, const llvm::StringTable &StandardNames, VectorLibrary VecLib)
Initialize the set of available library functions based on the specified target triple.
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
static const fltSemantics & IEEEsingle()
Definition APFloat.h:297
static const fltSemantics & Float8E4M3FN()
Definition APFloat.h:307
static LLVM_ABI const llvm::fltSemantics & EnumToSemantics(Semantics S)
Definition APFloat.cpp:113
static LLVM_ABI bool semanticsHasInf(const fltSemantics &)
Definition APFloat.cpp:262
cmpResult
IEEE-754R 5.11: Floating Point Comparison Relations.
Definition APFloat.h:335
static constexpr roundingMode rmTowardZero
Definition APFloat.h:349
static LLVM_ABI ExponentType semanticsMinExponent(const fltSemantics &)
Definition APFloat.cpp:237
llvm::RoundingMode roundingMode
IEEE-754R 4.3: Rounding-direction attributes.
Definition APFloat.h:343
static const fltSemantics & BFloat()
Definition APFloat.h:296
static const fltSemantics & IEEEquad()
Definition APFloat.h:299
static LLVM_ABI unsigned int semanticsSizeInBits(const fltSemantics &)
Definition APFloat.cpp:240
static const fltSemantics & Float8E8M0FNU()
Definition APFloat.h:314
static LLVM_ABI bool semanticsHasSignedRepr(const fltSemantics &)
Definition APFloat.cpp:258
static const fltSemantics & IEEEdouble()
Definition APFloat.h:298
static LLVM_ABI unsigned getSizeInBits(const fltSemantics &Sem)
Returns the size of the floating point number (in bits) in the given semantics.
Definition APFloat.cpp:293
static const fltSemantics & x87DoubleExtended()
Definition APFloat.h:318
static constexpr roundingMode rmTowardNegative
Definition APFloat.h:348
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:345
static LLVM_ABI bool isValidArbitraryFPFormat(StringRef Format)
Returns true if the given string is a valid arbitrary floating-point format interpretation for llvm....
Definition APFloat.cpp:6001
static LLVM_ABI bool hasSignBitInMSB(const fltSemantics &)
Definition APFloat.cpp:275
static LLVM_ABI ExponentType semanticsMaxExponent(const fltSemantics &)
Definition APFloat.cpp:233
friend class APFloat
Definition APFloat.h:292
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Definition APFloat.cpp:229
static LLVM_ABI bool semanticsHasNaN(const fltSemantics &)
Definition APFloat.cpp:266
static LLVM_ABI Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
Definition APFloat.cpp:160
int32_t ExponentType
A signed type to represent a floating point numbers unbiased exponent.
Definition APFloat.h:156
static constexpr unsigned integerPartWidth
Definition APFloat.h:153
static const fltSemantics & PPCDoubleDoubleLegacy()
Definition APFloat.h:301
APInt::WordType integerPart
Definition APFloat.h:152
static LLVM_ABI bool semanticsHasZero(const fltSemantics &)
Definition APFloat.cpp:254
static LLVM_ABI bool isRepresentableAsNormalIn(const fltSemantics &Src, const fltSemantics &Dst)
Definition APFloat.cpp:279
static const fltSemantics & Float8E5M2FNUZ()
Definition APFloat.h:305
static const fltSemantics & Float8E4M3FNUZ()
Definition APFloat.h:308
static constexpr roundingMode rmTowardPositive
Definition APFloat.h:347
static const fltSemantics & IEEEhalf()
Definition APFloat.h:295
static const fltSemantics & Float4E2M1FN()
Definition APFloat.h:317
static const fltSemantics & Float6E2M3FN()
Definition APFloat.h:316
static const fltSemantics & Float8E4M3()
Definition APFloat.h:306
static const fltSemantics & Float8E4M3B11FNUZ()
Definition APFloat.h:309
static LLVM_ABI bool isRepresentableBy(const fltSemantics &A, const fltSemantics &B)
Definition APFloat.cpp:205
static const fltSemantics & Float8E3M4()
Definition APFloat.h:312
static LLVM_ABI bool isIEEELikeFP(const fltSemantics &)
Definition APFloat.cpp:270
static const fltSemantics & Float8E5M2()
Definition APFloat.h:304
fltCategory
Category of internally-represented number.
Definition APFloat.h:371
static constexpr roundingMode rmNearestTiesToAway
Definition APFloat.h:350
static const fltSemantics & PPCDoubleDouble()
Definition APFloat.h:300
static const fltSemantics & Float6E3M2FN()
Definition APFloat.h:315
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:361
static LLVM_ABI const fltSemantics * getArbitraryFPSemantics(StringRef Format)
Returns the fltSemantics for a given arbitrary FP format string, or nullptr if invalid.
Definition APFloat.cpp:6009
static const fltSemantics & FloatTF32()
Definition APFloat.h:313
static LLVM_ABI unsigned int semanticsIntSizeInBits(const fltSemantics &, bool)
Definition APFloat.cpp:243
static APFloat getQNaN(const fltSemantics &Sem, bool Negative=false, const APInt *payload=nullptr)
Factory for QNaN values.
Definition APFloat.h:1185
LLVM_ABI void Profile(FoldingSetNodeID &NID) const
Used to insert APFloat objects, or objects that contain APFloat objects, into FoldingSets.
Definition APFloat.cpp:5944
opStatus divide(const APFloat &RHS, roundingMode RM)
Definition APFloat.h:1273
bool isFiniteNonZero() const
Definition APFloat.h:1554
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:5901
LLVM_READONLY int getExactLog2Abs() const
Definition APFloat.h:1600
bool isNegative() const
Definition APFloat.h:1544
LLVM_ABI bool getExactInverse(APFloat *Inv) const
If this value is normal and has an exact, normal, multiplicative inverse, store it in inv and return ...
Definition APFloat.cpp:5843
cmpResult compareAbsoluteValue(const APFloat &RHS) const
Definition APFloat.h:1499
friend DoubleAPFloat
Definition APFloat.h:1616
LLVM_ABI double convertToDouble() const
Converts this APFloat to host double value.
Definition APFloat.cpp:5960
void toString(SmallVectorImpl< char > &Str, unsigned FormatPrecision=0, unsigned FormatMaxPadding=3, bool TruncateZero=true) const
Definition APFloat.h:1581
bool isNormal() const
Definition APFloat.h:1548
bool isDenormal() const
Definition APFloat.h:1545
opStatus add(const APFloat &RHS, roundingMode RM)
Definition APFloat.h:1246
static LLVM_ABI APFloat getAllOnesValue(const fltSemantics &Semantics)
Returns a float which is bitcasted from an all one value int.
Definition APFloat.cpp:5927
LLVM_ABI friend hash_code hash_value(const APFloat &Arg)
See friend declarations above.
Definition APFloat.cpp:5815
const fltSemantics & getSemantics() const
Definition APFloat.h:1552
bool isFinite() const
Definition APFloat.h:1549
bool isNaN() const
Definition APFloat.h:1542
static APFloat getOne(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative One.
Definition APFloat.h:1153
unsigned int convertToHexString(char *DST, unsigned int HexDigits, bool UpperCase, roundingMode RM) const
Definition APFloat.h:1534
LLVM_ABI float convertToFloat() const
Converts this APFloat to host float value.
Definition APFloat.cpp:5988
bool isSignaling() const
Definition APFloat.h:1546
opStatus fusedMultiplyAdd(const APFloat &Multiplicand, const APFloat &Addend, roundingMode RM)
Definition APFloat.h:1300
opStatus remainder(const APFloat &RHS)
Definition APFloat.h:1282
bool isZero() const
Definition APFloat.h:1540
APInt bitcastToAPInt() const
Definition APFloat.h:1436
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.h:1397
opStatus next(bool nextDown)
Definition APFloat.h:1319
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition APFloat.h:1163
friend APFloat scalbn(APFloat X, int Exp, roundingMode RM)
static APFloat getSmallest(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) finite number in the given semantics.
Definition APFloat.h:1213
LLVM_ABI FPClassTest classify() const
Return the FPClassTest which will return true for the value.
Definition APFloat.cpp:5830
opStatus mod(const APFloat &RHS)
Definition APFloat.h:1291
LLVM_ABI Expected< opStatus > convertFromString(StringRef, roundingMode)
Fill this APFloat with the result of a string conversion.
Definition APFloat.cpp:5810
friend IEEEFloat
Definition APFloat.h:1615
LLVM_DUMP_METHOD void dump() const
Definition APFloat.cpp:5938
LLVM_ABI void print(raw_ostream &) const
Definition APFloat.cpp:5931
opStatus roundToIntegral(roundingMode RM)
Definition APFloat.h:1313
static bool hasSignificand(const fltSemantics &Sem)
Returns true if the given semantics has actual significand.
Definition APFloat.h:1238
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
Definition APFloat.h:1144
bool isInfinity() const
Definition APFloat.h:1541
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt udiv(const APInt &RHS) const
Unsigned division operation.
Definition APInt.cpp:1599
static LLVM_ABI void tcSetBit(WordType *, unsigned bit)
Set the given bit of a bignum. Zero-based.
Definition APInt.cpp:2403
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
static LLVM_ABI void tcSet(WordType *, WordType, unsigned)
Sets the least significant part of a bignum to the input value, and zeroes out higher parts.
Definition APInt.cpp:2375
static LLVM_ABI void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition APInt.cpp:1793
static LLVM_ABI int tcExtractBit(const WordType *, unsigned bit)
Extract the given bit of a bignum; returns 0 or 1. Zero-based.
Definition APInt.cpp:2398
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1055
static LLVM_ABI WordType tcAdd(WordType *, const WordType *, WordType carry, unsigned)
DST += RHS + CARRY where CARRY is zero or one. Returns the carry flag.
Definition APInt.cpp:2477
static LLVM_ABI void tcExtract(WordType *, unsigned dstCount, const WordType *, unsigned srcBits, unsigned srcLSB)
Copy the bit vector of width srcBITS from SRC, starting at bit srcLSB, to DST, of dstCOUNT parts,...
Definition APInt.cpp:2447
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1535
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:968
static LLVM_ABI int tcCompare(const WordType *, const WordType *, unsigned)
Comparison (unsigned) of two bignums.
Definition APInt.cpp:2787
static APInt floatToBits(float V)
Converts a float to APInt bits.
Definition APInt.h:1775
uint64_t WordType
Definition APInt.h:80
static LLVM_ABI void tcAssign(WordType *, const WordType *, unsigned)
Assign one bignum to another.
Definition APInt.cpp:2383
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
static LLVM_ABI void tcShiftRight(WordType *, unsigned Words, unsigned Count)
Shift a bignum right Count bits.
Definition APInt.cpp:2761
static LLVM_ABI void tcFullMultiply(WordType *, const WordType *, const WordType *, unsigned, unsigned)
DST = LHS * RHS, where DST has width the sum of the widths of the operands.
Definition APInt.cpp:2667
unsigned getNumWords() const
Get the number of words.
Definition APInt.h:1518
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
static LLVM_ABI void tcClearBit(WordType *, unsigned bit)
Clear the given bit of a bignum. Zero-based.
Definition APInt.cpp:2408
void negate()
Negate this APInt in place.
Definition APInt.h:1491
static WordType tcDecrement(WordType *dst, unsigned parts)
Decrement a bignum in-place. Return the borrow flag.
Definition APInt.h:1941
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1662
static LLVM_ABI unsigned tcLSB(const WordType *, unsigned n)
Returns the bit number of the least or most significant set bit of a number.
Definition APInt.cpp:2414
static LLVM_ABI void tcShiftLeft(WordType *, unsigned Words, unsigned Count)
Shift a bignum left Count bits.
Definition APInt.cpp:2734
static LLVM_ABI bool tcIsZero(const WordType *, unsigned)
Returns true if a bignum is zero, false otherwise.
Definition APInt.cpp:2389
static LLVM_ABI unsigned tcMSB(const WordType *parts, unsigned n)
Returns the bit number of the most significant set bit of a number.
Definition APInt.cpp:2427
float bitsToFloat() const
Converts APInt bits to a float.
Definition APInt.h:1759
static LLVM_ABI int tcMultiplyPart(WordType *dst, const WordType *src, WordType multiplier, WordType carry, unsigned srcParts, unsigned dstParts, bool add)
DST += SRC * MULTIPLIER + PART if add is true DST = SRC * MULTIPLIER + PART if add is false.
Definition APInt.cpp:2565
static constexpr unsigned APINT_BITS_PER_WORD
Bits in a word.
Definition APInt.h:86
static LLVM_ABI WordType tcSubtract(WordType *, const WordType *, WordType carry, unsigned)
DST -= RHS + CARRY where CARRY is zero or one. Returns the carry flag.
Definition APInt.cpp:2512
static LLVM_ABI void tcNegate(WordType *, unsigned)
Negate a bignum in-place.
Definition APInt.cpp:2551
static APInt doubleToBits(double V)
Converts a double to APInt bits.
Definition APInt.h:1767
static WordType tcIncrement(WordType *dst, unsigned parts)
Increment a bignum in-place. Return the carry flag.
Definition APInt.h:1936
double bitsToDouble() const
Converts APInt bits to a double.
Definition APInt.h:1745
const uint64_t * getRawData() const
This function returns a pointer to the internal storage of the APInt.
Definition APInt.h:576
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:865
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24
bool isSigned() const
Definition APSInt.h:78
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static ErrorSuccess success()
Create a success value.
Definition Error.h:336
Tagged union holding either a T or a Error.
Definition Error.h:485
This class is used to gather all the unique data bits of a node.
Definition FoldingSet.h:208
void Add(const T &x)
Definition FoldingSet.h:248
Represent a mutable reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:294
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator erase(const_iterator CI)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:882
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition StringRef.h:490
const char * iterator
Definition StringRef.h:60
constexpr bool empty() const
Check if the string is empty.
Definition StringRef.h:141
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition StringRef.h:629
iterator begin() const
Definition StringRef.h:114
char back() const
Get the last character in the string.
Definition StringRef.h:153
StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition StringRef.h:714
constexpr size_t size() const
Get the string size.
Definition StringRef.h:144
char front() const
Get the first character in the string.
Definition StringRef.h:147
iterator end() const
Definition StringRef.h:116
bool consume_front(char Prefix)
Returns true if this StringRef has the given prefix and removes that prefix.
Definition StringRef.h:655
bool consume_front_insensitive(StringRef Prefix)
Returns true if this StringRef has the given prefix, ignoring case, and removes that prefix.
Definition StringRef.h:675
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI void makeSmallestNormalized(bool Neg)
Definition APFloat.cpp:5157
LLVM_ABI DoubleAPFloat & operator=(const DoubleAPFloat &RHS)
Definition APFloat.cpp:4687
LLVM_ABI void changeSign()
Definition APFloat.cpp:5064
LLVM_ABI bool isLargest() const
Definition APFloat.cpp:5631
LLVM_ABI opStatus remainder(const DoubleAPFloat &RHS)
Definition APFloat.cpp:4951
LLVM_ABI opStatus multiply(const DoubleAPFloat &RHS, roundingMode RM)
Definition APFloat.cpp:4854
LLVM_ABI fltCategory getCategory() const
Definition APFloat.cpp:5123
LLVM_ABI bool bitwiseIsEqual(const DoubleAPFloat &RHS) const
Definition APFloat.cpp:5180
LLVM_ABI LLVM_READONLY int getExactLog2Abs() const
Definition APFloat.cpp:5655
LLVM_ABI opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.cpp:5582
LLVM_ABI APInt bitcastToAPInt() const
Definition APFloat.cpp:5191
LLVM_ABI Expected< opStatus > convertFromString(StringRef, roundingMode)
Definition APFloat.cpp:5201
LLVM_ABI bool isSmallest() const
Definition APFloat.cpp:5614
LLVM_ABI opStatus subtract(const DoubleAPFloat &RHS, roundingMode RM)
Definition APFloat.cpp:4846
LLVM_ABI friend hash_code hash_value(const DoubleAPFloat &Arg)
Definition APFloat.cpp:5185
LLVM_ABI cmpResult compareAbsoluteValue(const DoubleAPFloat &RHS) const
Definition APFloat.cpp:5070
LLVM_ABI bool isDenormal() const
Definition APFloat.cpp:5607
LLVM_ABI opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.cpp:5418
LLVM_ABI void makeSmallest(bool Neg)
Definition APFloat.cpp:5150
LLVM_ABI friend int ilogb(const DoubleAPFloat &X)
Definition APFloat.cpp:5664
LLVM_ABI opStatus next(bool nextDown)
Definition APFloat.cpp:5217
LLVM_ABI void makeInf(bool Neg)
Definition APFloat.cpp:5129
LLVM_ABI bool isInteger() const
Definition APFloat.cpp:5639
LLVM_ABI void makeZero(bool Neg)
Definition APFloat.cpp:5134
LLVM_ABI opStatus divide(const DoubleAPFloat &RHS, roundingMode RM)
Definition APFloat.cpp:4940
LLVM_ABI bool isSmallestNormalized() const
Definition APFloat.cpp:5622
LLVM_ABI opStatus mod(const DoubleAPFloat &RHS)
Definition APFloat.cpp:4961
LLVM_ABI DoubleAPFloat(const fltSemantics &S)
Definition APFloat.cpp:4634
LLVM_ABI void toString(SmallVectorImpl< char > &Str, unsigned FormatPrecision, unsigned FormatMaxPadding, bool TruncateZero=true) const
Definition APFloat.cpp:5645
LLVM_ABI void makeLargest(bool Neg)
Definition APFloat.cpp:5139
LLVM_ABI cmpResult compare(const DoubleAPFloat &RHS) const
Definition APFloat.cpp:5172
LLVM_ABI friend DoubleAPFloat scalbn(const DoubleAPFloat &X, int Exp, roundingMode)
LLVM_ABI opStatus roundToIntegral(roundingMode RM)
Definition APFloat.cpp:4987
LLVM_ABI opStatus fusedMultiplyAdd(const DoubleAPFloat &Multiplicand, const DoubleAPFloat &Addend, roundingMode RM)
Definition APFloat.cpp:4972
LLVM_ABI APInt getNaNPayload() const
Definition APFloat.cpp:5793
LLVM_ABI unsigned int convertToHexString(char *DST, unsigned int HexDigits, bool UpperCase, roundingMode RM) const
Definition APFloat.cpp:5597
LLVM_ABI bool isNegative() const
Definition APFloat.cpp:5127
LLVM_ABI opStatus add(const DoubleAPFloat &RHS, roundingMode RM)
Definition APFloat.cpp:4841
LLVM_ABI void makeNaN(bool SNaN, bool Neg, const APInt *fill)
Definition APFloat.cpp:5167
LLVM_ABI unsigned int convertToHexString(char *dst, unsigned int hexDigits, bool upperCase, roundingMode) const
Write out a hexadecimal representation of the floating point value to DST, which must be of sufficien...
Definition APFloat.cpp:3191
LLVM_ABI cmpResult compareAbsoluteValue(const IEEEFloat &) const
Definition APFloat.cpp:1440
LLVM_ABI opStatus mod(const IEEEFloat &)
C fmod, or llvm frem.
Definition APFloat.cpp:2196
fltCategory getCategory() const
Definition APFloat.h:583
LLVM_ABI opStatus convertFromAPInt(const APInt &, bool, roundingMode)
Definition APFloat.cpp:2751
LLVM_ABI APInt getNaNPayload() const
Definition APFloat.cpp:4522
bool isFiniteNonZero() const
Definition APFloat.h:586
bool needsCleanup() const
Returns whether this instance allocated memory.
Definition APFloat.h:473
LLVM_ABI void makeLargest(bool Neg=false)
Make this number the largest magnitude normal number in the given semantics.
Definition APFloat.cpp:3949
LLVM_ABI LLVM_READONLY int getExactLog2Abs() const
Definition APFloat.cpp:4344
LLVM_ABI APInt bitcastToAPInt() const
Definition APFloat.cpp:3587
LLVM_ABI friend IEEEFloat scalbn(IEEEFloat X, int Exp, roundingMode)
Definition APFloat.cpp:4594
LLVM_ABI cmpResult compare(const IEEEFloat &) const
IEEE comparison with another floating point number (NaNs compare unordered, 0==-0).
Definition APFloat.cpp:2364
bool isNegative() const
IEEE-754R isSignMinus: Returns true if and only if the current value is negative.
Definition APFloat.h:548
LLVM_ABI opStatus divide(const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2070
bool isNaN() const
Returns true if and only if the float is a quiet or signaling NaN.
Definition APFloat.h:573
LLVM_ABI opStatus remainder(const IEEEFloat &)
IEEE remainder.
Definition APFloat.cpp:2088
LLVM_ABI double convertToDouble() const
Definition APFloat.cpp:3657
LLVM_ABI float convertToFloat() const
Definition APFloat.cpp:3650
LLVM_ABI opStatus subtract(const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2046
LLVM_ABI void toString(SmallVectorImpl< char > &Str, unsigned FormatPrecision=0, unsigned FormatMaxPadding=3, bool TruncateZero=true) const
Converts this value into a decimal string.
Definition APFloat.cpp:4300
LLVM_ABI void makeSmallest(bool Neg=false)
Make this number the smallest magnitude denormal number in the given semantics.
Definition APFloat.cpp:3981
LLVM_ABI void makeInf(bool Neg=false)
Definition APFloat.cpp:4541
LLVM_ABI bool isSmallestNormalized() const
Returns true if this is the smallest (by magnitude) normalized finite number in the given semantics.
Definition APFloat.cpp:961
LLVM_ABI void makeQuiet()
Definition APFloat.cpp:4570
LLVM_ABI bool isLargest() const
Returns true if and only if the number has the largest possible finite magnitude in the current seman...
Definition APFloat.cpp:1063
LLVM_ABI opStatus add(const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2040
bool isFinite() const
Returns true if and only if the current value is zero, subnormal, or normal.
Definition APFloat.h:560
LLVM_ABI Expected< opStatus > convertFromString(StringRef, roundingMode)
Definition APFloat.cpp:3134
LLVM_ABI void makeNaN(bool SNaN=false, bool Neg=false, const APInt *fill=nullptr)
Definition APFloat.cpp:849
LLVM_ABI opStatus multiply(const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2052
LLVM_ABI opStatus roundToIntegral(roundingMode)
Definition APFloat.cpp:2279
LLVM_ABI IEEEFloat & operator=(const IEEEFloat &)
Definition APFloat.cpp:921
LLVM_ABI bool bitwiseIsEqual(const IEEEFloat &) const
Bitwise comparison for equality (QNaNs compare equal, 0!=-0).
Definition APFloat.cpp:1088
LLVM_ABI void makeSmallestNormalized(bool Negative=false)
Returns the smallest (by magnitude) normalized finite number in the given semantics.
Definition APFloat.cpp:3995
LLVM_ABI bool isInteger() const
Returns true if and only if the number is an exact integer.
Definition APFloat.cpp:1080
LLVM_ABI IEEEFloat(const fltSemantics &)
Definition APFloat.cpp:1115
LLVM_ABI opStatus fusedMultiplyAdd(const IEEEFloat &, const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2233
LLVM_ABI friend int ilogb(const IEEEFloat &Arg)
Definition APFloat.cpp:4576
LLVM_ABI opStatus next(bool nextDown)
IEEE-754R 5.3.1: nextUp/nextDown.
Definition APFloat.cpp:4389
bool isInfinity() const
IEEE-754R isInfinite(): Returns true if and only if the float is infinity.
Definition APFloat.h:570
const fltSemantics & getSemantics() const
Definition APFloat.h:584
bool isZero() const
Returns true if and only if the float is plus or minus zero.
Definition APFloat.h:563
LLVM_ABI bool isSignaling() const
Returns true if and only if the float is a signaling NaN.
Definition APFloat.cpp:4373
LLVM_ABI void makeZero(bool Neg=false)
Definition APFloat.cpp:4556
LLVM_ABI opStatus convert(const fltSemantics &, roundingMode, bool *)
IEEEFloat::convert - convert a value of one floating point type to another.
Definition APFloat.cpp:2440
LLVM_ABI void changeSign()
Definition APFloat.cpp:1998
LLVM_ABI bool isDenormal() const
IEEE-754R isSubnormal(): Returns true if and only if the float is a denormal.
Definition APFloat.cpp:946
LLVM_ABI opStatus convertToInteger(MutableArrayRef< integerPart >, unsigned int, bool, roundingMode, bool *) const
Definition APFloat.cpp:2696
LLVM_ABI bool isSmallest() const
Returns true if and only if the number has the smallest possible non-zero magnitude in the current se...
Definition APFloat.cpp:953
An opaque object representing a hash code.
Definition Hashing.h:77
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
static constexpr opStatus opInexact
Definition APFloat.h:449
LLVM_ABI SlowDynamicAPInt abs(const SlowDynamicAPInt &X)
Redeclarations of friend declarations above to make it discoverable by lookups.
static constexpr fltCategory fcNaN
Definition APFloat.h:451
static constexpr opStatus opDivByZero
Definition APFloat.h:446
static constexpr opStatus opOverflow
Definition APFloat.h:447
static constexpr cmpResult cmpLessThan
Definition APFloat.h:441
const char unit< Period >::value[]
Definition Chrono.h:104
static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts, unsigned bits)
Definition APFloat.cpp:1463
static constexpr roundingMode rmTowardPositive
Definition APFloat.h:437
static constexpr uninitializedTag uninitialized
Definition APFloat.h:431
static constexpr fltCategory fcZero
Definition APFloat.h:453
static constexpr opStatus opOK
Definition APFloat.h:444
static constexpr cmpResult cmpGreaterThan
Definition APFloat.h:442
static constexpr unsigned integerPartWidth
Definition APFloat.h:439
LLVM_ABI hash_code hash_value(const IEEEFloat &Arg)
Definition APFloat.cpp:3331
APFloatBase::ExponentType ExponentType
Definition APFloat.h:430
static constexpr fltCategory fcNormal
Definition APFloat.h:452
static constexpr opStatus opInvalidOp
Definition APFloat.h:445
APFloatBase::opStatus opStatus
Definition APFloat.h:427
LLVM_ABI IEEEFloat frexp(const IEEEFloat &Val, int &Exp, roundingMode RM)
Definition APFloat.cpp:4615
APFloatBase::uninitializedTag uninitializedTag
Definition APFloat.h:425
static constexpr cmpResult cmpUnordered
Definition APFloat.h:443
static constexpr roundingMode rmTowardNegative
Definition APFloat.h:436
APFloatBase::roundingMode roundingMode
Definition APFloat.h:426
APFloatBase::cmpResult cmpResult
Definition APFloat.h:428
static constexpr fltCategory fcInfinity
Definition APFloat.h:450
static constexpr roundingMode rmNearestTiesToAway
Definition APFloat.h:434
static constexpr roundingMode rmTowardZero
Definition APFloat.h:438
static constexpr opStatus opUnderflow
Definition APFloat.h:448
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:432
LLVM_ABI int ilogb(const IEEEFloat &Arg)
Definition APFloat.cpp:4576
static constexpr cmpResult cmpEqual
Definition APFloat.h:440
LLVM_ABI IEEEFloat scalbn(IEEEFloat X, int Exp, roundingMode)
Definition APFloat.cpp:4594
static std::pair< APFloat, APFloat > fastTwoSum(APFloat X, APFloat Y)
Definition APFloat.cpp:4704
APFloatBase::integerPart integerPart
Definition APFloat.h:424
LLVM_ABI std::error_code status(const Twine &path, file_status &result, bool follow=true)
Get file status as if by POSIX stat().
This is an optimization pass for GlobalISel generic memory operations.
static unsigned int partAsHex(char *dst, APFloatBase::integerPart part, unsigned int count, const char *hexDigitChars)
Definition APFloat.cpp:746
void fill(R &&Range, T &&Value)
Provide wrappers to std::fill which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1758
static const char infinityL[]
Definition APFloat.cpp:737
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
static constexpr unsigned int partCountForBits(unsigned int bits)
Definition APFloat.cpp:324
static const char NaNU[]
Definition APFloat.cpp:740
static unsigned int HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
Definition APFloat.cpp:622
static unsigned int powerOf5(APFloatBase::integerPart *dst, unsigned int power)
Definition APFloat.cpp:681
unsigned hexDigitValue(char C)
Interpret the given character C as a hexadecimal digit and return its value.
static APFloat harrisonUlp(const APFloat &X)
Definition APFloat.cpp:793
static constexpr APFloatBase::ExponentType exponentZero(const fltSemantics &semantics)
Definition APFloat.cpp:298
static Expected< int > totalExponent(StringRef::iterator p, StringRef::iterator end, int exponentAdjustment)
Definition APFloat.cpp:381
LLVM_ABI std::error_code inconvertibleErrorCode()
The value returned by this function can be returned from convertToErrorCode for Error values where no...
Definition Error.cpp:94
const unsigned int maxPowerOfFiveExponent
Definition APFloat.cpp:224
int ilogb(const APFloat &Arg)
Returns the exponent of the internal representation of the APFloat.
Definition APFloat.h:1637
static char * writeUnsignedDecimal(char *dst, unsigned int n)
Definition APFloat.cpp:763
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
Definition STLExtras.h:2172
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:156
const unsigned int maxPrecision
Definition APFloat.cpp:223
APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM)
Equivalent of C standard library function.
Definition APFloat.h:1658
static const char NaNL[]
Definition APFloat.cpp:739
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:204
static const char infinityU[]
Definition APFloat.cpp:738
lostFraction
Enum that represents what fraction of the LSB truncated bits of an fp number represent.
Definition APFloat.h:51
@ lfMoreThanHalf
Definition APFloat.h:55
@ lfLessThanHalf
Definition APFloat.h:53
@ lfExactlyHalf
Definition APFloat.h:54
@ lfExactlyZero
Definition APFloat.h:52
static Error interpretDecimal(StringRef::iterator begin, StringRef::iterator end, decimalInfo *D)
Definition APFloat.cpp:471
LLVM_READONLY LLVM_ABI std::optional< APFloat > exp(const APFloat &X, RoundingMode RM=APFloat::rmNearestTiesToEven, APFloat::opStatus *Status=nullptr)
Implement IEEE 754-2019 exp functions.
Definition APFloat.cpp:6106
LLVM_ABI bool isFinite(const Loop *L)
Return true if this loop can be assumed to run for a finite number of iterations.
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
const unsigned int maxPowerOfFiveParts
Definition APFloat.cpp:225
APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM)
Returns: X * 2^Exp for integral exponents.
Definition APFloat.h:1646
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
static constexpr APFloatBase::ExponentType exponentNaN(const fltSemantics &semantics)
Definition APFloat.cpp:308
static Error createError(const Twine &Err)
Definition APFloat.cpp:320
static lostFraction shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
Definition APFloat.cpp:590
Error make_error(ArgTs &&... Args)
Make a Error instance representing failure using the given error info type.
Definition Error.h:340
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
static const char hexDigitsUpper[]
Definition APFloat.cpp:736
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:221
const unsigned int maxExponent
Definition APFloat.cpp:222
static unsigned int decDigitValue(unsigned int c)
Definition APFloat.cpp:331
fltNonfiniteBehavior
Definition APFloat.h:953
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:2011
static lostFraction combineLostFractions(lostFraction moreSignificant, lostFraction lessSignificant)
Definition APFloat.cpp:601
static Expected< StringRef::iterator > skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end, StringRef::iterator *dot)
Definition APFloat.cpp:431
RoundingMode
Rounding mode.
ArrayRef(const T &OneElt) -> ArrayRef< T >
static constexpr APFloatBase::ExponentType exponentInf(const fltSemantics &semantics)
Definition APFloat.cpp:303
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
static lostFraction lostFractionThroughTruncation(const APFloatBase::integerPart *parts, unsigned int partCount, unsigned int bits)
Definition APFloat.cpp:570
APFloat neg(APFloat X)
Returns the negated value of the argument.
Definition APFloat.h:1672
static APFloatBase::integerPart ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits, bool isNearest)
Definition APFloat.cpp:636
static char * writeSignedDecimal(char *dst, int value)
Definition APFloat.cpp:779
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition Hashing.h:305
static Expected< lostFraction > trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, unsigned int digitValue)
Definition APFloat.cpp:541
void consumeError(Error Err)
Consume a Error without doing anything.
Definition Error.h:1106
static Expected< int > readExponent(StringRef::iterator begin, StringRef::iterator end)
Definition APFloat.cpp:341
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition Hashing.h:285
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
static const char hexDigitsLower[]
Definition APFloat.cpp:735
#define N
const char * lastSigDigit
Definition APFloat.cpp:466
const char * firstSigDigit
Definition APFloat.cpp:465
APFloatBase::ExponentType maxExponent
Definition APFloat.h:1001
fltNonfiniteBehavior nonFiniteBehavior
Definition APFloat.h:1014
APFloatBase::ExponentType minExponent
Definition APFloat.h:1005
unsigned int sizeInBits
Definition APFloat.h:1012
unsigned int precision
Definition APFloat.h:1009
fltNanEncoding nanEncoding
Definition APFloat.h:1016