LLVM 23.0.0git
APFloat.cpp
Go to the documentation of this file.
1//===-- APFloat.cpp - Implement APFloat class -----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a class to represent arbitrary precision floating
10// point values and provide a variety of arithmetic operations on them.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/ADT/APFloat.h"
15#include "llvm/ADT/APSInt.h"
16#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/FoldingSet.h"
19#include "llvm/ADT/Hashing.h"
20#include "llvm/ADT/STLExtras.h"
22#include "llvm/ADT/StringRef.h"
24#include "llvm/Config/llvm-config.h"
25#include "llvm/Support/Debug.h"
26#include "llvm/Support/Error.h"
29#include <cstring>
30#include <limits.h>
31
// Forwards METHOD_CALL to whichever member of the union U matches the layout
// of the current semantics and returns its result from the *enclosing*
// function: U.IEEE when usesLayout<IEEEFloat>() holds for getSemantics(),
// U.Double when usesLayout<DoubleAPFloat>() holds. Any other layout reaches
// llvm_unreachable. The do { ... } while (false) wrapper lets the macro be
// used as a single statement followed by a semicolon.
32#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL) \
33 do { \
34 if (usesLayout<IEEEFloat>(getSemantics())) \
35 return U.IEEE.METHOD_CALL; \
36 if (usesLayout<DoubleAPFloat>(getSemantics())) \
37 return U.Double.METHOD_CALL; \
38 llvm_unreachable("Unexpected semantics"); \
39 } while (false)
40
41using namespace llvm;
42
43/// A macro used to combine two fcCategory enums into one key which can be used
44/// in a switch statement to classify how the interaction of two APFloat's
45/// categories affects an operation.
46///
/// The key is computed as (_lhs) * 4 + (_rhs), so distinct pairs map to
/// distinct keys only while every _rhs value is below 4.
/// NOTE(review): presumably fcCategory has exactly four enumerators -- confirm
/// before adding a new category.
///
47/// TODO: If clang source code is ever allowed to use constexpr in its own
48/// codebase, change this into a static inline function.
49#define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
50
51/* Assumed in hexadecimal significand parsing, and conversion to
52 hexadecimal strings. */
53static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
54
55namespace llvm {
56
57constexpr fltSemantics APFloatBase::semIEEEhalf = {15, -14, 11, 16};
58constexpr fltSemantics APFloatBase::semBFloat = {127, -126, 8, 16};
59constexpr fltSemantics APFloatBase::semIEEEsingle = {127, -126, 24, 32};
60constexpr fltSemantics APFloatBase::semIEEEdouble = {1023, -1022, 53, 64};
61constexpr fltSemantics APFloatBase::semIEEEquad = {16383, -16382, 113, 128};
62constexpr fltSemantics APFloatBase::semFloat8E5M2 = {15, -14, 3, 8};
63constexpr fltSemantics APFloatBase::semFloat8E5M2FNUZ = {
65constexpr fltSemantics APFloatBase::semFloat8E4M3 = {7, -6, 4, 8};
66constexpr fltSemantics APFloatBase::semFloat8E4M3FN = {
68constexpr fltSemantics APFloatBase::semFloat8E4M3FNUZ = {
70constexpr fltSemantics APFloatBase::semFloat8E4M3B11FNUZ = {
72constexpr fltSemantics APFloatBase::semFloat8E3M4 = {3, -2, 5, 8};
73constexpr fltSemantics APFloatBase::semFloatTF32 = {127, -126, 11, 19};
74constexpr fltSemantics APFloatBase::semFloat8E8M0FNU = {
75 127,
76 -127,
77 1,
78 8,
81 false,
82 false,
83 false};
84
85constexpr fltSemantics APFloatBase::semFloat6E3M2FN = {
87constexpr fltSemantics APFloatBase::semFloat6E2M3FN = {
89constexpr fltSemantics APFloatBase::semFloat4E2M1FN = {
91constexpr fltSemantics APFloatBase::semX87DoubleExtended = {16383, -16382, 64,
92 80};
93constexpr fltSemantics APFloatBase::semBogus = {0, 0, 0, 0};
94constexpr fltSemantics APFloatBase::semPPCDoubleDouble = {-1, 0, 0, 128};
95constexpr fltSemantics APFloatBase::semPPCDoubleDoubleLegacy = {
96 1023, -1022 + 53, 53 + 53, 128};
97
99 switch (S) {
100 case S_IEEEhalf:
101 return IEEEhalf();
102 case S_BFloat:
103 return BFloat();
104 case S_IEEEsingle:
105 return IEEEsingle();
106 case S_IEEEdouble:
107 return IEEEdouble();
108 case S_IEEEquad:
109 return IEEEquad();
111 return PPCDoubleDouble();
113 return PPCDoubleDoubleLegacy();
114 case S_Float8E5M2:
115 return Float8E5M2();
116 case S_Float8E5M2FNUZ:
117 return Float8E5M2FNUZ();
118 case S_Float8E4M3:
119 return Float8E4M3();
120 case S_Float8E4M3FN:
121 return Float8E4M3FN();
122 case S_Float8E4M3FNUZ:
123 return Float8E4M3FNUZ();
125 return Float8E4M3B11FNUZ();
126 case S_Float8E3M4:
127 return Float8E3M4();
128 case S_FloatTF32:
129 return FloatTF32();
130 case S_Float8E8M0FNU:
131 return Float8E8M0FNU();
132 case S_Float6E3M2FN:
133 return Float6E3M2FN();
134 case S_Float6E2M3FN:
135 return Float6E2M3FN();
136 case S_Float4E2M1FN:
137 return Float4E2M1FN();
139 return x87DoubleExtended();
140 }
141 llvm_unreachable("Unrecognised floating semantics");
142}
143
146 if (&Sem == &llvm::APFloat::IEEEhalf())
147 return S_IEEEhalf;
148 else if (&Sem == &llvm::APFloat::BFloat())
149 return S_BFloat;
150 else if (&Sem == &llvm::APFloat::IEEEsingle())
151 return S_IEEEsingle;
152 else if (&Sem == &llvm::APFloat::IEEEdouble())
153 return S_IEEEdouble;
154 else if (&Sem == &llvm::APFloat::IEEEquad())
155 return S_IEEEquad;
156 else if (&Sem == &llvm::APFloat::PPCDoubleDouble())
157 return S_PPCDoubleDouble;
158 else if (&Sem == &llvm::APFloat::PPCDoubleDoubleLegacy())
160 else if (&Sem == &llvm::APFloat::Float8E5M2())
161 return S_Float8E5M2;
162 else if (&Sem == &llvm::APFloat::Float8E5M2FNUZ())
163 return S_Float8E5M2FNUZ;
164 else if (&Sem == &llvm::APFloat::Float8E4M3())
165 return S_Float8E4M3;
166 else if (&Sem == &llvm::APFloat::Float8E4M3FN())
167 return S_Float8E4M3FN;
168 else if (&Sem == &llvm::APFloat::Float8E4M3FNUZ())
169 return S_Float8E4M3FNUZ;
170 else if (&Sem == &llvm::APFloat::Float8E4M3B11FNUZ())
171 return S_Float8E4M3B11FNUZ;
172 else if (&Sem == &llvm::APFloat::Float8E3M4())
173 return S_Float8E3M4;
174 else if (&Sem == &llvm::APFloat::FloatTF32())
175 return S_FloatTF32;
176 else if (&Sem == &llvm::APFloat::Float8E8M0FNU())
177 return S_Float8E8M0FNU;
178 else if (&Sem == &llvm::APFloat::Float6E3M2FN())
179 return S_Float6E3M2FN;
180 else if (&Sem == &llvm::APFloat::Float6E2M3FN())
181 return S_Float6E2M3FN;
182 else if (&Sem == &llvm::APFloat::Float4E2M1FN())
183 return S_Float4E2M1FN;
184 else if (&Sem == &llvm::APFloat::x87DoubleExtended())
185 return S_x87DoubleExtended;
186 else
187 llvm_unreachable("Unknown floating semantics");
188}
189
191 const fltSemantics &B) {
192 return A.maxExponent <= B.maxExponent && A.minExponent >= B.minExponent &&
193 A.precision <= B.precision;
194}
195
196/* A tight upper bound on number of parts required to hold the value
197 pow(5, power) is
198
199 power * 815 / (351 * integerPartWidth) + 1
200
201 However, whilst the result may require only this many parts,
202 because we are multiplying two values to get it, the
203 multiplication may require an extra part with the excess part
204 being zero (consider the trivial case of 1 * 1, tcFullMultiply
205 requires two parts to hold the single-part result). So we add an
206 extra one to guarantee enough space whilst multiplying. */
207const unsigned int maxExponent = 16383;
208const unsigned int maxPrecision = 113;
210const unsigned int maxPowerOfFiveParts =
211 2 +
213
// Returns the `precision` field of the given semantics unchanged.
214unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
215 return semantics.precision;
216}
219 return semantics.maxExponent;
220}
223 return semantics.minExponent;
224}
// Returns the `sizeInBits` field of the given semantics unchanged.
225unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
226 return semantics.sizeInBits;
227}
229 bool isSigned) {
230 // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need
231 // at least one more bit than the MaxExponent to hold the max FP value.
232 unsigned int MinBitWidth = semanticsMaxExponent(semantics) + 1;
233 // Extra sign bit needed.
234 if (isSigned)
235 ++MinBitWidth;
236 return MinBitWidth;
237}
238
240 return semantics.hasZero;
241}
242
244 return semantics.hasSignedRepr;
245}
246
250
254
256 // Keep in sync with Type::isIEEELikeFPTy
257 return SemanticsToEnum(semantics) <= S_IEEEquad;
258}
259
261 return semantics.hasSignBitInMSB;
262}
263
265 const fltSemantics &Dst) {
266 // Exponent range must be larger.
267 if (Src.maxExponent >= Dst.maxExponent || Src.minExponent <= Dst.minExponent)
268 return false;
269
270 // If the mantissa is long enough, the result value could still be denormal
271 // with a larger exponent range.
272 //
273 // FIXME: This condition is probably not accurate but also shouldn't be a
274 // practical concern with existing types.
275 return Dst.precision >= Src.precision;
276}
277
279 return Sem.sizeInBits;
280}
281
// Returns one less than the minimum exponent of the given semantics.
// NOTE(review): presumably this is the internal exponent value used to tag a
// zero -- confirm against the callers.
282static constexpr APFloatBase::ExponentType
283exponentZero(const fltSemantics &semantics) {
284 return semantics.minExponent - 1;
285}
286
// Returns one more than the maximum exponent of the given semantics.
// NOTE(review): presumably this is the internal exponent value used to tag an
// infinity -- confirm against the callers.
287static constexpr APFloatBase::ExponentType
288exponentInf(const fltSemantics &semantics) {
289 return semantics.maxExponent + 1;
290}
291
292static constexpr APFloatBase::ExponentType
293exponentNaN(const fltSemantics &semantics) {
296 return exponentZero(semantics);
297 if (semantics.hasSignedRepr)
298 return semantics.maxExponent;
299 }
300 return semantics.maxExponent + 1;
301}
302
303/* A bunch of private, handy routines. */
304
305static inline Error createError(const Twine &Err) {
307}
308
309static constexpr inline unsigned int partCountForBits(unsigned int bits) {
310 return std::max(1u, (bits + APFloatBase::integerPartWidth - 1) /
312}
313
/* Map a character code to its decimal digit value.  The codes '0'..'9'
   yield 0U-9U; every other code yields a value >= 10U (the unsigned
   subtraction wraps for codes below '0'), so callers reject non-digits by
   comparing the result against 10U. */
static inline unsigned int decDigitValue(unsigned int c) {
  const unsigned int zeroCode = '0';
  return c - zeroCode;
}
320
321/* Return the value of a decimal exponent of the form
322 [+-]ddddddd.
323
324 If the exponent overflows, returns a large exponent with the
325 appropriate sign. */
328 const unsigned int overlargeExponent = 24000; /* FIXME. */
329 StringRef::iterator p = begin;
330
331 // Treat no exponent as 0 to match binutils
332 if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end))
333 return 0;
334
335 bool isNegative = *p == '-';
336 if (*p == '-' || *p == '+') {
337 p++;
338 if (p == end)
339 return createError("Exponent has no digits");
340 }
341
342 unsigned absExponent = decDigitValue(*p++);
343 if (absExponent >= 10U)
344 return createError("Invalid character in exponent");
345
346 for (; p != end; ++p) {
347 unsigned value = decDigitValue(*p);
348 if (value >= 10U)
349 return createError("Invalid character in exponent");
350
351 absExponent = absExponent * 10U + value;
352 if (absExponent >= overlargeExponent) {
353 absExponent = overlargeExponent;
354 break;
355 }
356 }
357
358 if (isNegative)
359 return -(int) absExponent;
360 else
361 return (int) absExponent;
362}
363
364/* This is ugly and needs cleaning up, but I don't immediately see
365 how whilst remaining safe. */
368 int exponentAdjustment) {
369 int exponent = 0;
370
371 if (p == end)
372 return createError("Exponent has no digits");
373
374 bool negative = *p == '-';
375 if (*p == '-' || *p == '+') {
376 p++;
377 if (p == end)
378 return createError("Exponent has no digits");
379 }
380
381 int unsignedExponent = 0;
382 bool overflow = false;
383 for (; p != end; ++p) {
384 unsigned int value;
385
386 value = decDigitValue(*p);
387 if (value >= 10U)
388 return createError("Invalid character in exponent");
389
390 unsignedExponent = unsignedExponent * 10 + value;
391 if (unsignedExponent > 32767) {
392 overflow = true;
393 break;
394 }
395 }
396
397 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
398 overflow = true;
399
400 if (!overflow) {
401 exponent = unsignedExponent;
402 if (negative)
403 exponent = -exponent;
404 exponent += exponentAdjustment;
405 if (exponent > 32767 || exponent < -32768)
406 overflow = true;
407 }
408
409 if (overflow)
410 exponent = negative ? -32768: 32767;
411
412 return exponent;
413}
414
417 StringRef::iterator *dot) {
418 StringRef::iterator p = begin;
419 *dot = end;
420 while (p != end && *p == '0')
421 p++;
422
423 if (p != end && *p == '.') {
424 *dot = p++;
425
426 if (end - begin == 1)
427 return createError("Significand has no digits");
428
429 while (p != end && *p == '0')
430 p++;
431 }
432
433 return p;
434}
435
436/* Given a normal decimal floating point number of the form
437
438 dddd.dddd[eE][+-]ddd
439
440 where the decimal point and exponent are optional, fill out the
441 structure D. Exponent is appropriate if the significand is
442 treated as an integer, and normalizedExponent if the significand
443 is taken to have the decimal point after a single leading
444 non-zero digit.
445
446 If the value is zero, D->firstSigDigit points to a non-digit, and
447 the return exponent is zero.
448*/
450 const char *firstSigDigit;
451 const char *lastSigDigit;
454};
455
458 StringRef::iterator dot = end;
459
460 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
461 if (!PtrOrErr)
462 return PtrOrErr.takeError();
463 StringRef::iterator p = *PtrOrErr;
464
465 D->firstSigDigit = p;
466 D->exponent = 0;
467 D->normalizedExponent = 0;
468
469 for (; p != end; ++p) {
470 if (*p == '.') {
471 if (dot != end)
472 return createError("String contains multiple dots");
473 dot = p++;
474 if (p == end)
475 break;
476 }
477 if (decDigitValue(*p) >= 10U)
478 break;
479 }
480
481 if (p != end) {
482 if (*p != 'e' && *p != 'E')
483 return createError("Invalid character in significand");
484 if (p == begin)
485 return createError("Significand has no digits");
486 if (dot != end && p - begin == 1)
487 return createError("Significand has no digits");
488
489 /* p points to the first non-digit in the string */
490 auto ExpOrErr = readExponent(p + 1, end);
491 if (!ExpOrErr)
492 return ExpOrErr.takeError();
493 D->exponent = *ExpOrErr;
494
495 /* Implied decimal point? */
496 if (dot == end)
497 dot = p;
498 }
499
500 /* If number is all zeroes accept any exponent. */
501 if (p != D->firstSigDigit) {
502 /* Drop insignificant trailing zeroes. */
503 if (p != begin) {
504 do
505 do
506 p--;
507 while (p != begin && *p == '0');
508 while (p != begin && *p == '.');
509 }
510
511 /* Adjust the exponents for any decimal point. */
512 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
513 D->normalizedExponent = (D->exponent +
514 static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
515 - (dot > D->firstSigDigit && dot < p)));
516 }
517
518 D->lastSigDigit = p;
519 return Error::success();
520}
521
522/* Return the trailing fraction of a hexadecimal number.
523 DIGITVALUE is the first hex digit of the fraction, P points to
524 the next digit. */
527 unsigned int digitValue) {
528 /* If the first trailing digit isn't 0 or 8 we can work out the
529 fraction immediately. */
530 if (digitValue > 8)
531 return lfMoreThanHalf;
532 else if (digitValue < 8 && digitValue > 0)
533 return lfLessThanHalf;
534
535 // Otherwise we need to find the first non-zero digit.
536 while (p != end && (*p == '0' || *p == '.'))
537 p++;
538
539 if (p == end)
540 return createError("Invalid trailing hexadecimal fraction!");
541
542 unsigned hexDigit = hexDigitValue(*p);
543
544 /* If we ran off the end it is exactly zero or one-half, otherwise
545 a little more. */
546 if (hexDigit == UINT_MAX)
547 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
548 else
549 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
550}
551
552/* Return the fraction lost were a bignum truncated losing the least
553 significant BITS bits. */
554static lostFraction
556 unsigned int partCount,
557 unsigned int bits)
558{
559 unsigned lsb = APInt::tcLSB(parts, partCount);
560
561 /* Note this is guaranteed true if bits == 0, or LSB == UINT_MAX. */
562 if (bits <= lsb)
563 return lfExactlyZero;
564 if (bits == lsb + 1)
565 return lfExactlyHalf;
566 if (bits <= partCount * APFloatBase::integerPartWidth &&
567 APInt::tcExtractBit(parts, bits - 1))
568 return lfMoreThanHalf;
569
570 return lfLessThanHalf;
571}
572
573/* Shift DST right BITS bits noting lost fraction. */
574static lostFraction
575shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
576{
577 lostFraction lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
578
579 APInt::tcShiftRight(dst, parts, bits);
580
581 return lost_fraction;
582}
583
584/* Combine the effect of two lost fractions. */
585static lostFraction
587 lostFraction lessSignificant)
588{
589 if (lessSignificant != lfExactlyZero) {
590 if (moreSignificant == lfExactlyZero)
591 moreSignificant = lfLessThanHalf;
592 else if (moreSignificant == lfExactlyHalf)
593 moreSignificant = lfMoreThanHalf;
594 }
595
596 return moreSignificant;
597}
598
/* Bound, in half-ulps, on the error of a product of two floating point
   numbers whose own errors are at most HUerr1 and HUerr2 half-ulps.  The
   true error is strictly less than the returned value.  inexactMultiply
   says whether the multiplication itself lost precision.

   See "How to Read Floating Point Numbers Accurately" by William D
   Clinger. */
static unsigned int HUerrBound(bool inexactMultiply, unsigned int HUerr1,
                               unsigned int HUerr2) {
  assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));

  const unsigned int inexact = inexactMultiply ? 1u : 0u;
  const unsigned int inputError = HUerr1 + HUerr2;

  if (inputError != 0)
    return inexact + 2 * inputError;

  /* Exact operands: the error is <= inexactMultiply half-ulps. */
  return 2 * inexact;
}
616
617/* The number of ulps from the boundary (zero, or half if ISNEAREST)
618 when the least significant BITS are truncated. BITS cannot be
619 zero. */
621ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
622 bool isNearest) {
623 assert(bits != 0);
624
625 bits--;
626 unsigned count = bits / APFloatBase::integerPartWidth;
627 unsigned partBits = bits % APFloatBase::integerPartWidth + 1;
628
630 parts[count] & (~(APFloatBase::integerPart)0 >>
631 (APFloatBase::integerPartWidth - partBits));
632
634 if (isNearest)
635 boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
636 else
637 boundary = 0;
638
639 if (count == 0) {
640 if (part - boundary <= boundary - part)
641 return part - boundary;
642 else
643 return boundary - part;
644 }
645
646 if (part == boundary) {
647 while (--count)
648 if (parts[count])
649 return ~(APFloatBase::integerPart) 0; /* A lot. */
650
651 return parts[0];
652 } else if (part == boundary - 1) {
653 while (--count)
654 if (~parts[count])
655 return ~(APFloatBase::integerPart) 0; /* A lot. */
656
657 return -parts[0];
658 }
659
660 return ~(APFloatBase::integerPart) 0; /* A lot. */
661}
662
663/* Place pow(5, power) in DST, and return the number of parts used.
664 DST must be at least one part larger than size of the answer. */
665static unsigned int
666powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
667 static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
669 pow5s[0] = 78125 * 5;
670
671 unsigned int partsCount = 1;
672 APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
673 assert(power <= maxExponent);
674
675 p1 = dst;
676 p2 = scratch;
677
678 *p1 = firstEightPowers[power & 7];
679 power >>= 3;
680
681 unsigned result = 1;
682 pow5 = pow5s;
683
684 for (unsigned int n = 0; power; power >>= 1, n++) {
685 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
686 if (n != 0) {
687 APInt::tcFullMultiply(pow5, pow5 - partsCount, pow5 - partsCount,
688 partsCount, partsCount);
689 partsCount *= 2;
690 if (pow5[partsCount - 1] == 0)
691 partsCount--;
692 }
693
694 if (power & 1) {
696
697 APInt::tcFullMultiply(p2, p1, pow5, result, partsCount);
698 result += partsCount;
699 if (p2[result - 1] == 0)
700 result--;
701
702 /* Now result is in p1 with partsCount parts and p2 is scratch
703 space. */
704 tmp = p1;
705 p1 = p2;
706 p2 = tmp;
707 }
708
709 pow5 += partsCount;
710 }
711
712 if (p1 != dst)
713 APInt::tcAssign(dst, p1, result);
714
715 return result;
716}
717
718/* Zero at the end to avoid modular arithmetic when adding one; used
719 when rounding up during hexadecimal output. */
720static const char hexDigitsLower[] = "0123456789abcdef0";
721static const char hexDigitsUpper[] = "0123456789ABCDEF0";
722static const char infinityL[] = "infinity";
723static const char infinityU[] = "INFINITY";
724static const char NaNL[] = "nan";
725static const char NaNU[] = "NAN";
726
727/* Write out an integerPart in hexadecimal, starting with the most
728 significant nibble. Write out exactly COUNT hexdigits, return
729 COUNT. */
730static unsigned int
731partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
732 const char *hexDigitChars)
733{
734 unsigned int result = count;
735
737
738 part >>= (APFloatBase::integerPartWidth - 4 * count);
739 while (count--) {
740 dst[count] = hexDigitChars[part & 0xf];
741 part >>= 4;
742 }
743
744 return result;
745}
746
/* Write N to DST as an unsigned decimal number without leading zeroes (a
   single '0' for zero).  No terminator is written.  Returns a pointer just
   past the last character written. */
static char *writeUnsignedDecimal(char *dst, unsigned int n) {
  char reversed[40];
  unsigned int length = 0;

  // Peel the digits off least significant first; at least one digit is
  // always produced.
  for (;;) {
    reversed[length++] = static_cast<char>('0' + n % 10);
    n /= 10;
    if (n == 0)
      break;
  }

  // Emit them most significant first.
  while (length != 0)
    *dst++ = reversed[--length];

  return dst;
}
762
763/* Write out a signed decimal integer. */
764static char *writeSignedDecimal(char *dst, int value) {
765 if (value < 0) {
766 *dst++ = '-';
767 dst = writeUnsignedDecimal(dst, -(unsigned) value);
768 } else {
769 dst = writeUnsignedDecimal(dst, value);
770 }
771
772 return dst;
773}
774
775// Compute the ULP of the input using a definition from:
776// Jean-Michel Muller. On the definition of ulp(x). [Research Report] RR-5504,
777// LIP RR-2005-09, INRIA, LIP. 2005, pp.16. inria-00070503
778static APFloat harrisonUlp(const APFloat &X) {
779 const fltSemantics &Sem = X.getSemantics();
780 switch (X.getCategory()) {
781 case APFloat::fcNaN:
782 return APFloat::getQNaN(Sem);
784 return APFloat::getInf(Sem);
785 case APFloat::fcZero:
786 return APFloat::getSmallest(Sem);
788 break;
789 }
790 if (X.isDenormal() || X.isSmallestNormalized())
791 return APFloat::getSmallest(Sem);
792 int Exp = ilogb(X);
793 if (X.getExactLog2() != INT_MIN)
794 Exp -= 1;
795 return scalbn(APFloat::getOne(Sem), Exp - (Sem.precision - 1),
797}
798
799namespace detail {
800/* Constructors. */
801void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
802 semantics = ourSemantics;
803 unsigned count = partCount();
804 if (count > 1)
805 significand.parts = new integerPart[count];
806}
807
808void IEEEFloat::freeSignificand() {
809 if (needsCleanup())
810 delete [] significand.parts;
811}
812
813void IEEEFloat::assign(const IEEEFloat &rhs) {
814 assert(semantics == rhs.semantics);
815
816 sign = rhs.sign;
817 category = rhs.category;
818 exponent = rhs.exponent;
819 if (isFiniteNonZero() || category == fcNaN)
820 copySignificand(rhs);
821}
822
823void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
824 assert(isFiniteNonZero() || category == fcNaN);
825 assert(rhs.partCount() >= partCount());
826
827 APInt::tcAssign(significandParts(), rhs.significandParts(),
828 partCount());
829}
830
831/* Make this number a NaN, with an arbitrary but deterministic value
832 for the significand. If double or longer, this is a signalling NaN,
833 which may not be ideal. If float, this is QNaN(0). */
834void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
835 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
836 llvm_unreachable("This floating point format does not support NaN");
837
838 if (Negative && !semantics->hasSignedRepr)
840 "This floating point format does not support signed values");
841
842 category = fcNaN;
843 sign = Negative;
844 exponent = exponentNaN();
845
846 integerPart *significand = significandParts();
847 unsigned numParts = partCount();
848
849 APInt fill_storage;
850 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
851 // NaN-only types do not distinguish signalling and quiet NaN, so
852 // clear the SNaN request and never take the signalling path below.
853 SNaN = false;
854 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
855 sign = true;
856 fill_storage = APInt::getZero(semantics->precision - 1);
857 } else {
858 fill_storage = APInt::getAllOnes(semantics->precision - 1);
859 }
860 fill = &fill_storage;
861 }
862
863 // Set the significand bits to the fill.
864 if (!fill || fill->getNumWords() < numParts)
865 APInt::tcSet(significand, 0, numParts);
866 if (fill) {
867 APInt::tcAssign(significand, fill->getRawData(),
868 std::min(fill->getNumWords(), numParts));
869
870 // Zero out the excess bits of the significand.
871 unsigned bitsToPreserve = semantics->precision - 1;
872 unsigned part = bitsToPreserve / 64;
873 bitsToPreserve %= 64;
874 significand[part] &= ((1ULL << bitsToPreserve) - 1);
875 for (part++; part != numParts; ++part)
876 significand[part] = 0;
877 }
878
879 unsigned QNaNBit =
880 (semantics->precision >= 2) ? (semantics->precision - 2) : 0;
881
882 if (SNaN) {
883 // We always have to clear the QNaN bit to make it an SNaN.
884 APInt::tcClearBit(significand, QNaNBit);
885
886 // If there are no bits set in the payload, we have to set
887 // *something* to make it a NaN instead of an infinity;
888 // conventionally, this is the next bit down from the QNaN bit.
889 if (APInt::tcIsZero(significand, numParts))
890 APInt::tcSetBit(significand, QNaNBit - 1);
891 } else if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
892 // The only NaN is a quiet NaN, and it has no bits set in the significand.
893 // Do nothing.
894 } else {
895 // We always have to set the QNaN bit to make it a QNaN.
896 APInt::tcSetBit(significand, QNaNBit);
897 }
898
899 // For x87 extended precision, we want to make a NaN, not a
900 // pseudo-NaN. Maybe we should expose the ability to make
901 // pseudo-NaNs?
902 if (semantics == &APFloatBase::semX87DoubleExtended)
903 APInt::tcSetBit(significand, QNaNBit + 1);
904}
905
907 if (this != &rhs) {
908 if (semantics != rhs.semantics) {
909 freeSignificand();
910 initialize(rhs.semantics);
911 }
912 assign(rhs);
913 }
914
915 return *this;
916}
917
919 freeSignificand();
920
921 semantics = rhs.semantics;
922 significand = rhs.significand;
923 exponent = rhs.exponent;
924 category = rhs.category;
925 sign = rhs.sign;
926
927 rhs.semantics = &APFloatBase::semBogus;
928 return *this;
929}
930
932 return isFiniteNonZero() && (exponent == semantics->minExponent) &&
933 (APInt::tcExtractBit(significandParts(),
934 semantics->precision - 1) == 0);
935}
936
938 // The smallest number by magnitude in our format will be the smallest
939 // denormal, i.e. the floating point number with exponent being minimum
940 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
941 return isFiniteNonZero() && exponent == semantics->minExponent &&
942 significandMSB() == 0;
943}
944
946 return getCategory() == fcNormal && exponent == semantics->minExponent &&
947 isSignificandAllZerosExceptMSB();
948}
949
950unsigned int IEEEFloat::getNumHighBits() const {
951 const unsigned int PartCount = partCountForBits(semantics->precision);
952 const unsigned int Bits = PartCount * integerPartWidth;
953
954 // Compute how many bits are used in the final word.
955 // When precision is just 1, it represents the 'Pth'
956 // Precision bit and not the actual significand bit.
957 const unsigned int NumHighBits = (semantics->precision > 1)
958 ? (Bits - semantics->precision + 1)
959 : (Bits - semantics->precision);
960 return NumHighBits;
961}
962
963bool IEEEFloat::isSignificandAllOnes() const {
964 // Test if the significand excluding the integral bit is all ones. This allows
965 // us to test for binade boundaries.
966 const integerPart *Parts = significandParts();
967 const unsigned PartCount = partCountForBits(semantics->precision);
968 for (unsigned i = 0; i < PartCount - 1; i++)
969 if (~Parts[i])
970 return false;
971
972 // Set the unused high bits to all ones when we compare.
973 const unsigned NumHighBits = getNumHighBits();
974 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
975 "Can not have more high bits to fill than integerPartWidth");
976 const integerPart HighBitFill =
977 ~integerPart(0) << (integerPartWidth - NumHighBits);
978 if ((semantics->precision <= 1) || (~(Parts[PartCount - 1] | HighBitFill)))
979 return false;
980
981 return true;
982}
983
984bool IEEEFloat::isSignificandAllOnesExceptLSB() const {
985 // Test if the significand excluding the integral bit is all ones except for
986 // the least significant bit.
987 const integerPart *Parts = significandParts();
988
989 if (Parts[0] & 1)
990 return false;
991
992 const unsigned PartCount = partCountForBits(semantics->precision);
993 for (unsigned i = 0; i < PartCount - 1; i++) {
994 if (~Parts[i] & ~unsigned{!i})
995 return false;
996 }
997
998 // Set the unused high bits to all ones when we compare.
999 const unsigned NumHighBits = getNumHighBits();
1000 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
1001 "Can not have more high bits to fill than integerPartWidth");
1002 const integerPart HighBitFill = ~integerPart(0)
1003 << (integerPartWidth - NumHighBits);
1004 if (~(Parts[PartCount - 1] | HighBitFill | 0x1))
1005 return false;
1006
1007 return true;
1008}
1009
1010bool IEEEFloat::isSignificandAllZeros() const {
1011 // Test if the significand excluding the integral bit is all zeros. This
1012 // allows us to test for binade boundaries.
1013 const integerPart *Parts = significandParts();
1014 const unsigned PartCount = partCountForBits(semantics->precision);
1015
1016 for (unsigned i = 0; i < PartCount - 1; i++)
1017 if (Parts[i])
1018 return false;
1019
1020 // Compute how many bits are used in the final word.
1021 const unsigned NumHighBits = getNumHighBits();
1022 assert(NumHighBits < integerPartWidth && "Can not have more high bits to "
1023 "clear than integerPartWidth");
1024 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
1025
1026 if ((semantics->precision > 1) && (Parts[PartCount - 1] & HighBitMask))
1027 return false;
1028
1029 return true;
1030}
1031
1032bool IEEEFloat::isSignificandAllZerosExceptMSB() const {
1033 const integerPart *Parts = significandParts();
1034 const unsigned PartCount = partCountForBits(semantics->precision);
1035
1036 for (unsigned i = 0; i < PartCount - 1; i++) {
1037 if (Parts[i])
1038 return false;
1039 }
1040
1041 const unsigned NumHighBits = getNumHighBits();
1042 const integerPart MSBMask = integerPart(1)
1043 << (integerPartWidth - NumHighBits);
1044 return ((semantics->precision <= 1) || (Parts[PartCount - 1] == MSBMask));
1045}
1046
1048 bool IsMaxExp = isFiniteNonZero() && exponent == semantics->maxExponent;
1049 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1050 semantics->nanEncoding == fltNanEncoding::AllOnes) {
1051 // The largest number by magnitude in our format will be the floating point
1052 // number with maximum exponent and with significand that is all ones except
1053 // the LSB.
1054 return (IsMaxExp && APFloat::hasSignificand(*semantics))
1055 ? isSignificandAllOnesExceptLSB()
1056 : IsMaxExp;
1057 } else {
1058 // The largest number by magnitude in our format will be the floating point
1059 // number with maximum exponent and with significand that is all ones.
1060 return IsMaxExp && isSignificandAllOnes();
1061 }
1062}
1063
1065 // This could be made more efficient; I'm going for obviously correct.
1066 if (!isFinite()) return false;
1067 IEEEFloat truncated = *this;
1068 truncated.roundToIntegral(rmTowardZero);
1069 return compare(truncated) == cmpEqual;
1070}
1071
1072bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {
1073 if (this == &rhs)
1074 return true;
1075 if (semantics != rhs.semantics ||
1076 category != rhs.category ||
1077 sign != rhs.sign)
1078 return false;
1079 if (category==fcZero || category==fcInfinity)
1080 return true;
1081
1082 if (isFiniteNonZero() && exponent != rhs.exponent)
1083 return false;
1084
1085 return std::equal(significandParts(), significandParts() + partCount(),
1086 rhs.significandParts());
1087}
1088
1090 initialize(&ourSemantics);
1091 sign = 0;
1092 category = fcNormal;
1093 zeroSignificand();
1094 exponent = ourSemantics.precision - 1;
1095 significandParts()[0] = value;
1097}
1098
1100 initialize(&ourSemantics);
1101 // The Float8E8M0FNU format does not have a representation
1102 // for zero. So, use the closest representation instead.
1103 // Moreover, the all-zero encoding represents a valid
1104 // normal value (which is the smallestNormalized here).
1105 // Hence, we call makeSmallestNormalized (where category is
1106 // 'fcNormal') instead of makeZero (where category is 'fcZero').
1107 ourSemantics.hasZero ? makeZero(false) : makeSmallestNormalized(false);
1108}
1109
1110// Delegate to the previous constructor, because later copy constructor may
1111// actually inspects category, which can't be garbage.
1113 : IEEEFloat(ourSemantics) {}
1114
1116 initialize(rhs.semantics);
1117 assign(rhs);
1118}
1119
1120IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&APFloatBase::semBogus) {
1121 *this = std::move(rhs);
1122}
1123
1124IEEEFloat::~IEEEFloat() { freeSignificand(); }
1125
1126unsigned int IEEEFloat::partCount() const {
1127 return partCountForBits(semantics->precision + 1);
1128}
1129
1130const APFloat::integerPart *IEEEFloat::significandParts() const {
1131 return const_cast<IEEEFloat *>(this)->significandParts();
1132}
1133
1134APFloat::integerPart *IEEEFloat::significandParts() {
1135 if (partCount() > 1)
1136 return significand.parts;
1137 else
1138 return &significand.part;
1139}
1140
1141void IEEEFloat::zeroSignificand() {
1142 APInt::tcSet(significandParts(), 0, partCount());
1143}
1144
1145/* Increment an fcNormal floating point number's significand. */
1146void IEEEFloat::incrementSignificand() {
1147 [[maybe_unused]] integerPart carry =
1148 APInt::tcIncrement(significandParts(), partCount());
1149
1150 /* Our callers should never cause us to overflow. */
1151 assert(carry == 0);
1152}
1153
1154/* Add the significand of the RHS. Returns the carry flag. */
1155APFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
1156 integerPart *parts = significandParts();
1157
1158 assert(semantics == rhs.semantics);
1159 assert(exponent == rhs.exponent);
1160
1161 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
1162}
1163
1164/* Subtract the significand of the RHS with a borrow flag. Returns
1165 the borrow flag. */
1166APFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
1167 integerPart borrow) {
1168 integerPart *parts = significandParts();
1169
1170 assert(semantics == rhs.semantics);
1171 assert(exponent == rhs.exponent);
1172
1173 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
1174 partCount());
1175}
1176
1177/* Multiply the significand of the RHS. Unless IGNOREADDEND is set, a
1178 non-zero ADDEND is added on to the full-precision result of the
1179 multiplication. Returns the lost fraction. */
// Notes:
//  - `exponent` is updated in place as part of the multiplication.
//  - The result is not necessarily normalized on return; see the comment
//    ahead of the final right shift.
//  - The full-width product lives in `scratch` when it fits in four parts and
//    in a heap buffer otherwise; the heap buffer is released before returning.
1180lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
1181 IEEEFloat addend,
1182 bool ignoreAddend) {
1183 integerPart scratch[4];
1184 bool ignored;
1185
1186 assert(semantics == rhs.semantics);
1187
1188 unsigned precision = semantics->precision;
1189
1190 // Allocate space for twice as many bits as the original significand, plus one
1191 // extra bit for the addition to overflow into.
1192 unsigned newPartsCount = partCountForBits(precision * 2 + 1);
1193
1194 // FIXME: Replace with SmallVector<4>.
1195 integerPart *fullSignificand =
1196 newPartsCount > 4 ? new integerPart[newPartsCount] : scratch;
1197
1198 integerPart *lhsSignificand = significandParts();
1199 unsigned partsCount = partCount();
1200
1201 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
1202 rhs.significandParts(), partsCount, partsCount);
1203
1204 lostFraction lost_fraction = lfExactlyZero;
1205 // One, not zero, based MSB.
1206 unsigned omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1207 exponent += rhs.exponent;
1208
1209 // Assume the operands involved in the multiplication are single-precision
1210 // FP, and the two multiplicands are:
1211 // *this = a23 . a22 ... a0 * 2^e1
1212 // rhs = b23 . b22 ... b0 * 2^e2
1213 // the result of multiplication is:
1214 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
1215 // Note that there are three significant bits at the left-hand side of the
1216 // radix point: two for the multiplication, and an overflow bit for the
1217 // addition (that will always be zero at this point). Move the radix point
1218 // toward left by two bits, and adjust exponent accordingly.
1219 exponent += 2;
1220
1221 if (!ignoreAddend && addend.isNonZero()) {
1222 // The intermediate result of the multiplication has "2 * precision"
1223 // significant bits; adjust the addend to be consistent with mul result.
1224 //
// The significand and semantics of *this are temporarily replaced below so
// that addOrSubtractSignificand() operates on the full-width product; both
// are restored from these saved copies afterwards.
1225 Significand savedSignificand = significand;
1226 const fltSemantics *savedSemantics = semantics;
1227
1228 // Normalize our MSB to one below the top bit to allow for overflow.
1229 unsigned extendedPrecision = 2 * precision + 1;
1230 if (omsb != extendedPrecision - 1) {
1231 assert(extendedPrecision > omsb);
1232 APInt::tcShiftLeft(fullSignificand, newPartsCount,
1233 (extendedPrecision - 1) - omsb);
1234 exponent -= (extendedPrecision - 1) - omsb;
1235 }
1236
1237 /* Create new semantics. */
1238 fltSemantics extendedSemantics = *semantics;
1239 extendedSemantics.precision = extendedPrecision;
1240
1241 if (newPartsCount == 1)
1242 significand.part = fullSignificand[0];
1243 else
1244 significand.parts = fullSignificand;
1245 semantics = &extendedSemantics;
1246
1247 // Make a copy so we can convert it to the extended semantics.
1248 // Note that we cannot convert the addend directly, as the extendedSemantics
1249 // is a local variable (which we take a reference to).
1250 IEEEFloat extendedAddend(addend);
1251 [[maybe_unused]] opStatus status = extendedAddend.convert(
1252 extendedSemantics, APFloat::rmTowardZero, &ignored);
1253 assert(status == APFloat::opOK);
1254
1255 // Shift the significand of the addend right by one bit. This guarantees
1256 // that the high bit of the significand is zero (same as fullSignificand),
1257 // so the addition will overflow (if it does overflow at all) into the top bit.
1258 lost_fraction = extendedAddend.shiftSignificandRight(1);
1259 assert(lost_fraction == lfExactlyZero &&
1260 "Lost precision while shifting addend for fused-multiply-add.");
1261
1262 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
1263
1264 /* Restore our state. */
// In the single-part case the sum was written to significand.part, so copy
// it back into fullSignificand before the saved significand is restored.
1265 if (newPartsCount == 1)
1266 fullSignificand[0] = significand.part;
1267 significand = savedSignificand;
1268 semantics = savedSemantics;
1269
1270 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1271 }
1272
1273 // Convert the result having "2 * precision" significant-bits back to the one
1274 // having "precision" significant-bits. First, move the radix point from
1275 // position "2*precision - 1" to "precision - 1". The exponent needs to be
1276 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
// NOTE(review): the statement below subtracts "precision + 1", one more than
// the derivation above states - presumably this pairs with the earlier
// "exponent += 2" adjustment; confirm before relying on this comment.
1277 exponent -= precision + 1;
1278
1279 // In case MSB resides at the left-hand side of radix point, shift the
1280 // mantissa right by some amount to make sure the MSB reside right before
1281 // the radix point (i.e. "MSB . rest-significant-bits").
1282 //
1283 // Note that the result is not normalized when "omsb < precision". So, the
1284 // caller needs to call IEEEFloat::normalize() if normalized value is
1285 // expected.
1286 if (omsb > precision) {
1287 unsigned int bits, significantParts;
1288 lostFraction lf;
1289
1290 bits = omsb - precision;
1291 significantParts = partCountForBits(omsb);
1292 lf = shiftRight(fullSignificand, significantParts, bits);
1293 lost_fraction = combineLostFractions(lf, lost_fraction);
1294 exponent += bits;
1295 }
1296
// Copy the low partsCount parts of the product back into our significand.
1297 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
1298
1299 if (newPartsCount > 4)
1300 delete [] fullSignificand;
1301
1302 return lost_fraction;
1303}
1304
1305lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) {
1306 // When the given semantics has zero, the addend here is a zero.
1307 // i.e . it belongs to the 'fcZero' category.
1308 // But when the semantics does not support zero, we need to
1309 // explicitly convey that this addend should be ignored
1310 // for multiplication.
1311 return multiplySignificand(rhs, IEEEFloat(*semantics), !semantics->hasZero);
1312}
1313
1314/* Divide our significand by the significand of RHS, in place. */
// Returns the lost fraction of the quotient. `exponent` is adjusted in place
// by RHS's exponent and by the normalization shifts applied to the working
// copies of dividend and divisor.
1315lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
1316 integerPart scratch[4];
1317
1318 assert(semantics == rhs.semantics);
1319
1320 integerPart *lhsSignificand = significandParts();
1321 const integerPart *rhsSignificand = rhs.significandParts();
1322 unsigned partsCount = partCount();
1323
// Dividend and divisor share one buffer of 2 * partsCount parts: `scratch`
// when that fits (partsCount <= 2), a heap allocation otherwise.
1324 integerPart *dividend =
1325 partsCount > 2 ? new integerPart[partsCount * 2] : scratch;
1326 integerPart *divisor = dividend + partsCount;
1327
1328 /* Copy the dividend and divisor as they will be modified in-place. */
// Our own significand is cleared here; quotient bits are set into it below.
1329 for (unsigned i = 0; i < partsCount; i++) {
1330 dividend[i] = lhsSignificand[i];
1331 divisor[i] = rhsSignificand[i];
1332 lhsSignificand[i] = 0;
1333 }
1334
1335 exponent -= rhs.exponent;
1336
1337 unsigned int precision = semantics->precision;
1338
1339 /* Normalize the divisor. */
1340 unsigned bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1341 if (bit) {
1342 exponent += bit;
1343 APInt::tcShiftLeft(divisor, partsCount, bit);
1344 }
1345
1346 /* Normalize the dividend. */
1347 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1348 if (bit) {
1349 exponent -= bit;
1350 APInt::tcShiftLeft(dividend, partsCount, bit);
1351 }
1352
1353 /* Ensure the dividend >= divisor initially for the loop below.
1354 Incidentally, this means that the division loop below is
1355 guaranteed to set the integer bit to one. */
1356 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1357 exponent--;
1358 APInt::tcShiftLeft(dividend, partsCount, 1);
1359 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1360 }
1361
1362 /* Long division. */
// One quotient bit per iteration, from bit (precision - 1) down to bit 0.
1363 for (bit = precision; bit; bit -= 1) {
1364 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1365 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1366 APInt::tcSetBit(lhsSignificand, bit - 1);
1367 }
1368
1369 APInt::tcShiftLeft(dividend, partsCount, 1);
1370 }
1371
1372 /* Figure out the lost fraction. */
// `dividend` now holds the remainder shifted left by one, so comparing it
// against the divisor classifies the remainder relative to one half.
1373 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1374
1375 lostFraction lost_fraction;
1376 if (cmp > 0)
1377 lost_fraction = lfMoreThanHalf;
1378 else if (cmp == 0)
1379 lost_fraction = lfExactlyHalf;
1380 else if (APInt::tcIsZero(dividend, partsCount))
1381 lost_fraction = lfExactlyZero;
1382 else
1383 lost_fraction = lfLessThanHalf;
1384
1385 if (partsCount > 2)
1386 delete [] dividend;
1387
1388 return lost_fraction;
1389}
1390
1391unsigned int IEEEFloat::significandMSB() const {
1392 return APInt::tcMSB(significandParts(), partCount());
1393}
1394
1395unsigned int IEEEFloat::significandLSB() const {
1396 return APInt::tcLSB(significandParts(), partCount());
1397}
1398
1399/* Note that a zero result is NOT normalized to fcZero. */
1400lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
1401 /* Our exponent should not overflow. */
1402 assert((ExponentType) (exponent + bits) >= exponent);
1403
1404 exponent += bits;
1405
1406 return shiftRight(significandParts(), partCount(), bits);
1407}
1408
1409/* Shift the significand left BITS bits, subtract BITS from its exponent. */
1410void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
1411 assert(bits < semantics->precision ||
1412 (semantics->precision == 1 && bits <= 1));
1413
1414 if (bits) {
1415 unsigned int partsCount = partCount();
1416
1417 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1418 exponent -= bits;
1419
1420 assert(!APInt::tcIsZero(significandParts(), partsCount));
1421 }
1422}
1423
1425 assert(semantics == rhs.semantics);
1427 assert(rhs.isFiniteNonZero());
1428
1429 int compare = exponent - rhs.exponent;
1430
1431 /* If exponents are equal, do an unsigned bignum comparison of the
1432 significands. */
1433 if (compare == 0)
1434 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1435 partCount());
1436
1437 if (compare > 0)
1438 return cmpGreaterThan;
1439 else if (compare < 0)
1440 return cmpLessThan;
1441 else
1442 return cmpEqual;
1443}
1444
1445/* Set the least significant BITS bits of a bignum, clear the
1446 rest. */
1447static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts,
1448 unsigned bits) {
1449 unsigned i = 0;
1450 while (bits > APInt::APINT_BITS_PER_WORD) {
1451 dst[i++] = ~(APInt::WordType)0;
1453 }
1454
1455 if (bits)
1456 dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits);
1457
1458 while (i < parts)
1459 dst[i++] = 0;
1460}
1461
1462/* Handle overflow. Sign is preserved. We either become infinity or
1463 the largest finite number. */
1464APFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
1466 /* Infinity? */
1467 if (rounding_mode == rmNearestTiesToEven ||
1468 rounding_mode == rmNearestTiesToAway ||
1469 (rounding_mode == rmTowardPositive && !sign) ||
1470 (rounding_mode == rmTowardNegative && sign)) {
1472 makeNaN(false, sign);
1473 else
1474 category = fcInfinity;
1475 return static_cast<opStatus>(opOverflow | opInexact);
1476 }
1477 }
1478
1479 /* Otherwise we become the largest finite number. */
1480 category = fcNormal;
1481 exponent = semantics->maxExponent;
1482 tcSetLeastSignificantBits(significandParts(), partCount(),
1483 semantics->precision);
1484 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1485 semantics->nanEncoding == fltNanEncoding::AllOnes)
1486 APInt::tcClearBit(significandParts(), 0);
1487
1488 return opInexact;
1489}
1490
1491/* Returns TRUE if, when truncating the current number, with BIT the
1492 new LSB, with the given lost fraction and rounding mode, the result
1493 would need to be rounded away from zero (i.e., by increasing the
1494 signficand). This routine must work for fcZero of both signs, and
1495 fcNormal numbers. */
1496bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
1497 lostFraction lost_fraction,
1498 unsigned int bit) const {
1499 /* NaNs and infinities should not have lost fractions. */
1500 assert(isFiniteNonZero() || category == fcZero);
1501
1502 /* Current callers never pass this so we don't handle it. */
1503 assert(lost_fraction != lfExactlyZero);
1504
1505 switch (rounding_mode) {
1507 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1508
1510 if (lost_fraction == lfMoreThanHalf)
1511 return true;
1512
1513 /* Our zeroes don't have a significand to test. */
1514 if (lost_fraction == lfExactlyHalf && category != fcZero)
1515 return APInt::tcExtractBit(significandParts(), bit);
1516
1517 return false;
1518
1519 case rmTowardZero:
1520 return false;
1521
1522 case rmTowardPositive:
1523 return !sign;
1524
1525 case rmTowardNegative:
1526 return sign;
1527
1528 default:
1529 break;
1530 }
1531 llvm_unreachable("Invalid rounding mode found");
1532}
1533
1534APFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
1535 lostFraction lost_fraction) {
1536 if (!isFiniteNonZero())
1537 return opOK;
1538
1539 /* Before rounding normalize the exponent of fcNormal numbers. */
1540 /* One, not zero, based MSB. */
1541 unsigned omsb = significandMSB() + 1;
1542
1543 // Only skip this `if` if the value is exactly zero.
1544 if (omsb || lost_fraction != lfExactlyZero) {
1545 /* OMSB is numbered from 1. We want to place it in the integer
1546 bit numbered PRECISION if possible, with a compensating change in
1547 the exponent. */
1548 int exponentChange = omsb - semantics->precision;
1549
1550 /* If the resulting exponent is too high, overflow according to
1551 the rounding mode. */
1552 if (exponent + exponentChange > semantics->maxExponent)
1553 return handleOverflow(rounding_mode);
1554
1555 /* Subnormal numbers have exponent minExponent, and their MSB
1556 is forced based on that. */
1557 if (exponent + exponentChange < semantics->minExponent)
1558 exponentChange = semantics->minExponent - exponent;
1559
1560 /* Shifting left is easy as we don't lose precision. */
1561 if (exponentChange < 0) {
1562 assert(lost_fraction == lfExactlyZero);
1563
1564 shiftSignificandLeft(-exponentChange);
1565
1566 return opOK;
1567 }
1568
1569 if (exponentChange > 0) {
1570 lostFraction lf;
1571
1572 /* Shift right and capture any new lost fraction. */
1573 lf = shiftSignificandRight(exponentChange);
1574
1575 lost_fraction = combineLostFractions(lf, lost_fraction);
1576
1577 /* Keep OMSB up-to-date. */
1578 if (omsb > (unsigned) exponentChange)
1579 omsb -= exponentChange;
1580 else
1581 omsb = 0;
1582 }
1583 }
1584
1585 // The all-ones values is an overflow if NaN is all ones. If NaN is
1586 // represented by negative zero, then it is a valid finite value.
1587 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1588 semantics->nanEncoding == fltNanEncoding::AllOnes &&
1589 exponent == semantics->maxExponent && isSignificandAllOnes())
1590 return handleOverflow(rounding_mode);
1591
1592 /* Now round the number according to rounding_mode given the lost
1593 fraction. */
1594
1595 /* As specified in IEEE 754, since we do not trap we do not report
1596 underflow for exact results. */
1597 if (lost_fraction == lfExactlyZero) {
1598 /* Canonicalize zeroes. */
1599 if (omsb == 0) {
1600 category = fcZero;
1601 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
1602 sign = false;
1603 if (!semantics->hasZero)
1605 }
1606
1607 return opOK;
1608 }
1609
1610 /* Increment the significand if we're rounding away from zero. */
1611 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1612 if (omsb == 0)
1613 exponent = semantics->minExponent;
1614
1615 incrementSignificand();
1616 omsb = significandMSB() + 1;
1617
1618 /* Did the significand increment overflow? */
1619 if (omsb == (unsigned) semantics->precision + 1) {
1620 /* Renormalize by incrementing the exponent and shifting our
1621 significand right one. However if we already have the
1622 maximum exponent we overflow to infinity. */
1623 if (exponent == semantics->maxExponent)
1624 // Invoke overflow handling with a rounding mode that will guarantee
1625 // that the result gets turned into the correct infinity representation.
1626 // This is needed instead of just setting the category to infinity to
1627 // account for 8-bit floating point types that have no inf, only NaN.
1628 return handleOverflow(sign ? rmTowardNegative : rmTowardPositive);
1629
1630 shiftSignificandRight(1);
1631
1632 return opInexact;
1633 }
1634
1635 // The all-ones values is an overflow if NaN is all ones. If NaN is
1636 // represented by negative zero, then it is a valid finite value.
1637 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1638 semantics->nanEncoding == fltNanEncoding::AllOnes &&
1639 exponent == semantics->maxExponent && isSignificandAllOnes())
1640 return handleOverflow(rounding_mode);
1641 }
1642
1643 /* The normal case - we were and are not denormal, and any
1644 significand increment above didn't overflow. */
1645 if (omsb == semantics->precision)
1646 return opInexact;
1647
1648 /* We have a non-zero denormal. */
1649 assert(omsb < semantics->precision);
1650
1651 /* Canonicalize zeroes. */
1652 if (omsb == 0) {
1653 category = fcZero;
1654 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
1655 sign = false;
1656 // This condition handles the case where the semantics
1657 // does not have zero but uses the all-zero encoding
1658 // to represent the smallest normal value.
1659 if (!semantics->hasZero)
1661 }
1662
1663 /* The fcZero case is a denormal that underflowed to zero. */
1664 return (opStatus) (opUnderflow | opInexact);
1665}
1666
1667APFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
1668 bool subtract) {
1669 switch (PackCategoriesIntoKey(category, rhs.category)) {
1670 default:
1671 llvm_unreachable(nullptr);
1672
1676 assign(rhs);
1677 [[fallthrough]];
1682 if (isSignaling()) {
1683 makeQuiet();
1684 return opInvalidOp;
1685 }
1686 return rhs.isSignaling() ? opInvalidOp : opOK;
1687
1691 return opOK;
1692
1695 category = fcInfinity;
1696 sign = rhs.sign ^ subtract;
1697 return opOK;
1698
1700 assign(rhs);
1701 sign = rhs.sign ^ subtract;
1702 return opOK;
1703
1705 /* Sign depends on rounding mode; handled by caller. */
1706 return opOK;
1707
1709 /* Differently signed infinities can only be validly
1710 subtracted. */
1711 if (((sign ^ rhs.sign)!=0) != subtract) {
1712 makeNaN();
1713 return opInvalidOp;
1714 }
1715
1716 return opOK;
1717
1719 return opDivByZero;
1720 }
1721}
1722
1723/* Add or subtract two normal numbers. */
1724lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
1725 bool subtract) {
1726 [[maybe_unused]] integerPart carry = 0;
1727 lostFraction lost_fraction;
1728
1729 /* Determine if the operation on the absolute values is effectively
1730 an addition or subtraction. */
1731 subtract ^= static_cast<bool>(sign ^ rhs.sign);
1732
1733 /* Are we bigger exponent-wise than the RHS? */
1734 int bits = exponent - rhs.exponent;
1735
1736 /* Subtraction is more subtle than one might naively expect. */
1737 if (subtract) {
1738 if ((bits < 0) && !semantics->hasSignedRepr)
1740 "This floating point format does not support signed values");
1741
1742 IEEEFloat temp_rhs(rhs);
1743 bool lost_fraction_is_from_rhs = false;
1744
1745 if (bits == 0)
1746 lost_fraction = lfExactlyZero;
1747 else if (bits > 0) {
1748 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1749 lost_fraction_is_from_rhs = true;
1750 shiftSignificandLeft(1);
1751 } else {
1752 lost_fraction = shiftSignificandRight(-bits - 1);
1753 temp_rhs.shiftSignificandLeft(1);
1754 }
1755
1756 // Should we reverse the subtraction.
1757 cmpResult cmp_result = compareAbsoluteValue(temp_rhs);
1758 if (cmp_result == cmpLessThan) {
1759 bool borrow =
1760 lost_fraction != lfExactlyZero && !lost_fraction_is_from_rhs;
1761 if (borrow) {
1762 // The lost fraction is being subtracted, borrow from the significand
1763 // and invert `lost_fraction`.
1764 if (lost_fraction == lfLessThanHalf)
1765 lost_fraction = lfMoreThanHalf;
1766 else if (lost_fraction == lfMoreThanHalf)
1767 lost_fraction = lfLessThanHalf;
1768 }
1769 carry = temp_rhs.subtractSignificand(*this, borrow);
1770 copySignificand(temp_rhs);
1771 sign = !sign;
1772 } else if (cmp_result == cmpGreaterThan) {
1773 bool borrow = lost_fraction != lfExactlyZero && lost_fraction_is_from_rhs;
1774 if (borrow) {
1775 // The lost fraction is being subtracted, borrow from the significand
1776 // and invert `lost_fraction`.
1777 if (lost_fraction == lfLessThanHalf)
1778 lost_fraction = lfMoreThanHalf;
1779 else if (lost_fraction == lfMoreThanHalf)
1780 lost_fraction = lfLessThanHalf;
1781 }
1782 carry = subtractSignificand(temp_rhs, borrow);
1783 } else { // cmpEqual
1784 zeroSignificand();
1785 if (lost_fraction != lfExactlyZero && lost_fraction_is_from_rhs) {
1786 // rhs is slightly larger due to the lost fraction, flip the sign.
1787 sign = !sign;
1788 }
1789 }
1790
1791 /* The code above is intended to ensure that no borrow is
1792 necessary. */
1793 assert(!carry);
1794 } else {
1795 if (bits > 0) {
1796 IEEEFloat temp_rhs(rhs);
1797
1798 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1799 carry = addSignificand(temp_rhs);
1800 } else {
1801 lost_fraction = shiftSignificandRight(-bits);
1802 carry = addSignificand(rhs);
1803 }
1804
1805 /* We have a guard bit; generating a carry cannot happen. */
1806 assert(!carry);
1807 }
1808
1809 return lost_fraction;
1810}
1811
1812APFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
1813 switch (PackCategoriesIntoKey(category, rhs.category)) {
1814 default:
1815 llvm_unreachable(nullptr);
1816
1820 assign(rhs);
1821 sign = false;
1822 [[fallthrough]];
1827 sign ^= rhs.sign; // restore the original sign
1828 if (isSignaling()) {
1829 makeQuiet();
1830 return opInvalidOp;
1831 }
1832 return rhs.isSignaling() ? opInvalidOp : opOK;
1833
1837 category = fcInfinity;
1838 return opOK;
1839
1843 category = fcZero;
1844 return opOK;
1845
1848 makeNaN();
1849 return opInvalidOp;
1850
1852 return opOK;
1853 }
1854}
1855
1856APFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
1857 switch (PackCategoriesIntoKey(category, rhs.category)) {
1858 default:
1859 llvm_unreachable(nullptr);
1860
1864 assign(rhs);
1865 sign = false;
1866 [[fallthrough]];
1871 sign ^= rhs.sign; // restore the original sign
1872 if (isSignaling()) {
1873 makeQuiet();
1874 return opInvalidOp;
1875 }
1876 return rhs.isSignaling() ? opInvalidOp : opOK;
1877
1882 return opOK;
1883
1885 category = fcZero;
1886 return opOK;
1887
1889 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
1890 makeNaN(false, sign);
1891 else
1892 category = fcInfinity;
1893 return opDivByZero;
1894
1897 makeNaN();
1898 return opInvalidOp;
1899
1901 return opOK;
1902 }
1903}
1904
1905APFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
1906 switch (PackCategoriesIntoKey(category, rhs.category)) {
1907 default:
1908 llvm_unreachable(nullptr);
1909
1913 assign(rhs);
1914 [[fallthrough]];
1919 if (isSignaling()) {
1920 makeQuiet();
1921 return opInvalidOp;
1922 }
1923 return rhs.isSignaling() ? opInvalidOp : opOK;
1924
1928 return opOK;
1929
1935 makeNaN();
1936 return opInvalidOp;
1937
1939 return opOK;
1940 }
1941}
1942
1943APFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) {
1944 switch (PackCategoriesIntoKey(category, rhs.category)) {
1945 default:
1946 llvm_unreachable(nullptr);
1947
1951 assign(rhs);
1952 [[fallthrough]];
1957 if (isSignaling()) {
1958 makeQuiet();
1959 return opInvalidOp;
1960 }
1961 return rhs.isSignaling() ? opInvalidOp : opOK;
1962
1966 return opOK;
1967
1973 makeNaN();
1974 return opInvalidOp;
1975
1977 return opDivByZero; // fake status, indicating this is not a special case
1978 }
1979}
1980
1981/* Change sign. */
1983 // With NaN-as-negative-zero, neither NaN or negative zero can change
1984 // their signs.
1985 if (semantics->nanEncoding == fltNanEncoding::NegativeZero &&
1986 (isZero() || isNaN()))
1987 return;
1988 /* Look mummy, this one's easy. */
1989 sign = !sign;
1990}
1991
1992/* Normalized addition or subtraction. */
1993APFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
1994 roundingMode rounding_mode,
1995 bool subtract) {
1996 opStatus fs = addOrSubtractSpecials(rhs, subtract);
1997
1998 /* This return code means it was not a simple case. */
1999 if (fs == opDivByZero) {
2000 lostFraction lost_fraction;
2001
2002 lost_fraction = addOrSubtractSignificand(rhs, subtract);
2003 fs = normalize(rounding_mode, lost_fraction);
2004
2005 /* Can only be zero if we lost no fraction. */
2006 assert(category != fcZero || lost_fraction == lfExactlyZero);
2007 }
2008
2009 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2010 positive zero unless rounding to minus infinity, except that
2011 adding two like-signed zeroes gives that zero. */
2012 if (category == fcZero) {
2013 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
2014 sign = (rounding_mode == rmTowardNegative);
2015 // NaN-in-negative-zero means zeros need to be normalized to +0.
2016 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2017 sign = false;
2018 }
2019
2020 return fs;
2021}
2022
2023/* Normalized addition. */
2025 roundingMode rounding_mode) {
2026 return addOrSubtract(rhs, rounding_mode, false);
2027}
2028
2029/* Normalized subtraction. */
2031 roundingMode rounding_mode) {
2032 return addOrSubtract(rhs, rounding_mode, true);
2033}
2034
2035/* Normalized multiply. */
2037 roundingMode rounding_mode) {
2038 sign ^= rhs.sign;
2039 opStatus fs = multiplySpecials(rhs);
2040
2041 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2042 sign = false;
2043 if (isFiniteNonZero()) {
2044 lostFraction lost_fraction = multiplySignificand(rhs);
2045 fs = normalize(rounding_mode, lost_fraction);
2046 if (lost_fraction != lfExactlyZero)
2047 fs = (opStatus) (fs | opInexact);
2048 }
2049
2050 return fs;
2051}
2052
2053/* Normalized divide. */
2055 roundingMode rounding_mode) {
2056 sign ^= rhs.sign;
2057 opStatus fs = divideSpecials(rhs);
2058
2059 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2060 sign = false;
2061 if (isFiniteNonZero()) {
2062 lostFraction lost_fraction = divideSignificand(rhs);
2063 fs = normalize(rounding_mode, lost_fraction);
2064 if (lost_fraction != lfExactlyZero)
2065 fs = (opStatus) (fs | opInexact);
2066 }
2067
2068 return fs;
2069}
2070
2071/* Normalized remainder. */
2073 unsigned int origSign = sign;
2074
2075 // First handle the special cases.
2076 opStatus fs = remainderSpecials(rhs);
2077 if (fs != opDivByZero)
2078 return fs;
2079
2080 fs = opOK;
2081
2082 // Make sure the current value is less than twice the denom. If the addition
2083 // did not succeed (an overflow has happened), which means that the finite
2084 // value we currently posses must be less than twice the denom (as we are
2085 // using the same semantics).
2086 IEEEFloat P2 = rhs;
2087 if (P2.add(rhs, rmNearestTiesToEven) == opOK) {
2088 fs = mod(P2);
2089 assert(fs == opOK);
2090 }
2091
2092 // Lets work with absolute numbers.
2093 IEEEFloat P = rhs;
2094 P.sign = false;
2095 sign = false;
2096
2097 //
2098 // To calculate the remainder we use the following scheme.
2099 //
2100 // The remainder is defained as follows:
2101 //
2102 // remainder = numer - rquot * denom = x - r * p
2103 //
2104 // Where r is the result of: x/p, rounded toward the nearest integral value
2105 // (with halfway cases rounded toward the even number).
2106 //
2107 // Currently, (after x mod 2p):
2108 // r is the number of 2p's present inside x, which is inherently, an even
2109 // number of p's.
2110 //
2111 // We may split the remaining calculation into 4 options:
2112 // - if x < 0.5p then we round to the nearest number with is 0, and are done.
2113 // - if x == 0.5p then we round to the nearest even number which is 0, and we
2114 // are done as well.
2115 // - if 0.5p < x < p then we round to nearest number which is 1, and we have
2116 // to subtract 1p at least once.
2117 // - if x >= p then we must subtract p at least once, as x must be a
2118 // remainder.
2119 //
2120 // By now, we were done, or we added 1 to r, which in turn, now an odd number.
2121 //
2122 // We can now split the remaining calculation to the following 3 options:
2123 // - if x < 0.5p then we round to the nearest number with is 0, and are done.
2124 // - if x == 0.5p then we round to the nearest even number. As r is odd, we
2125 // must round up to the next even number. so we must subtract p once more.
2126 // - if x > 0.5p (and inherently x < p) then we must round r up to the next
2127 // integral, and subtract p once more.
2128 //
2129
2130 // Extend the semantics to prevent an overflow/underflow or inexact result.
2131 bool losesInfo;
2132 fltSemantics extendedSemantics = *semantics;
2133 extendedSemantics.maxExponent++;
2134 extendedSemantics.minExponent--;
2135 extendedSemantics.precision += 2;
2136
2137 IEEEFloat VEx = *this;
2138 fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2139 assert(fs == opOK && !losesInfo);
2140 IEEEFloat PEx = P;
2141 fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2142 assert(fs == opOK && !losesInfo);
2143
2144 // It is simpler to work with 2x instead of 0.5p, and we do not need to lose
2145 // any fraction.
2146 fs = VEx.add(VEx, rmNearestTiesToEven);
2147 assert(fs == opOK);
2148
2149 if (VEx.compare(PEx) == cmpGreaterThan) {
2151 assert(fs == opOK);
2152
2153 // Make VEx = this.add(this), but because we have different semantics, we do
2154 // not want to `convert` again, so we just subtract PEx twice (which equals
2155 // to the desired value).
2156 fs = VEx.subtract(PEx, rmNearestTiesToEven);
2157 assert(fs == opOK);
2158 fs = VEx.subtract(PEx, rmNearestTiesToEven);
2159 assert(fs == opOK);
2160
2161 cmpResult result = VEx.compare(PEx);
2162 if (result == cmpGreaterThan || result == cmpEqual) {
2164 assert(fs == opOK);
2165 }
2166 }
2167
2168 if (isZero()) {
2169 sign = origSign; // IEEE754 requires this
2170 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2171 // But some 8-bit floats only have positive 0.
2172 sign = false;
2173 } else {
2174 sign ^= origSign;
2175 }
2176 return fs;
2177}
2178
2179/* Normalized llvm frem (C fmod). */
// Reduces *this with respect to rhs. Special operands are handled first by
// modSpecials(); the loop then builds V, a copy of rhs scaled by the exponent
// difference ilogb(*this) - ilogb(rhs) and carrying the sign of *this. A zero
// result takes the original sign of *this, except that it is forced positive
// for semantics using the NegativeZero NaN encoding. Returns the last status.
// NOTE(review): the embedded listing numbers skip 2180, 2185 and 2194, so the
// function signature, the tail of the loop condition and the statement that
// consumes V are not present in this listing -- restore them from upstream.
2181 opStatus fs = modSpecials(rhs);
// Remember the incoming sign so a zero result can be given that sign below.
2182 unsigned int origSign = sign;
2183
2184 while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
2186 int Exp = ilogb(*this) - ilogb(rhs);
2187 IEEEFloat V = scalbn(rhs, Exp, rmNearestTiesToEven);
2188 // V can overflow to NaN with fltNonfiniteBehavior::NanOnly, so explicitly
2189 // check for it.
// If the scaled value is unusable or larger in magnitude than *this, fall
// back to the next smaller power-of-two multiple of rhs.
2190 if (V.isNaN() || compareAbsoluteValue(V) == cmpLessThan)
2191 V = scalbn(rhs, Exp - 1, rmNearestTiesToEven);
2192 V.sign = sign;
2193
2195
2196 // When the semantics supports zero, this loop's
2197 // exit-condition is handled by the 'isFiniteNonZero'
2198 // category check above. However, when the semantics
2199 // does not have 'fcZero' and we have reached the
2200 // minimum possible value, (and any further subtract
2201 // will underflow to the same value) explicitly
2202 // provide an exit-path here.
2203 if (!semantics->hasZero && this->isSmallest())
2204 break;
2205
2206 assert(fs==opOK);
2207 }
2208 if (isZero()) {
2209 sign = origSign; // fmod requires this
// Formats whose NaN is encoded as negative zero only have a positive zero.
2210 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2211 sign = false;
2212 }
2213 return fs;
2214}
2215
2216/* Normalized fused-multiply-add. */
// Computes (*this * multiplicand) + addend in place and returns the status.
// When *this and multiplicand are finite non-zero and addend is finite, the
// product and sum are formed together by multiplySignificand() and rounded
// once by normalize(). Otherwise the product is resolved by
// multiplySpecials() and, if that succeeds, the addend is added with an
// ordinary addOrSubtract().
// NOTE(review): the embedded listing number 2217 is skipped, so the first
// line of the signature (function name and the 'multiplicand' parameter) is
// not present in this listing.
2218 const IEEEFloat &addend,
2219 roundingMode rounding_mode) {
2220 opStatus fs;
2221
2222 /* Post-multiplication sign, before addition. */
2223 sign ^= multiplicand.sign;
2224
2225 /* If and only if all arguments are normal do we need to do an
2226 extended-precision calculation. */
// Note that the addend only has to be finite here, so a zero addend also
// takes this path.
2227 if (isFiniteNonZero() &&
2228 multiplicand.isFiniteNonZero() &&
2229 addend.isFinite()) {
2230 lostFraction lost_fraction;
2231
2232 lost_fraction = multiplySignificand(multiplicand, addend);
2233 fs = normalize(rounding_mode, lost_fraction);
// Any discarded bits make the result inexact even if normalize() did not
// report it.
2234 if (lost_fraction != lfExactlyZero)
2235 fs = (opStatus) (fs | opInexact);
2236
2237 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2238 positive zero unless rounding to minus infinity, except that
2239 adding two like-signed zeroes gives that zero. */
2240 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) {
2241 sign = (rounding_mode == rmTowardNegative);
// Formats whose NaN is encoded as negative zero only have a positive zero.
2242 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2243 sign = false;
2244 }
2245 } else {
2246 fs = multiplySpecials(multiplicand);
2247
2248 /* FS can only be opOK or opInvalidOp. There is no more work
2249 to do in the latter case. The IEEE-754R standard says it is
2250 implementation-defined in this case whether, if ADDEND is a
2251 quiet NaN, we raise invalid op; this implementation does so.
2252
2253 If we need to do the addition we can do so with normal
2254 precision. */
2255 if (fs == opOK)
2256 fs = addOrSubtract(addend, rounding_mode, false);
2257 }
2258
2259 return fs;
2260}
2261
2262/* Rounding-mode correct round to integral value. */
// Rounds *this in place to an integral value using rounding_mode and returns
// the status of the rounding addition. Infinities, quiet NaNs, zeroes and
// values whose exponent shows they are already integral are returned
// unchanged with opOK; a signaling NaN is quieted and reports opInvalidOp.
// NOTE(review): the embedded listing numbers skip 2263 and 2326, so the
// function signature and the final argument(s) of the convertFromAPInt()
// call are not present in this listing.
2264 if (isInfinity())
2265 // [IEEE Std 754-2008 6.1]:
2266 // The behavior of infinity in floating-point arithmetic is derived from the
2267 // limiting cases of real arithmetic with operands of arbitrarily
2268 // large magnitude, when such a limit exists.
2269 // ...
2270 // Operations on infinite operands are usually exact and therefore signal no
2271 // exceptions ...
2272 return opOK;
2273
2274 if (isNaN()) {
2275 if (isSignaling()) {
2276 // [IEEE Std 754-2008 6.2]:
2277 // Under default exception handling, any operation signaling an invalid
2278 // operation exception and for which a floating-point result is to be
2279 // delivered shall deliver a quiet NaN.
2280 makeQuiet();
2281 // [IEEE Std 754-2008 6.2]:
2282 // Signaling NaNs shall be reserved operands that, under default exception
2283 // handling, signal the invalid operation exception(see 7.2) for every
2284 // general-computational and signaling-computational operation except for
2285 // the conversions described in 5.12.
2286 return opInvalidOp;
2287 } else {
2288 // [IEEE Std 754-2008 6.2]:
2289 // For an operation with quiet NaN inputs, other than maximum and minimum
2290 // operations, if a floating-point result is to be delivered the result
2291 // shall be a quiet NaN which should be one of the input NaNs.
2292 // ...
2293 // Every general-computational and quiet-computational operation involving
2294 // one or more input NaNs, none of them signaling, shall signal no
2295 // exception, except fusedMultiplyAdd might signal the invalid operation
2296 // exception(see 7.2).
2297 return opOK;
2298 }
2299 }
2300
2301 if (isZero()) {
2302 // [IEEE Std 754-2008 6.3]:
2303 // ... the sign of the result of conversions, the quantize operation, the
2304 // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is
2305 // the sign of the first or only operand.
2306 return opOK;
2307 }
2308
2309 // If the exponent is large enough, we know that this value is already
2310 // integral, and the arithmetic below would potentially cause it to saturate
2311 // to +/-Inf. Bail out early instead.
2312 if (exponent + 1 >= (int)APFloat::semanticsPrecision(*semantics))
2313 return opOK;
2314
2315 // The algorithm here is quite simple: we add 2^(p-1), where p is the
2316 // precision of our format, and then subtract it back off again. The choice
2317 // of rounding modes for the addition/subtraction determines the rounding mode
2318 // for our integral rounding as well.
2319 // NOTE: When the input value is negative, we do subtraction followed by
2320 // addition instead.
// Build the integer 1 << (p - 1) in an APInt and convert it to this format.
2321 APInt IntegerConstant(NextPowerOf2(APFloat::semanticsPrecision(*semantics)),
2322 1);
2323 IntegerConstant <<= APFloat::semanticsPrecision(*semantics) - 1;
2324 IEEEFloat MagicConstant(*semantics);
2325 opStatus fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
2327 assert(fs == opOK);
// Giving the constant the sign of *this turns the add/subtract pair below
// into subtract/add for negative inputs.
2328 MagicConstant.sign = sign;
2329
2330 // Preserve the input sign so that we can handle the case of zero result
2331 // correctly.
2332 bool inputSign = isNegative();
2333
// Only the status of this first, rounding step is reported to the caller.
2334 fs = add(MagicConstant, rounding_mode);
2335
2336 // Current value and 'MagicConstant' are both integers, so the result of the
2337 // subtraction is always exact according to Sterbenz' lemma.
2338 subtract(MagicConstant, rounding_mode);
2339
2340 // Restore the input sign.
2341 if (inputSign != isNegative())
2342 changeSign();
2343
2344 return fs;
2345}
2346
2347/* Comparison requires normalized numbers. */
// Orders *this against rhs (which must share the same semantics) and returns
// cmpLessThan, cmpEqual, cmpGreaterThan or cmpUnordered. The switch on the
// packed pair of categories resolves every combination that involves a
// non-normal operand; two normal numbers fall out of the switch and are
// compared by sign and then by magnitude.
// NOTE(review): the embedded listing numbers skip 2348, 2355-2361, 2364-2366,
// 2372-2374, 2380, 2388 and 2391, so the function signature and every
// 'case' label of the switch are not present in this listing. The bodies
// below therefore appear without the labels that select them.
2349 assert(semantics == rhs.semantics);
2350
2351 switch (PackCategoriesIntoKey(category, rhs.category)) {
2352 default:
2353 llvm_unreachable(nullptr);
2354
2362 return cmpUnordered;
2363
// Body for a group of cases decided by the sign of *this alone.
2367 if (sign)
2368 return cmpLessThan;
2369 else
2370 return cmpGreaterThan;
2371
// Body for a group of cases decided by the sign of rhs alone.
2375 if (rhs.sign)
2376 return cmpGreaterThan;
2377 else
2378 return cmpLessThan;
2379
// Body for a case where equal signs mean equality and otherwise the negative
// operand is the smaller one.
2381 if (sign == rhs.sign)
2382 return cmpEqual;
2383 else if (sign)
2384 return cmpLessThan;
2385 else
2386 return cmpGreaterThan;
2387
2389 return cmpEqual;
2390
2392 break;
2393 }
2394
2395 cmpResult result;
2396 /* Two normal numbers. Do they have the same sign? */
2397 if (sign != rhs.sign) {
2398 if (sign)
2399 result = cmpLessThan;
2400 else
2401 result = cmpGreaterThan;
2402 } else {
2403 /* Compare absolute values; invert result if negative. */
2404 result = compareAbsoluteValue(rhs);
2405
2406 if (sign) {
2407 if (result == cmpLessThan)
2408 result = cmpGreaterThan;
2409 else if (result == cmpGreaterThan)
2410 result = cmpLessThan;
2411 }
2412 }
2413
2414 return result;
2415}
2416
2417/// IEEEFloat::convert - convert a value of one floating point type to another.
2418/// The return value corresponds to the IEEE754 exceptions. *losesInfo
2419/// records whether the transformation lost information, i.e. whether
2420/// converting the result back to the original type will produce the
2421/// original value (this is almost the same as return value==opOK, but there
2422/// are edge cases where this is not so).
2423
// NOTE(review): the embedded listing numbers skip 2424, 2431, 2472, 2508 and
// 2554. Missing from this listing are therefore: the first line of the
// signature (which declares 'toSemantics'), the declaration of the
// 'lostFraction' variable used below, the tail of the truncation condition,
// the right-hand side of the '*losesInfo =' assignment in the NanOnly branch,
// and the statement guarded by the final '!semantics->hasZero' check.
2425 roundingMode rounding_mode,
2426 bool *losesInfo) {
2427 opStatus fs;
2428 const fltSemantics &fromSemantics = *semantics;
// Sample the signaling state now; the significand is shifted further down.
2429 bool is_signaling = isSignaling();
2430
2432 unsigned newPartCount = partCountForBits(toSemantics.precision + 1);
2433 unsigned oldPartCount = partCount();
// Positive shift: the target has more precision (extension); negative shift:
// the target has less (truncation).
2434 int shift = toSemantics.precision - fromSemantics.precision;
2435
2436 bool X86SpecialNan = false;
2437 if (&fromSemantics == &APFloatBase::semX87DoubleExtended &&
2438 &toSemantics != &APFloatBase::semX87DoubleExtended && category == fcNaN &&
2439 (!(*significandParts() & 0x8000000000000000ULL) ||
2440 !(*significandParts() & 0x4000000000000000ULL))) {
2441 // x86 has some unusual NaNs which cannot be represented in any other
2442 // format; note them here.
2443 X86SpecialNan = true;
2444 }
2445
2446 // If this is a truncation of a denormal number, and the target semantics
2447 // has larger exponent range than the source semantics (this can happen
2448 // when truncating from PowerPC double-double to double format), the
2449 // right shift could lose result mantissa bits. Adjust exponent instead
2450 // of performing excessive shift.
2451 // Also do a similar trick in case shifting denormal would produce zero
2452 // significand as this case isn't handled correctly by normalize.
2453 if (shift < 0 && isFiniteNonZero()) {
2454 int omsb = significandMSB() + 1;
2455 int exponentChange = omsb - fromSemantics.precision;
2456 if (exponent + exponentChange < toSemantics.minExponent)
2457 exponentChange = toSemantics.minExponent - exponent;
2458 exponentChange = std::max(exponentChange, shift);
2459 if (exponentChange < 0) {
2460 shift -= exponentChange;
2461 exponent += exponentChange;
2462 } else if (omsb <= -shift) {
2463 exponentChange = omsb + shift - 1; // leave at least one bit set
2464 shift -= exponentChange;
2465 exponent += exponentChange;
2466 }
2467 }
2468
2469 // If this is a truncation, perform the shift before we narrow the storage.
2470 if (shift < 0 && (isFiniteNonZero() ||
2471 (category == fcNaN && semantics->nonFiniteBehavior !=
2473 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
2474
2475 // Fix the storage so it can hold to new value.
2476 if (newPartCount > oldPartCount) {
2477 // The new type requires more storage; make it available.
2478 integerPart *newParts;
2479 newParts = new integerPart[newPartCount];
2480 APInt::tcSet(newParts, 0, newPartCount);
// Only finite non-zero values and NaNs have significand bits worth copying.
2481 if (isFiniteNonZero() || category==fcNaN)
2482 APInt::tcAssign(newParts, significandParts(), oldPartCount);
2483 freeSignificand();
2484 significand.parts = newParts;
2485 } else if (newPartCount == 1 && oldPartCount != 1) {
2486 // Switch to built-in storage for a single part.
2487 integerPart newPart = 0;
2488 if (isFiniteNonZero() || category==fcNaN)
2489 newPart = significandParts()[0];
2490 freeSignificand();
2491 significand.part = newPart;
2492 }
2493
2494 // Now that we have the right storage, switch the semantics.
2495 semantics = &toSemantics;
2496
2497 // If this is an extension, perform the shift now that the storage is
2498 // available.
2499 if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
2500 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2501
2502 if (isFiniteNonZero()) {
2503 fs = normalize(rounding_mode, lostFraction);
2504 *losesInfo = (fs != opOK);
2505 } else if (category == fcNaN) {
// Target formats with NanOnly behaviour get a freshly made NaN that keeps
// only the sign of the input.
2506 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
2507 *losesInfo =
2509 makeNaN(false, sign);
2510 return is_signaling ? opInvalidOp : opOK;
2511 }
2512
2513 // If NaN is negative zero, we need to create a new NaN to avoid converting
2514 // NaN to -Inf.
2515 if (fromSemantics.nanEncoding == fltNanEncoding::NegativeZero &&
2516 semantics->nanEncoding != fltNanEncoding::NegativeZero)
2517 makeNaN(false, false);
2518
2519 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2520
2521 // For x87 extended precision, we want to make a NaN, not a special NaN if
2522 // the input wasn't special either.
2523 if (!X86SpecialNan && semantics == &APFloatBase::semX87DoubleExtended)
2524 APInt::tcSetBit(significandParts(), semantics->precision - 1);
2525
2526 // Convert of sNaN creates qNaN and raises an exception (invalid op).
2527 // This also guarantees that a sNaN does not become Inf on a truncation
2528 // that loses all payload bits.
2529 if (is_signaling) {
2530 makeQuiet();
2531 fs = opInvalidOp;
2532 } else {
2533 fs = opOK;
2534 }
2535 } else if (category == fcInfinity &&
2536 semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
// The target has no infinity: the value becomes a NaN and the conversion is
// reported as lossy and inexact.
2537 makeNaN(false, sign);
2538 *losesInfo = true;
2539 fs = opInexact;
2540 } else if (category == fcZero &&
2541 semantics->nanEncoding == fltNanEncoding::NegativeZero) {
2542 // Negative zero loses info, but positive zero doesn't.
2543 *losesInfo =
2544 fromSemantics.nanEncoding != fltNanEncoding::NegativeZero && sign;
2545 fs = *losesInfo ? opInexact : opOK;
2546 // NaN is negative zero means -0 -> +0, which can lose information
2547 sign = false;
2548 } else {
2549 *losesInfo = false;
2550 fs = opOK;
2551 }
2552
2553 if (category == fcZero && !semantics->hasZero)
2555 return fs;
2556}
2557
2558/* Convert a floating point number to an integer according to the
2559 rounding mode. If the rounded integer value is out of range this
2560 returns an invalid operation exception and the contents of the
2561 destination parts are unspecified. If the rounded value is in
2562 range but the floating point number is not the exact integer, the C
2563 standard doesn't require an inexact exception to be raised. IEEE
2564 854 does require it so we do that.
2565
2566 Note that for conversions to integer type the C standard requires
2567 round-to-zero to always be used. */
2568APFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
2569 MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
2570 roundingMode rounding_mode, bool *isExact) const {
2571 *isExact = false;
2572
2573 /* Handle the three special cases first. */
2574 if (category == fcInfinity || category == fcNaN)
2575 return opInvalidOp;
2576
2577 unsigned dstPartsCount = partCountForBits(width);
2578 assert(dstPartsCount <= parts.size() && "Integer too big");
2579
2580 if (category == fcZero) {
2581 APInt::tcSet(parts.data(), 0, dstPartsCount);
2582 // Negative zero can't be represented as an int.
2583 *isExact = !sign;
2584 return opOK;
2585 }
2586
2587 const integerPart *src = significandParts();
2588
2589 unsigned truncatedBits;
2590 /* Step 1: place our absolute value, with any fraction truncated, in
2591 the destination. */
2592 if (exponent < 0) {
2593 /* Our absolute value is less than one; truncate everything. */
2594 APInt::tcSet(parts.data(), 0, dstPartsCount);
2595 /* For exponent -1 the integer bit represents .5, look at that.
2596 For smaller exponents leftmost truncated bit is 0. */
2597 truncatedBits = semantics->precision -1U - exponent;
2598 } else {
2599 /* We want the most significant (exponent + 1) bits; the rest are
2600 truncated. */
2601 unsigned int bits = exponent + 1U;
2602
2603 /* Hopelessly large in magnitude? */
2604 if (bits > width)
2605 return opInvalidOp;
2606
2607 if (bits < semantics->precision) {
2608 /* We truncate (semantics->precision - bits) bits. */
2609 truncatedBits = semantics->precision - bits;
2610 APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
2611 } else {
2612 /* We want at least as many bits as are available. */
2613 APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
2614 0);
2615 APInt::tcShiftLeft(parts.data(), dstPartsCount,
2616 bits - semantics->precision);
2617 truncatedBits = 0;
2618 }
2619 }
2620
2621 /* Step 2: work out any lost fraction, and increment the absolute
2622 value if we would round away from zero. */
2623 lostFraction lost_fraction;
2624 if (truncatedBits) {
2625 lost_fraction = lostFractionThroughTruncation(src, partCount(),
2626 truncatedBits);
2627 if (lost_fraction != lfExactlyZero &&
2628 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2629 if (APInt::tcIncrement(parts.data(), dstPartsCount))
2630 return opInvalidOp; /* Overflow. */
2631 }
2632 } else {
2633 lost_fraction = lfExactlyZero;
2634 }
2635
2636 /* Step 3: check if we fit in the destination. */
2637 unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;
2638
2639 if (sign) {
2640 if (!isSigned) {
2641 /* Negative numbers cannot be represented as unsigned. */
2642 if (omsb != 0)
2643 return opInvalidOp;
2644 } else {
2645 /* It takes omsb bits to represent the unsigned integer value.
2646 We lose a bit for the sign, but care is needed as the
2647 maximally negative integer is a special case. */
2648 if (omsb == width &&
2649 APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
2650 return opInvalidOp;
2651
2652 /* This case can happen because of rounding. */
2653 if (omsb > width)
2654 return opInvalidOp;
2655 }
2656
2657 APInt::tcNegate (parts.data(), dstPartsCount);
2658 } else {
2659 if (omsb >= width + !isSigned)
2660 return opInvalidOp;
2661 }
2662
2663 if (lost_fraction == lfExactlyZero) {
2664 *isExact = true;
2665 return opOK;
2666 }
2667 return opInexact;
2668}
2669
2670/* Same as convertToSignExtendedInteger, except we provide
2671 deterministic values in case of an invalid operation exception,
2672 namely zero for NaNs and the minimal or maximal value respectively
2673 for underflow or overflow.
2674 The *isExact output tells whether the result is exact, in the sense
2675 that converting it back to the original floating point type produces
2676 the original value. This is almost equivalent to result==opOK,
2677 except for negative zeroes.
2678*/
// NOTE(review): the embedded listing numbers skip 2679 and 2680, so the
// return type, the function name and the 'parts' parameter are not present
// in this listing.
2681 unsigned int width, bool isSigned,
2682 roundingMode rounding_mode, bool *isExact) const {
2683 opStatus fs = convertToSignExtendedInteger(parts, width, isSigned,
2684 rounding_mode, isExact);
2685
2686 if (fs == opInvalidOp) {
2687 unsigned int bits, dstPartsCount;
2688
2689 dstPartsCount = partCountForBits(width);
2690 assert(dstPartsCount <= parts.size() && "Integer too big");
2691
// 'bits' is the number of low bits to set in the saturated result: none for
// a NaN, 'isSigned' (0 or 1) for a negative value, and width - isSigned for
// a positive value.
2692 if (category == fcNaN)
2693 bits = 0;
2694 else if (sign)
2695 bits = isSigned;
2696 else
2697 bits = width - isSigned;
2698
2699 tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
// For a negative value and a signed destination, move the bit set above up
// to position width - 1.
2700 if (sign && isSigned)
2701 APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
2702 }
2703
2704 return fs;
2705}
2706
2707/* Convert an unsigned integer SRC to a floating point number,
2708 rounding according to ROUNDING_MODE. The sign of the floating
2709 point number is not modified. */
2710APFloat::opStatus IEEEFloat::convertFromUnsignedParts(
2711 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
2712 category = fcNormal;
2713 unsigned omsb = APInt::tcMSB(src, srcCount) + 1;
2714 integerPart *dst = significandParts();
2715 unsigned dstCount = partCount();
2716 unsigned precision = semantics->precision;
2717
2718 /* We want the most significant PRECISION bits of SRC. There may not
2719 be that many; extract what we can. */
2720 lostFraction lost_fraction;
2721 if (precision <= omsb) {
2722 exponent = omsb - 1;
2723 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2724 omsb - precision);
2725 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2726 } else {
2727 exponent = precision - 1;
2728 lost_fraction = lfExactlyZero;
2729 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2730 }
2731
2732 return normalize(rounding_mode, lost_fraction);
2733}
2734
// Sets *this from the integer Val, rounding according to rounding_mode. When
// isSigned is true and Val is negative, the sign flag is set and the negated
// value is converted; otherwise the value is treated as unsigned.
// NOTE(review): the embedded listing number 2735 is skipped, so the first
// line of the signature (function name and the 'Val' / 'isSigned'
// parameters) is not present in this listing.
2736 roundingMode rounding_mode) {
// Taken before any negation below.
2737 unsigned int partCount = Val.getNumWords();
2738 APInt api = Val;
2739
2740 sign = false;
2741 if (isSigned && api.isNegative()) {
2742 sign = true;
2743 api = -api;
2744 }
2745
2746 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2747}
2748
// Parses the hexadecimal significand and binary exponent in 's' and sets
// *this to that value rounded according to rounding_mode. Callers in this
// file pass the text that follows the "0x" prefix. Returns the status from
// normalize(), or an Error for malformed input (multiple dots, no exponent,
// invalid character, no significand digits).
// NOTE(review): the embedded listing numbers skip 2749 and 2766, so the
// return-type line and the declaration of 'dot' (passed to
// skipLeadingZeroesAndAnyDot below) are not present in this listing.
2750IEEEFloat::convertFromHexadecimalString(StringRef s,
2751 roundingMode rounding_mode) {
2752 lostFraction lost_fraction = lfExactlyZero;
2753
2754 category = fcNormal;
2755 zeroSignificand();
2756 exponent = 0;
2757
2758 integerPart *significand = significandParts();
2759 unsigned partsCount = partCount();
// Digits are stored from the most significant nibble of the significand
// downwards; bitPos is the position of the next free nibble.
2760 unsigned bitPos = partsCount * integerPartWidth;
2761 bool computedTrailingFraction = false;
2762
2763 // Skip leading zeroes and any (hexa)decimal point.
2764 StringRef::iterator begin = s.begin();
2765 StringRef::iterator end = s.end();
2767 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2768 if (!PtrOrErr)
2769 return PtrOrErr.takeError();
2770 StringRef::iterator p = *PtrOrErr;
2771 StringRef::iterator firstSignificantDigit = p;
2772
2773 while (p != end) {
2774 integerPart hex_value;
2775
2776 if (*p == '.') {
// 'dot' equals 'end' for as long as no dot has been seen.
2777 if (dot != end)
2778 return createError("String contains multiple dots");
2779 dot = p++;
2780 continue;
2781 }
2782
2783 hex_value = hexDigitValue(*p);
// A non-hex character ends the significand.
2784 if (hex_value == UINT_MAX)
2785 break;
2786
2787 p++;
2788
2789 // Store the number while we have space.
2790 if (bitPos) {
2791 bitPos -= 4;
2792 hex_value <<= bitPos % integerPartWidth;
2793 significand[bitPos / integerPartWidth] |= hex_value;
2794 } else if (!computedTrailingFraction) {
// The significand is full: classify the remaining digits once as the lost
// fraction.
2795 auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value);
2796 if (!FractOrErr)
2797 return FractOrErr.takeError();
2798 lost_fraction = *FractOrErr;
2799 computedTrailingFraction = true;
2800 }
2801 }
2802
2803 /* Hex floats require an exponent but not a hexadecimal point. */
2804 if (p == end)
2805 return createError("Hex strings require an exponent");
2806 if (*p != 'p' && *p != 'P')
2807 return createError("Invalid character in significand");
2808 if (p == begin)
2809 return createError("Significand has no digits");
2810 if (dot != end && p - begin == 1)
2811 return createError("Significand has no digits");
2812
2813 /* Ignore the exponent if we are zero. */
2814 if (p != firstSignificantDigit) {
2815 int expAdjustment;
2816
2817 /* Implicit hexadecimal point? */
2818 if (dot == end)
2819 dot = p;
2820
2821 /* Calculate the exponent adjustment implicit in the number of
2822 significant digits. */
2823 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2824 if (expAdjustment < 0)
2825 expAdjustment++;
2826 expAdjustment = expAdjustment * 4 - 1;
2827
2828 /* Adjust for writing the significand starting at the most
2829 significant nibble. */
2830 expAdjustment += semantics->precision;
2831 expAdjustment -= partsCount * integerPartWidth;
2832
2833 /* Adjust for the given exponent. */
2834 auto ExpOrErr = totalExponent(p + 1, end, expAdjustment);
2835 if (!ExpOrErr)
2836 return ExpOrErr.takeError();
2837 exponent = *ExpOrErr;
2838 }
2839
2840 return normalize(rounding_mode, lost_fraction);
2841}
2842
// Sets *this to the decimal value decSigParts * 10^exp, rounded according to
// rounding_mode. The value is computed as decSig * 5^|exp| (or decSig /
// 5^|exp| for negative exp) in a temporary format with the binary exponent
// adjusted by exp. The loop doubles the temporary precision ('parts') on each
// pass and returns once the distance from a rounding boundary (HUdistance)
// is at least the accumulated error bound (HUerr), both in half-ulps.
// NOTE(review): the embedded listing numbers skip 2843, 2848, 2872 and 2898.
// Missing from this listing are therefore: the return-type line, the
// declaration of 'pow5Parts', the last argument(s) of the
// pow5.convertFromUnsignedParts() call, and the opening of the assertion
// whose tail appears on listing line 2899.
2844IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2845 unsigned sigPartCount, int exp,
2846 roundingMode rounding_mode) {
// Scratch semantics for the wide calculation; its precision is set on each
// pass of the loop below.
2847 fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
2849
2850 bool isNearest = rounding_mode == rmNearestTiesToEven ||
2851 rounding_mode == rmNearestTiesToAway;
2852
2853 unsigned parts = partCountForBits(semantics->precision + 11);
2854
2855 /* Calculate pow(5, abs(exp)). */
2856 unsigned pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp : -exp);
2857
2858 for (;; parts *= 2) {
2859 unsigned int excessPrecision, truncatedBits;
2860
2861 calcSemantics.precision = parts * integerPartWidth - 1;
2862 excessPrecision = calcSemantics.precision - semantics->precision;
2863 truncatedBits = excessPrecision;
2864
2865 IEEEFloat decSig(calcSemantics, uninitialized);
2866 decSig.makeZero(sign);
2867 IEEEFloat pow5(calcSemantics);
2868
2869 opStatus sigStatus = decSig.convertFromUnsignedParts(
2870 decSigParts, sigPartCount, rmNearestTiesToEven);
2871 opStatus powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2873 /* Add exp, as 10^n = 5^n * 2^n. */
2874 decSig.exponent += exp;
2875
2876 lostFraction calcLostFraction;
2877 integerPart HUerr, HUdistance;
2878 unsigned int powHUerr;
2879
2880 if (exp >= 0) {
2881 /* multiplySignificand leaves the precision-th bit set to 1. */
2882 calcLostFraction = decSig.multiplySignificand(pow5);
2883 powHUerr = powStatus != opOK;
2884 } else {
2885 calcLostFraction = decSig.divideSignificand(pow5);
2886 /* Denormal numbers have less precision. */
2887 if (decSig.exponent < semantics->minExponent) {
2888 excessPrecision += (semantics->minExponent - decSig.exponent);
2889 truncatedBits = excessPrecision;
2890 excessPrecision = std::min(excessPrecision, calcSemantics.precision);
2891 }
2892 /* Extra half-ulp lost in reciprocal of exponent. */
2893 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2894 }
2895
2896 /* Both multiplySignificand and divideSignificand return the
2897 result with the integer bit set. */
2899 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2900
2901 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2902 powHUerr);
2903 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2904 excessPrecision, isNearest);
2905
2906 /* Are we guaranteed to round correctly if we truncate? */
2907 if (HUdistance >= HUerr) {
2908 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2909 calcSemantics.precision - excessPrecision,
2910 excessPrecision);
2911 /* Take the exponent of decSig. If we tcExtract-ed less bits
2912 above we must adjust our exponent to compensate for the
2913 implicit right shift. */
2914 exponent = (decSig.exponent + semantics->precision
2915 - (calcSemantics.precision - excessPrecision));
2916 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2917 decSig.partCount(),
2918 truncatedBits);
// Fold any inexactness from the two input conversions into the final status.
2919 return static_cast<opStatus>(normalize(rounding_mode, calcLostFraction) |
2920 ((sigStatus | powStatus) & opInexact));
2921 }
2922 }
2923}
2924
// Parses the decimal number in 'str' and sets *this to it, rounded according
// to rounding_mode. Zero inputs and exponents that are certain to overflow
// or underflow are handled directly; every other input is accumulated into a
// big integer and handed to roundSignificandWithExponent(). Returns the
// resulting status, or an Error if the text is malformed.
// NOTE(review): the embedded listing number 2965 is skipped, so the
// statement guarded by 'if (!semantics->hasZero)' in the zero branch is not
// present in this listing.
2925Expected<APFloat::opStatus>
2926IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
2927 decimalInfo D;
2928 opStatus fs;
2929
2930 /* Scan the text. */
2931 StringRef::iterator p = str.begin();
2932 if (Error Err = interpretDecimal(p, str.end(), &D))
2933 return std::move(Err);
2934
2935 /* Handle the quick cases. First the case of no significant digits,
2936 i.e. zero, and then exponents that are obviously too large or too
2937 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2938 definitely overflows if
2939
2940 (exp - 1) * L >= maxExponent
2941
2942 and definitely underflows to zero where
2943
2944 (exp + 1) * L <= minExponent - precision
2945
2946 With integer arithmetic the tightest bounds for L are
2947
2948 93/28 < L < 196/59 [ numerator <= 256 ]
2949 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2950 */
2951
2952 // Test if we have a zero number allowing for strings with no null terminators
2953 // and zero decimals with non-zero exponents.
2954 //
2955 // We computed firstSigDigit by ignoring all zeros and dots. Thus if
2956 // D.firstSigDigit equals str.end(), every digit must be a zero and there can
2957 // be at most one dot. On the other hand, if we have a zero with a non-zero
2958 // exponent, then we know that D.firstSigDigit will be non-numeric.
2959 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
2960 category = fcZero;
2961 fs = opOK;
// Formats whose NaN is encoded as negative zero only have a positive zero.
2962 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2963 sign = false;
2964 if (!semantics->hasZero)
2966
2967 /* Check whether the normalized exponent is high enough to overflow
2968 max during the log-rebasing in the max-exponent check below. */
2969 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2970 fs = handleOverflow(rounding_mode);
2971
2972 /* If it wasn't, then it also wasn't high enough to overflow max
2973 during the log-rebasing in the min-exponent check. Check that it
2974 won't overflow min in either check, then perform the min-exponent
2975 check. */
2976 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2977 (D.normalizedExponent + 1) * 28738 <=
2978 8651 * (semantics->minExponent - (int) semantics->precision)) {
2979 /* Underflow to zero and round. */
2980 category = fcNormal;
2981 zeroSignificand();
2982 fs = normalize(rounding_mode, lfLessThanHalf);
2983
2984 /* We can finally safely perform the max-exponent check. */
2985 } else if ((D.normalizedExponent - 1) * 42039
2986 >= 12655 * semantics->maxExponent) {
2987 /* Overflow and round. */
2988 fs = handleOverflow(rounding_mode);
2989 } else {
2990 integerPart *decSignificand;
2991 unsigned int partCount;
2992
2993 /* A tight upper bound on number of bits required to hold an
2994 N-digit decimal integer is N * 196 / 59. Allocate enough space
2995 to hold the full significand, and an extra part required by
2996 tcMultiplyPart. */
2997 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2998 partCount = partCountForBits(1 + 196 * partCount / 59);
// This buffer is released with delete[] on the error path inside the loop
// and after rounding at the end of this branch.
2999 decSignificand = new integerPart[partCount + 1];
// From here on partCount counts the parts of decSignificand in use.
3000 partCount = 0;
3001
3002 /* Convert to binary efficiently - we do almost all multiplication
3003 in an integerPart. When this would overflow do we do a single
3004 bignum multiplication, and then revert again to multiplication
3005 in an integerPart. */
3006 do {
3007 integerPart decValue, val, multiplier;
3008
3009 val = 0;
3010 multiplier = 1;
3011
3012 do {
3013 if (*p == '.') {
3014 p++;
3015 if (p == str.end()) {
3016 break;
3017 }
3018 }
3019 decValue = decDigitValue(*p++);
3020 if (decValue >= 10U) {
3021 delete[] decSignificand;
3022 return createError("Invalid character in significand");
3023 }
3024 multiplier *= 10;
3025 val = val * 10 + decValue;
3026 /* The maximum number that can be multiplied by ten with any
3027 digit added without overflowing an integerPart. */
3028 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
3029
3030 /* Multiply out the current part. */
3031 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
3032 partCount, partCount + 1, false);
3033
3034 /* If we used another part (likely but not guaranteed), increase
3035 the count. */
3036 if (decSignificand[partCount])
3037 partCount++;
3038 } while (p <= D.lastSigDigit);
3039
3040 category = fcNormal;
3041 fs = roundSignificandWithExponent(decSignificand, partCount,
3042 D.exponent, rounding_mode);
3043
3044 delete [] decSignificand;
3045 }
3046
3047 return fs;
3048}
3049
3050bool IEEEFloat::convertFromStringSpecials(StringRef str) {
3051 const size_t MIN_NAME_SIZE = 3;
3052
3053 if (str.size() < MIN_NAME_SIZE)
3054 return false;
3055
3056 if (str == "inf" || str == "INFINITY" || str == "+Inf" || str == "+inf") {
3057 makeInf(false);
3058 return true;
3059 }
3060
3061 bool IsNegative = str.consume_front("-");
3062 if (IsNegative) {
3063 if (str.size() < MIN_NAME_SIZE)
3064 return false;
3065
3066 if (str == "inf" || str == "INFINITY" || str == "Inf") {
3067 makeInf(true);
3068 return true;
3069 }
3070 }
3071
3072 // If we have a 's' (or 'S') prefix, then this is a Signaling NaN.
3073 bool IsSignaling = str.consume_front_insensitive("s");
3074 if (IsSignaling) {
3075 if (str.size() < MIN_NAME_SIZE)
3076 return false;
3077 }
3078
3079 if (str.consume_front("nan") || str.consume_front("NaN")) {
3080 // A NaN without payload.
3081 if (str.empty()) {
3082 makeNaN(IsSignaling, IsNegative);
3083 return true;
3084 }
3085
3086 // Allow the payload to be inside parentheses.
3087 if (str.front() == '(') {
3088 // Parentheses should be balanced (and not empty).
3089 if (str.size() <= 2 || str.back() != ')')
3090 return false;
3091
3092 str = str.slice(1, str.size() - 1);
3093 }
3094
3095 // Determine the payload number's radix.
3096 unsigned Radix = 10;
3097 if (str[0] == '0') {
3098 if (str.size() > 1 && tolower(str[1]) == 'x') {
3099 str = str.drop_front(2);
3100 Radix = 16;
3101 } else {
3102 Radix = 8;
3103 }
3104 }
3105
3106 // Parse the payload and make the NaN.
3107 APInt Payload;
3108 if (!str.getAsInteger(Radix, Payload)) {
3109 makeNaN(IsSignaling, IsNegative, &Payload);
3110 return true;
3111 }
3112 }
3113
3114 return false;
3115}
3116
3117Expected<APFloat::opStatus>
3119 if (str.empty())
3120 return createError("Invalid string length");
3121
3122 // Handle special cases.
3123 if (convertFromStringSpecials(str))
3124 return opOK;
3125
3126 /* Handle a leading minus sign. */
3127 StringRef::iterator p = str.begin();
3128 size_t slen = str.size();
3129 sign = *p == '-' ? 1 : 0;
3130 if (sign && !semantics->hasSignedRepr)
3132 "This floating point format does not support signed values");
3133
3134 if (*p == '-' || *p == '+') {
3135 p++;
3136 slen--;
3137 if (!slen)
3138 return createError("String has no digits");
3139 }
3140
3141 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
3142 if (slen == 2)
3143 return createError("Invalid string");
3144 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
3145 rounding_mode);
3146 }
3147
3148 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
3149}
3150
3151/* Write out a hexadecimal representation of the floating point value
3152 to DST, which must be of sufficient size, in the C99 form
3153 [-]0xh.hhhhp[+-]d. Return the number of characters written,
3154 excluding the terminating NUL.
3155
3156 If UPPERCASE, the output is in upper case, otherwise in lower case.
3157
3158 HEXDIGITS digits appear altogether, rounding the value if
3159 necessary. If HEXDIGITS is 0, the minimal precision to display the
3160 number precisely is used instead. If nothing would appear after
3161 the decimal point it is suppressed.
3162
3163 The decimal exponent is always printed and has at least one digit.
3164 Zero values display an exponent of zero. Infinities and NaNs
3165 appear as "infinity" or "nan" respectively.
3166
3167 The above rules are as specified by C99. There is ambiguity about
3168 what the leading hexadecimal digit should be. This implementation
3169 uses whatever is necessary so that the exponent is displayed as
3170 stored. This implies the exponent will fall within the IEEE format
3171 range, and the leading hexadecimal digit will be 0 (for denormals),
3172 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
3173 any other digits zero).
3174*/
3175unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
3176 bool upperCase,
3177 roundingMode rounding_mode) const {
3178 char *p = dst;
3179 if (sign)
3180 *dst++ = '-';
3181
3182 switch (category) {
3183 case fcInfinity:
3184 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
3185 dst += sizeof infinityL - 1;
3186 break;
3187
3188 case fcNaN:
3189 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
3190 dst += sizeof NaNU - 1;
3191 break;
3192
3193 case fcZero:
3194 *dst++ = '0';
3195 *dst++ = upperCase ? 'X': 'x';
3196 *dst++ = '0';
3197 if (hexDigits > 1) {
3198 *dst++ = '.';
3199 memset (dst, '0', hexDigits - 1);
3200 dst += hexDigits - 1;
3201 }
3202 *dst++ = upperCase ? 'P': 'p';
3203 *dst++ = '0';
3204 break;
3205
3206 case fcNormal:
3207 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
3208 break;
3209 }
3210
3211 *dst = 0;
3212
3213 return static_cast<unsigned int>(dst - p);
3214}
3215
3216/* Does the hard work of outputting the correctly rounded hexadecimal
3217 form of a normal floating point number with the specified number of
3218 hexadecimal digits. If HEXDIGITS is zero the minimum number of
3219 digits necessary to print the value precisely is output. */
3220char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
3221 bool upperCase,
3222 roundingMode rounding_mode) const {
3223 *dst++ = '0';
3224 *dst++ = upperCase ? 'X': 'x';
3225
3226 bool roundUp = false;
3227 const char *hexDigitChars = upperCase ? hexDigitsUpper : hexDigitsLower;
3228
3229 const integerPart *significand = significandParts();
3230 unsigned partsCount = partCount();
3231
3232 /* +3 because the first digit only uses the single integer bit, so
3233 we have 3 virtual zero most-significant-bits. */
3234 unsigned valueBits = semantics->precision + 3;
3235 unsigned shift = integerPartWidth - valueBits % integerPartWidth;
3236
3237 /* The natural number of digits required ignoring trailing
3238 insignificant zeroes. */
3239 unsigned outputDigits = (valueBits - significandLSB() + 3) / 4;
3240
3241 /* hexDigits of zero means use the required number for the
3242 precision. Otherwise, see if we are truncating. If we are,
3243 find out if we need to round away from zero. */
3244 if (hexDigits) {
3245 if (hexDigits < outputDigits) {
3246 /* We are dropping non-zero bits, so need to check how to round.
3247 "bits" is the number of dropped bits. */
3248 unsigned int bits;
3249 lostFraction fraction;
3250
3251 bits = valueBits - hexDigits * 4;
3252 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
3253 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
3254 }
3255 outputDigits = hexDigits;
3256 }
3257
3258 /* Write the digits consecutively, and start writing in the location
3259 of the hexadecimal point. We move the most significant digit
3260 left and add the hexadecimal point later. */
3261 char *p = ++dst;
3262
3263 unsigned count = (valueBits + integerPartWidth - 1) / integerPartWidth;
3264
3265 while (outputDigits && count) {
3266 integerPart part;
3267
3268 /* Put the most significant integerPartWidth bits in "part". */
3269 if (--count == partsCount)
3270 part = 0; /* An imaginary higher zero part. */
3271 else
3272 part = significand[count] << shift;
3273
3274 if (count && shift)
3275 part |= significand[count - 1] >> (integerPartWidth - shift);
3276
3277 /* Convert as much of "part" to hexdigits as we can. */
3278 unsigned int curDigits = integerPartWidth / 4;
3279
3280 curDigits = std::min(curDigits, outputDigits);
3281 dst += partAsHex (dst, part, curDigits, hexDigitChars);
3282 outputDigits -= curDigits;
3283 }
3284
3285 if (roundUp) {
3286 char *q = dst;
3287
3288 /* Note that hexDigitChars has a trailing '0'. */
3289 do {
3290 q--;
3291 *q = hexDigitChars[hexDigitValue (*q) + 1];
3292 } while (*q == '0');
3293 assert(q >= p);
3294 } else {
3295 /* Add trailing zeroes. */
3296 memset (dst, '0', outputDigits);
3297 dst += outputDigits;
3298 }
3299
3300 /* Move the most significant digit to before the point, and if there
3301 is something after the decimal point add it. This must come
3302 after rounding above. */
3303 p[-1] = p[0];
3304 if (dst -1 == p)
3305 dst--;
3306 else
3307 p[0] = '.';
3308
3309 /* Finally output the exponent. */
3310 *dst++ = upperCase ? 'P': 'p';
3311
3312 return writeSignedDecimal (dst, exponent);
3313}
3314
3316 if (!Arg.isFiniteNonZero())
3317 return hash_combine((uint8_t)Arg.category,
3318 // NaN has no sign, fix it at zero.
3319 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
3320 Arg.semantics->precision);
3321
3322 // Normal floats need their exponent and significand hashed.
3323 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
3324 Arg.semantics->precision, Arg.exponent,
3326 Arg.significandParts(),
3327 Arg.significandParts() + Arg.partCount()));
3328}
3329
3330// Conversion from APFloat to/from host float/double. It may eventually be
3331// possible to eliminate these and have everybody deal with APFloats, but that
3332// will take a while. This approach will not easily extend to long double.
3333// Current implementation requires integerPartWidth==64, which is correct at
3334// the moment but could be made more general.
3335
3336// Denormals have exponent minExponent in APFloat, but minExponent-1 in
3337// the actual IEEE representations. We compensate for that here.
3338
3339APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
3340 assert(semantics ==
3341 (const llvm::fltSemantics *)&APFloatBase::semX87DoubleExtended);
3342 assert(partCount()==2);
3343
3344 uint64_t myexponent, mysignificand;
3345
3346 if (isFiniteNonZero()) {
3347 myexponent = exponent+16383; //bias
3348 mysignificand = significandParts()[0];
3349 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
3350 myexponent = 0; // denormal
3351 } else if (category==fcZero) {
3352 myexponent = 0;
3353 mysignificand = 0;
3354 } else if (category==fcInfinity) {
3355 myexponent = 0x7fff;
3356 mysignificand = 0x8000000000000000ULL;
3357 } else {
3358 assert(category == fcNaN && "Unknown category");
3359 myexponent = 0x7fff;
3360 mysignificand = significandParts()[0];
3361 }
3362
3363 uint64_t words[2];
3364 words[0] = mysignificand;
3365 words[1] = ((uint64_t)(sign & 1) << 15) |
3366 (myexponent & 0x7fffLL);
3367 return APInt(80, words);
3368}
3369
3370APInt IEEEFloat::convertPPCDoubleDoubleLegacyAPFloatToAPInt() const {
3371 assert(semantics ==
3372 (const llvm::fltSemantics *)&APFloatBase::semPPCDoubleDoubleLegacy);
3373 assert(partCount()==2);
3374
3375 uint64_t words[2];
3376 bool losesInfo;
3377
3378 // Convert number to double. To avoid spurious underflows, we re-
3379 // normalize against the "double" minExponent first, and only *then*
3380 // truncate the mantissa. The result of that second conversion
3381 // may be inexact, but should never underflow.
3382 // Declare fltSemantics before APFloat that uses it (and
3383 // saves pointer to it) to ensure correct destruction order.
3384 fltSemantics extendedSemantics = *semantics;
3385 extendedSemantics.minExponent = APFloatBase::semIEEEdouble.minExponent;
3386 IEEEFloat extended(*this);
3387 [[maybe_unused]] opStatus fs =
3388 extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3389 assert(fs == opOK && !losesInfo);
3390
3391 IEEEFloat u(extended);
3392 fs = u.convert(APFloatBase::semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3393 assert(fs == opOK || fs == opInexact);
3394 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
3395
3396 // If conversion was exact or resulted in a special case, we're done;
3397 // just set the second double to zero. Otherwise, re-convert back to
3398 // the extended format and compute the difference. This now should
3399 // convert exactly to double.
3400 if (u.isFiniteNonZero() && losesInfo) {
3401 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3402 assert(fs == opOK && !losesInfo);
3403
3404 IEEEFloat v(extended);
3405 v.subtract(u, rmNearestTiesToEven);
3406 fs = v.convert(APFloatBase::semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3407 assert(fs == opOK && !losesInfo);
3408 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
3409 } else {
3410 words[1] = 0;
3411 }
3412
3413 return APInt(128, words);
3414}
3415
3416template <const fltSemantics &S>
3417APInt IEEEFloat::convertIEEEFloatToAPInt() const {
3418 assert(semantics == &S);
3419 const int bias = (semantics == &APFloatBase::semFloat8E8M0FNU)
3420 ? -S.minExponent
3421 : -(S.minExponent - 1);
3422 constexpr unsigned int trailing_significand_bits = S.precision - 1;
3423 constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth;
3424 constexpr integerPart integer_bit =
3425 integerPart{1} << (trailing_significand_bits % integerPartWidth);
3426 constexpr uint64_t significand_mask = integer_bit - 1;
3427 constexpr unsigned int exponent_bits =
3428 trailing_significand_bits ? (S.sizeInBits - 1 - trailing_significand_bits)
3429 : S.sizeInBits;
3430 static_assert(exponent_bits < 64);
3431 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3432
3433 uint64_t myexponent;
3434 std::array<integerPart, partCountForBits(trailing_significand_bits)>
3435 mysignificand;
3436
3437 if (isFiniteNonZero()) {
3438 myexponent = exponent + bias;
3439 std::copy_n(significandParts(), mysignificand.size(),
3440 mysignificand.begin());
3441 if (myexponent == 1 &&
3442 !(significandParts()[integer_bit_part] & integer_bit))
3443 myexponent = 0; // denormal
3444 } else if (category == fcZero) {
3445 if (!S.hasZero)
3446 llvm_unreachable("semantics does not support zero!");
3447 myexponent = ::exponentZero(S) + bias;
3448 mysignificand.fill(0);
3449 } else if (category == fcInfinity) {
3450 if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
3451 S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
3452 llvm_unreachable("semantics don't support inf!");
3453 myexponent = ::exponentInf(S) + bias;
3454 mysignificand.fill(0);
3455 } else {
3456 assert(category == fcNaN && "Unknown category!");
3457 if (S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
3458 llvm_unreachable("semantics don't support NaN!");
3459 myexponent = ::exponentNaN(S) + bias;
3460 std::copy_n(significandParts(), mysignificand.size(),
3461 mysignificand.begin());
3462 }
3463 std::array<uint64_t, (S.sizeInBits + 63) / 64> words;
3464 auto words_iter =
3465 std::copy_n(mysignificand.begin(), mysignificand.size(), words.begin());
3466 if constexpr (significand_mask != 0 || trailing_significand_bits == 0) {
3467 // Clear the integer bit.
3468 words[mysignificand.size() - 1] &= significand_mask;
3469 }
3470 std::fill(words_iter, words.end(), uint64_t{0});
3471 constexpr size_t last_word = words.size() - 1;
3472 uint64_t shifted_sign = static_cast<uint64_t>(sign & 1)
3473 << ((S.sizeInBits - 1) % 64);
3474 words[last_word] |= shifted_sign;
3475 uint64_t shifted_exponent = (myexponent & exponent_mask)
3476 << (trailing_significand_bits % 64);
3477 words[last_word] |= shifted_exponent;
3478 if constexpr (last_word == 0) {
3479 return APInt(S.sizeInBits, words[0]);
3480 }
3481 return APInt(S.sizeInBits, words);
3482}
3483
3484APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
3485 assert(partCount() == 2);
3486 return convertIEEEFloatToAPInt<APFloatBase::semIEEEquad>();
3487}
3488
3489APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
3490 assert(partCount()==1);
3491 return convertIEEEFloatToAPInt<APFloatBase::semIEEEdouble>();
3492}
3493
3494APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
3495 assert(partCount()==1);
3496 return convertIEEEFloatToAPInt<APFloatBase::semIEEEsingle>();
3497}
3498
3499APInt IEEEFloat::convertBFloatAPFloatToAPInt() const {
3500 assert(partCount() == 1);
3501 return convertIEEEFloatToAPInt<APFloatBase::semBFloat>();
3502}
3503
3504APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
3505 assert(partCount()==1);
3506 return convertIEEEFloatToAPInt<APFloatBase::APFloatBase::semIEEEhalf>();
3507}
3508
3509APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const {
3510 assert(partCount() == 1);
3511 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E5M2>();
3512}
3513
3514APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const {
3515 assert(partCount() == 1);
3516 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E5M2FNUZ>();
3517}
3518
3519APInt IEEEFloat::convertFloat8E4M3APFloatToAPInt() const {
3520 assert(partCount() == 1);
3521 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3>();
3522}
3523
3524APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {
3525 assert(partCount() == 1);
3526 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3FN>();
3527}
3528
3529APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const {
3530 assert(partCount() == 1);
3531 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3FNUZ>();
3532}
3533
3534APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const {
3535 assert(partCount() == 1);
3536 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3B11FNUZ>();
3537}
3538
3539APInt IEEEFloat::convertFloat8E3M4APFloatToAPInt() const {
3540 assert(partCount() == 1);
3541 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E3M4>();
3542}
3543
3544APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const {
3545 assert(partCount() == 1);
3546 return convertIEEEFloatToAPInt<APFloatBase::semFloatTF32>();
3547}
3548
3549APInt IEEEFloat::convertFloat8E8M0FNUAPFloatToAPInt() const {
3550 assert(partCount() == 1);
3551 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E8M0FNU>();
3552}
3553
3554APInt IEEEFloat::convertFloat6E3M2FNAPFloatToAPInt() const {
3555 assert(partCount() == 1);
3556 return convertIEEEFloatToAPInt<APFloatBase::semFloat6E3M2FN>();
3557}
3558
3559APInt IEEEFloat::convertFloat6E2M3FNAPFloatToAPInt() const {
3560 assert(partCount() == 1);
3561 return convertIEEEFloatToAPInt<APFloatBase::semFloat6E2M3FN>();
3562}
3563
3564APInt IEEEFloat::convertFloat4E2M1FNAPFloatToAPInt() const {
3565 assert(partCount() == 1);
3566 return convertIEEEFloatToAPInt<APFloatBase::semFloat4E2M1FN>();
3567}
3568
3569// This function creates an APInt that is just a bit map of the floating
3570// point constant as it would appear in memory. It is not a conversion,
3571// and treating the result as a normal integer is unlikely to be useful.
3572
3574 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEhalf)
3575 return convertHalfAPFloatToAPInt();
3576
3577 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semBFloat)
3578 return convertBFloatAPFloatToAPInt();
3579
3580 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEsingle)
3581 return convertFloatAPFloatToAPInt();
3582
3583 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEdouble)
3584 return convertDoubleAPFloatToAPInt();
3585
3586 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEquad)
3587 return convertQuadrupleAPFloatToAPInt();
3588
3589 if (semantics ==
3590 (const llvm::fltSemantics *)&APFloatBase::semPPCDoubleDoubleLegacy)
3591 return convertPPCDoubleDoubleLegacyAPFloatToAPInt();
3592
3593 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E5M2)
3594 return convertFloat8E5M2APFloatToAPInt();
3595
3596 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E5M2FNUZ)
3597 return convertFloat8E5M2FNUZAPFloatToAPInt();
3598
3599 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3)
3600 return convertFloat8E4M3APFloatToAPInt();
3601
3602 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3FN)
3603 return convertFloat8E4M3FNAPFloatToAPInt();
3604
3605 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3FNUZ)
3606 return convertFloat8E4M3FNUZAPFloatToAPInt();
3607
3608 if (semantics ==
3609 (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3B11FNUZ)
3610 return convertFloat8E4M3B11FNUZAPFloatToAPInt();
3611
3612 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E3M4)
3613 return convertFloat8E3M4APFloatToAPInt();
3614
3615 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloatTF32)
3616 return convertFloatTF32APFloatToAPInt();
3617
3618 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E8M0FNU)
3619 return convertFloat8E8M0FNUAPFloatToAPInt();
3620
3621 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat6E3M2FN)
3622 return convertFloat6E3M2FNAPFloatToAPInt();
3623
3624 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat6E2M3FN)
3625 return convertFloat6E2M3FNAPFloatToAPInt();
3626
3627 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat4E2M1FN)
3628 return convertFloat4E2M1FNAPFloatToAPInt();
3629
3630 assert(semantics ==
3631 (const llvm::fltSemantics *)&APFloatBase::semX87DoubleExtended &&
3632 "unknown format!");
3633 return convertF80LongDoubleAPFloatToAPInt();
3634}
3635
3637 assert(semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEsingle &&
3638 "Float semantics are not IEEEsingle");
3639 APInt api = bitcastToAPInt();
3640 return api.bitsToFloat();
3641}
3642
3644 assert(semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEdouble &&
3645 "Float semantics are not IEEEdouble");
3646 APInt api = bitcastToAPInt();
3647 return api.bitsToDouble();
3648}
3649
3650#ifdef HAS_IEE754_FLOAT128
3651float128 IEEEFloat::convertToQuad() const {
3652 assert(semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEquad &&
3653 "Float semantics are not IEEEquads");
3654 APInt api = bitcastToAPInt();
3655 return api.bitsToQuad();
3656}
3657#endif
3658
3659/// Integer bit is explicit in this format. Intel hardware (387 and later)
3660/// does not support these bit patterns:
3661/// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3662/// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3663/// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3664/// exponent = 0, integer bit 1 ("pseudodenormal")
3665/// At the moment, the first three are treated as NaNs, the last one as Normal.
3666void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
3667 uint64_t i1 = api.getRawData()[0];
3668 uint64_t i2 = api.getRawData()[1];
3669 uint64_t myexponent = (i2 & 0x7fff);
3670 uint64_t mysignificand = i1;
3671 uint8_t myintegerbit = mysignificand >> 63;
3672
3673 initialize(&APFloatBase::semX87DoubleExtended);
3674 assert(partCount()==2);
3675
3676 sign = static_cast<unsigned int>(i2>>15);
3677 if (myexponent == 0 && mysignificand == 0) {
3678 makeZero(sign);
3679 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3680 makeInf(sign);
3681 } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) ||
3682 (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) {
3683 category = fcNaN;
3684 exponent = exponentNaN();
3685 significandParts()[0] = mysignificand;
3686 significandParts()[1] = 0;
3687 } else {
3688 category = fcNormal;
3689 exponent = myexponent - 16383;
3690 significandParts()[0] = mysignificand;
3691 significandParts()[1] = 0;
3692 if (myexponent==0) // denormal
3693 exponent = -16382;
3694 }
3695}
3696
3697void IEEEFloat::initFromPPCDoubleDoubleLegacyAPInt(const APInt &api) {
3698 uint64_t i1 = api.getRawData()[0];
3699 uint64_t i2 = api.getRawData()[1];
3700 bool losesInfo;
3701
3702 // Get the first double and convert to our format.
3703 initFromDoubleAPInt(APInt(64, i1));
3704 [[maybe_unused]] opStatus fs = convert(APFloatBase::semPPCDoubleDoubleLegacy,
3705 rmNearestTiesToEven, &losesInfo);
3706 // (convert may return opInvalidOp if i1 is an sNaN).
3707 assert((fs == opOK || fs == opInvalidOp) && !losesInfo);
3708
3709 // Unless we have a special case, add in second double.
3710 if (isFiniteNonZero()) {
3711 IEEEFloat v(APFloatBase::semIEEEdouble, APInt(64, i2));
3712 fs = v.convert(APFloatBase::semPPCDoubleDoubleLegacy, rmNearestTiesToEven,
3713 &losesInfo);
3714 assert(fs == opOK && !losesInfo);
3715
3717 }
3718}
3719
3720// The E8M0 format has the following characteristics:
3721// It is an 8-bit unsigned format with only exponents (no actual significand).
3722// No encodings for {zero, infinities or denorms}.
3723// NaN is represented by all 1's.
3724// Bias is 127.
3725void IEEEFloat::initFromFloat8E8M0FNUAPInt(const APInt &api) {
3726 const uint64_t exponent_mask = 0xff;
3727 uint64_t val = api.getRawData()[0];
3728 uint64_t myexponent = val & exponent_mask;
3729
3730 initialize(&APFloatBase::semFloat8E8M0FNU);
3731 assert(partCount() == 1);
3732
3733 // This format has unsigned representation only
3734 sign = 0;
3735
3736 // Set the significand
3737 // This format does not have any significand but the 'Pth' precision bit is
3738 // always set to 1 for consistency in APFloat's internal representation.
3739 uint64_t mysignificand = 1;
3740 significandParts()[0] = mysignificand;
3741
3742 // This format can either have a NaN or fcNormal
3743 // All 1's i.e. 255 is a NaN
3744 if (val == exponent_mask) {
3745 category = fcNaN;
3746 exponent = exponentNaN();
3747 return;
3748 }
3749 // Handle fcNormal...
3750 category = fcNormal;
3751 exponent = myexponent - 127; // 127 is bias
3752}
3753
3754template <const fltSemantics &S>
3755void IEEEFloat::initFromIEEEAPInt(const APInt &api) {
3756 assert(api.getBitWidth() == S.sizeInBits);
3757 constexpr integerPart integer_bit = integerPart{1}
3758 << ((S.precision - 1) % integerPartWidth);
3759 constexpr uint64_t significand_mask = integer_bit - 1;
3760 constexpr unsigned int trailing_significand_bits = S.precision - 1;
3761 constexpr unsigned int stored_significand_parts =
3762 partCountForBits(trailing_significand_bits);
3763 constexpr unsigned int exponent_bits =
3764 S.sizeInBits - 1 - trailing_significand_bits;
3765 static_assert(exponent_bits < 64);
3766 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3767 constexpr int bias = -(S.minExponent - 1);
3768
3769 // Copy the bits of the significand. We need to clear out the exponent and
3770 // sign bit in the last word.
3771 std::array<integerPart, stored_significand_parts> mysignificand;
3772 std::copy_n(api.getRawData(), mysignificand.size(), mysignificand.begin());
3773 if constexpr (significand_mask != 0) {
3774 mysignificand[mysignificand.size() - 1] &= significand_mask;
3775 }
3776
3777 // We assume the last word holds the sign bit, the exponent, and potentially
3778 // some of the trailing significand field.
3779 uint64_t last_word = api.getRawData()[api.getNumWords() - 1];
3780 uint64_t myexponent =
3781 (last_word >> (trailing_significand_bits % 64)) & exponent_mask;
3782
3783 initialize(&S);
3784 assert(partCount() == mysignificand.size());
3785
3786 sign = static_cast<unsigned int>(last_word >> ((S.sizeInBits - 1) % 64));
3787
3788 bool all_zero_significand = llvm::all_of(mysignificand, equal_to(0));
3789
3790 bool is_zero = myexponent == 0 && all_zero_significand;
3791
3792 if constexpr (S.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754) {
3793 if (myexponent - bias == ::exponentInf(S) && all_zero_significand) {
3794 makeInf(sign);
3795 return;
3796 }
3797 }
3798
3799 bool is_nan = false;
3800
3801 if constexpr (S.nanEncoding == fltNanEncoding::IEEE) {
3802 is_nan = myexponent - bias == ::exponentNaN(S) && !all_zero_significand;
3803 } else if constexpr (S.nanEncoding == fltNanEncoding::AllOnes) {
3804 bool all_ones_significand =
3805 std::all_of(mysignificand.begin(), mysignificand.end() - 1,
3806 [](integerPart bits) { return bits == ~integerPart{0}; }) &&
3807 (!significand_mask ||
3808 mysignificand[mysignificand.size() - 1] == significand_mask);
3809 is_nan = myexponent - bias == ::exponentNaN(S) && all_ones_significand;
3810 } else if constexpr (S.nanEncoding == fltNanEncoding::NegativeZero) {
3811 is_nan = is_zero && sign;
3812 }
3813
3814 if (is_nan) {
3815 category = fcNaN;
3816 exponent = ::exponentNaN(S);
3817 std::copy_n(mysignificand.begin(), mysignificand.size(),
3818 significandParts());
3819 return;
3820 }
3821
3822 if (is_zero) {
3823 makeZero(sign);
3824 return;
3825 }
3826
3827 category = fcNormal;
3828 exponent = myexponent - bias;
3829 std::copy_n(mysignificand.begin(), mysignificand.size(), significandParts());
3830 if (myexponent == 0) // denormal
3831 exponent = S.minExponent;
3832 else
3833 significandParts()[mysignificand.size()-1] |= integer_bit; // integer bit
3834}
3835
3836void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
3837 initFromIEEEAPInt<APFloatBase::semIEEEquad>(api);
3838}
3839
3840void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
3841 initFromIEEEAPInt<APFloatBase::semIEEEdouble>(api);
3842}
3843
3844void IEEEFloat::initFromFloatAPInt(const APInt &api) {
3845 initFromIEEEAPInt<APFloatBase::semIEEEsingle>(api);
3846}
3847
3848void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
3849 initFromIEEEAPInt<APFloatBase::semBFloat>(api);
3850}
3851
3852void IEEEFloat::initFromHalfAPInt(const APInt &api) {
3853 initFromIEEEAPInt<APFloatBase::semIEEEhalf>(api);
3854}
3855
3856void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) {
3857 initFromIEEEAPInt<APFloatBase::semFloat8E5M2>(api);
3858}
3859
3860void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) {
3861 initFromIEEEAPInt<APFloatBase::semFloat8E5M2FNUZ>(api);
3862}
3863
3864void IEEEFloat::initFromFloat8E4M3APInt(const APInt &api) {
3865 initFromIEEEAPInt<APFloatBase::semFloat8E4M3>(api);
3866}
3867
3868void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {
3869 initFromIEEEAPInt<APFloatBase::semFloat8E4M3FN>(api);
3870}
3871
3872void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) {
3873 initFromIEEEAPInt<APFloatBase::semFloat8E4M3FNUZ>(api);
3874}
3875
3876void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) {
3877 initFromIEEEAPInt<APFloatBase::semFloat8E4M3B11FNUZ>(api);
3878}
3879
3880void IEEEFloat::initFromFloat8E3M4APInt(const APInt &api) {
3881 initFromIEEEAPInt<APFloatBase::semFloat8E3M4>(api);
3882}
3883
3884void IEEEFloat::initFromFloatTF32APInt(const APInt &api) {
3885 initFromIEEEAPInt<APFloatBase::semFloatTF32>(api);
3886}
3887
3888void IEEEFloat::initFromFloat6E3M2FNAPInt(const APInt &api) {
3889 initFromIEEEAPInt<APFloatBase::semFloat6E3M2FN>(api);
3890}
3891
3892void IEEEFloat::initFromFloat6E2M3FNAPInt(const APInt &api) {
3893 initFromIEEEAPInt<APFloatBase::semFloat6E2M3FN>(api);
3894}
3895
3896void IEEEFloat::initFromFloat4E2M1FNAPInt(const APInt &api) {
3897 initFromIEEEAPInt<APFloatBase::semFloat4E2M1FN>(api);
3898}
3899
3900/// Treat api as containing the bits of a floating point number.
3901void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
3902 assert(api.getBitWidth() == Sem->sizeInBits);
3903 if (Sem == &APFloatBase::semIEEEhalf)
3904 return initFromHalfAPInt(api);
3905 if (Sem == &APFloatBase::semBFloat)
3906 return initFromBFloatAPInt(api);
3907 if (Sem == &APFloatBase::semIEEEsingle)
3908 return initFromFloatAPInt(api);
3909 if (Sem == &APFloatBase::semIEEEdouble)
3910 return initFromDoubleAPInt(api);
3911 if (Sem == &APFloatBase::semX87DoubleExtended)
3912 return initFromF80LongDoubleAPInt(api);
3913 if (Sem == &APFloatBase::semIEEEquad)
3914 return initFromQuadrupleAPInt(api);
3915 if (Sem == &APFloatBase::semPPCDoubleDoubleLegacy)
3916 return initFromPPCDoubleDoubleLegacyAPInt(api);
3917 if (Sem == &APFloatBase::semFloat8E5M2)
3918 return initFromFloat8E5M2APInt(api);
3919 if (Sem == &APFloatBase::semFloat8E5M2FNUZ)
3920 return initFromFloat8E5M2FNUZAPInt(api);
3921 if (Sem == &APFloatBase::semFloat8E4M3)
3922 return initFromFloat8E4M3APInt(api);
3923 if (Sem == &APFloatBase::semFloat8E4M3FN)
3924 return initFromFloat8E4M3FNAPInt(api);
3925 if (Sem == &APFloatBase::semFloat8E4M3FNUZ)
3926 return initFromFloat8E4M3FNUZAPInt(api);
3927 if (Sem == &APFloatBase::semFloat8E4M3B11FNUZ)
3928 return initFromFloat8E4M3B11FNUZAPInt(api);
3929 if (Sem == &APFloatBase::semFloat8E3M4)
3930 return initFromFloat8E3M4APInt(api);
3931 if (Sem == &APFloatBase::semFloatTF32)
3932 return initFromFloatTF32APInt(api);
3933 if (Sem == &APFloatBase::semFloat8E8M0FNU)
3934 return initFromFloat8E8M0FNUAPInt(api);
3935 if (Sem == &APFloatBase::semFloat6E3M2FN)
3936 return initFromFloat6E3M2FNAPInt(api);
3937 if (Sem == &APFloatBase::semFloat6E2M3FN)
3938 return initFromFloat6E2M3FNAPInt(api);
3939 if (Sem == &APFloatBase::semFloat4E2M1FN)
3940 return initFromFloat4E2M1FNAPInt(api);
3941
3942 llvm_unreachable("unsupported semantics");
3943}
3944
3945/// Make this number the largest magnitude normal number in the given
3946/// semantics.
3947void IEEEFloat::makeLargest(bool Negative) {
3948 if (Negative && !semantics->hasSignedRepr)
3950 "This floating point format does not support signed values");
3951 // We want (in interchange format):
3952 // sign = {Negative}
3953 // exponent = 1..10
3954 // significand = 1..1
3955 category = fcNormal;
3956 sign = Negative;
3957 exponent = semantics->maxExponent;
3958
3959 // Use memset to set all but the highest integerPart to all ones.
3960 integerPart *significand = significandParts();
3961 unsigned PartCount = partCount();
3962 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
3963
3964 // Set the high integerPart especially setting all unused top bits for
3965 // internal consistency.
3966 const unsigned NumUnusedHighBits =
3967 PartCount*integerPartWidth - semantics->precision;
3968 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
3969 ? (~integerPart(0) >> NumUnusedHighBits)
3970 : 0;
3971 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
3972 semantics->nanEncoding == fltNanEncoding::AllOnes &&
3973 (semantics->precision > 1))
3974 significand[0] &= ~integerPart(1);
3975}
3976
3977/// Make this number the smallest magnitude denormal number in the given
3978/// semantics.
3979void IEEEFloat::makeSmallest(bool Negative) {
3980 if (Negative && !semantics->hasSignedRepr)
3982 "This floating point format does not support signed values");
3983 // We want (in interchange format):
3984 // sign = {Negative}
3985 // exponent = 0..0
3986 // significand = 0..01
3987 category = fcNormal;
3988 sign = Negative;
3989 exponent = semantics->minExponent;
3990 APInt::tcSet(significandParts(), 1, partCount());
3991}
3992
3994 if (Negative && !semantics->hasSignedRepr)
3996 "This floating point format does not support signed values");
3997 // We want (in interchange format):
3998 // sign = {Negative}
3999 // exponent = 0..0
4000 // significand = 10..0
4001
4002 category = fcNormal;
4003 zeroSignificand();
4004 sign = Negative;
4005 exponent = semantics->minExponent;
4006 APInt::tcSetBit(significandParts(), semantics->precision - 1);
4007}
4008
4009IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
4010 initFromAPInt(&Sem, API);
4011}
4012
4014 initFromAPInt(&APFloatBase::semIEEEsingle, APInt::floatToBits(f));
4015}
4016
4018 initFromAPInt(&APFloatBase::semIEEEdouble, APInt::doubleToBits(d));
4019}
4020
4021namespace {
4022 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
4023 Buffer.append(Str.begin(), Str.end());
4024 }
4025
4026 /// Removes data from the given significand until it is no more
4027 /// precise than is required for the desired precision.
4028 void AdjustToPrecision(APInt &significand,
4029 int &exp, unsigned FormatPrecision) {
4030 unsigned bits = significand.getActiveBits();
4031
4032 // 196/59 is a very slight overestimate of lg_2(10).
4033 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
4034
4035 if (bits <= bitsRequired) return;
4036
4037 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
4038 if (!tensRemovable) return;
4039
4040 exp += tensRemovable;
4041
4042 APInt divisor(significand.getBitWidth(), 1);
4043 APInt powten(significand.getBitWidth(), 10);
4044 while (true) {
4045 if (tensRemovable & 1)
4046 divisor *= powten;
4047 tensRemovable >>= 1;
4048 if (!tensRemovable) break;
4049 powten *= powten;
4050 }
4051
4052 significand = significand.udiv(divisor);
4053
4054 // Truncate the significand down to its active bit count.
4055 significand = significand.trunc(significand.getActiveBits());
4056 }
4057
4058
4059 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
4060 int &exp, unsigned FormatPrecision) {
4061 unsigned N = buffer.size();
4062 if (N <= FormatPrecision) return;
4063
4064 // The most significant figures are the last ones in the buffer.
4065 unsigned FirstSignificant = N - FormatPrecision;
4066
4067 // Round.
4068 // FIXME: this probably shouldn't use 'round half up'.
4069
4070 // Rounding down is just a truncation, except we also want to drop
4071 // trailing zeros from the new result.
4072 if (buffer[FirstSignificant - 1] < '5') {
4073 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
4074 FirstSignificant++;
4075
4076 exp += FirstSignificant;
4077 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4078 return;
4079 }
4080
4081 // Rounding up requires a decimal add-with-carry. If we continue
4082 // the carry, the newly-introduced zeros will just be truncated.
4083 for (unsigned I = FirstSignificant; I != N; ++I) {
4084 if (buffer[I] == '9') {
4085 FirstSignificant++;
4086 } else {
4087 buffer[I]++;
4088 break;
4089 }
4090 }
4091
4092 // If we carried through, we have exactly one digit of precision.
4093 if (FirstSignificant == N) {
4094 exp += FirstSignificant;
4095 buffer.clear();
4096 buffer.push_back('1');
4097 return;
4098 }
4099
4100 exp += FirstSignificant;
4101 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4102 }
4103
4104 void toStringImpl(SmallVectorImpl<char> &Str, const bool isNeg, int exp,
4105 APInt significand, unsigned FormatPrecision,
4106 unsigned FormatMaxPadding, bool TruncateZero) {
4107 const int semanticsPrecision = significand.getBitWidth();
4108
4109 if (isNeg)
4110 Str.push_back('-');
4111
4112 // Set FormatPrecision if zero. We want to do this before we
4113 // truncate trailing zeros, as those are part of the precision.
4114 if (!FormatPrecision) {
4115 // We use enough digits so the number can be round-tripped back to an
4116 // APFloat. The formula comes from "How to Print Floating-Point Numbers
4117 // Accurately" by Steele and White.
4118 // FIXME: Using a formula based purely on the precision is conservative;
4119 // we can print fewer digits depending on the actual value being printed.
4120
4121 // FormatPrecision = 2 + floor(significandBits / lg_2(10))
4122 FormatPrecision = 2 + semanticsPrecision * 59 / 196;
4123 }
4124
4125 // Ignore trailing binary zeros.
4126 int trailingZeros = significand.countr_zero();
4127 exp += trailingZeros;
4128 significand.lshrInPlace(trailingZeros);
4129
4130 // Change the exponent from 2^e to 10^e.
4131 if (exp == 0) {
4132 // Nothing to do.
4133 } else if (exp > 0) {
4134 // Just shift left.
4135 significand = significand.zext(semanticsPrecision + exp);
4136 significand <<= exp;
4137 exp = 0;
4138 } else { /* exp < 0 */
4139 int texp = -exp;
4140
4141 // We transform this using the identity:
4142 // (N)(2^-e) == (N)(5^e)(10^-e)
4143 // This means we have to multiply N (the significand) by 5^e.
4144 // To avoid overflow, we have to operate on numbers large
4145 // enough to store N * 5^e:
4146 // log2(N * 5^e) == log2(N) + e * log2(5)
4147 // <= semantics->precision + e * 137 / 59
4148 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
4149
4150 unsigned precision = semanticsPrecision + (137 * texp + 136) / 59;
4151
4152 // Multiply significand by 5^e.
4153 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
4154 significand = significand.zext(precision);
4155 APInt five_to_the_i(precision, 5);
4156 while (true) {
4157 if (texp & 1)
4158 significand *= five_to_the_i;
4159
4160 texp >>= 1;
4161 if (!texp)
4162 break;
4163 five_to_the_i *= five_to_the_i;
4164 }
4165 }
4166
4167 AdjustToPrecision(significand, exp, FormatPrecision);
4168
4170
4171 // Fill the buffer.
4172 unsigned precision = significand.getBitWidth();
4173 if (precision < 4) {
4174 // We need enough precision to store the value 10.
4175 precision = 4;
4176 significand = significand.zext(precision);
4177 }
4178 APInt ten(precision, 10);
4179 APInt digit(precision, 0);
4180
4181 bool inTrail = true;
4182 while (significand != 0) {
4183 // digit <- significand % 10
4184 // significand <- significand / 10
4185 APInt::udivrem(significand, ten, significand, digit);
4186
4187 unsigned d = digit.getZExtValue();
4188
4189 // Drop trailing zeros.
4190 if (inTrail && !d)
4191 exp++;
4192 else {
4193 buffer.push_back((char) ('0' + d));
4194 inTrail = false;
4195 }
4196 }
4197
4198 assert(!buffer.empty() && "no characters in buffer!");
4199
4200 // Drop down to FormatPrecision.
4201 // TODO: don't do more precise calculations above than are required.
4202 AdjustToPrecision(buffer, exp, FormatPrecision);
4203
4204 unsigned NDigits = buffer.size();
4205
4206 // Check whether we should use scientific notation.
4207 bool FormatScientific;
4208 if (!FormatMaxPadding) {
4209 FormatScientific = true;
4210 } else {
4211 if (exp >= 0) {
4212 // 765e3 --> 765000
4213 // ^^^
4214 // But we shouldn't make the number look more precise than it is.
4215 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
4216 NDigits + (unsigned) exp > FormatPrecision);
4217 } else {
4218 // Power of the most significant digit.
4219 int MSD = exp + (int) (NDigits - 1);
4220 if (MSD >= 0) {
4221 // 765e-2 == 7.65
4222 FormatScientific = false;
4223 } else {
4224 // 765e-5 == 0.00765
4225 // ^ ^^
4226 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
4227 }
4228 }
4229 }
4230
4231 // Scientific formatting is pretty straightforward.
4232 if (FormatScientific) {
4233 exp += (NDigits - 1);
4234
4235 Str.push_back(buffer[NDigits-1]);
4236 Str.push_back('.');
4237 if (NDigits == 1 && TruncateZero)
4238 Str.push_back('0');
4239 else
4240 for (unsigned I = 1; I != NDigits; ++I)
4241 Str.push_back(buffer[NDigits-1-I]);
4242 // Fill with zeros up to FormatPrecision.
4243 if (!TruncateZero && FormatPrecision > NDigits - 1)
4244 Str.append(FormatPrecision - NDigits + 1, '0');
4245 // For !TruncateZero we use lower 'e'.
4246 Str.push_back(TruncateZero ? 'E' : 'e');
4247
4248 Str.push_back(exp >= 0 ? '+' : '-');
4249 if (exp < 0)
4250 exp = -exp;
4251 SmallVector<char, 6> expbuf;
4252 do {
4253 expbuf.push_back((char) ('0' + (exp % 10)));
4254 exp /= 10;
4255 } while (exp);
4256 // Exponent always at least two digits if we do not truncate zeros.
4257 if (!TruncateZero && expbuf.size() < 2)
4258 expbuf.push_back('0');
4259 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
4260 Str.push_back(expbuf[E-1-I]);
4261 return;
4262 }
4263
4264 // Non-scientific, positive exponents.
4265 if (exp >= 0) {
4266 for (unsigned I = 0; I != NDigits; ++I)
4267 Str.push_back(buffer[NDigits-1-I]);
4268 for (unsigned I = 0; I != (unsigned) exp; ++I)
4269 Str.push_back('0');
4270 return;
4271 }
4272
4273 // Non-scientific, negative exponents.
4274
4275 // The number of digits to the left of the decimal point.
4276 int NWholeDigits = exp + (int) NDigits;
4277
4278 unsigned I = 0;
4279 if (NWholeDigits > 0) {
4280 for (; I != (unsigned) NWholeDigits; ++I)
4281 Str.push_back(buffer[NDigits-I-1]);
4282 Str.push_back('.');
4283 } else {
4284 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
4285
4286 Str.push_back('0');
4287 Str.push_back('.');
4288 for (unsigned Z = 1; Z != NZeros; ++Z)
4289 Str.push_back('0');
4290 }
4291
4292 for (; I != NDigits; ++I)
4293 Str.push_back(buffer[NDigits-I-1]);
4294
4295 }
4296} // namespace
4297
4298void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
4299 unsigned FormatMaxPadding, bool TruncateZero) const {
4300 switch (category) {
4301 case fcInfinity:
4302 if (isNegative())
4303 return append(Str, "-Inf");
4304 else
4305 return append(Str, "+Inf");
4306
4307 case fcNaN: return append(Str, "NaN");
4308
4309 case fcZero:
4310 if (isNegative())
4311 Str.push_back('-');
4312
4313 if (!FormatMaxPadding) {
4314 if (TruncateZero)
4315 append(Str, "0.0E+0");
4316 else {
4317 append(Str, "0.0");
4318 if (FormatPrecision > 1)
4319 Str.append(FormatPrecision - 1, '0');
4320 append(Str, "e+00");
4321 }
4322 } else {
4323 Str.push_back('0');
4324 }
4325 return;
4326
4327 case fcNormal:
4328 break;
4329 }
4330
4331 // Decompose the number into an APInt and an exponent.
4332 int exp = exponent - ((int) semantics->precision - 1);
4333 APInt significand(
4334 semantics->precision,
4335 ArrayRef(significandParts(), partCountForBits(semantics->precision)));
4336
4337 toStringImpl(Str, isNegative(), exp, significand, FormatPrecision,
4338 FormatMaxPadding, TruncateZero);
4339
4340}
4341
4343 if (!isFinite() || isZero())
4344 return INT_MIN;
4345
4346 const integerPart *Parts = significandParts();
4347 const int PartCount = partCountForBits(semantics->precision);
4348
4349 int PopCount = 0;
4350 for (int i = 0; i < PartCount; ++i) {
4351 PopCount += llvm::popcount(Parts[i]);
4352 if (PopCount > 1)
4353 return INT_MIN;
4354 }
4355
4356 if (exponent != semantics->minExponent)
4357 return exponent;
4358
4359 int CountrParts = 0;
4360 for (int i = 0; i < PartCount;
4361 ++i, CountrParts += APInt::APINT_BITS_PER_WORD) {
4362 if (Parts[i] != 0) {
4363 return exponent - semantics->precision + CountrParts +
4364 llvm::countr_zero(Parts[i]) + 1;
4365 }
4366 }
4367
4368 llvm_unreachable("didn't find the set bit");
4369}
4370
4372 if (!isNaN())
4373 return false;
4374 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
4375 semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
4376 return false;
4377
4378 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
4379 // first bit of the trailing significand being 0.
4380 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
4381}
4382
4383/// IEEE-754R 2008 5.3.1: nextUp/nextDown.
4384///
4385/// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
4386/// appropriate sign switching before/after the computation.
4388 // If we are performing nextDown, swap sign so we have -x.
4389 if (nextDown)
4390 changeSign();
4391
4392 // Compute nextUp(x)
4393 opStatus result = opOK;
4394
4395 // Handle each float category separately.
4396 switch (category) {
4397 case fcInfinity:
4398 // nextUp(+inf) = +inf
4399 if (!isNegative())
4400 break;
4401 // nextUp(-inf) = -getLargest()
4402 makeLargest(true);
4403 break;
4404 case fcNaN:
4405 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
4406 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
4407 // change the payload.
4408 if (isSignaling()) {
4409 result = opInvalidOp;
4410 // For consistency, propagate the sign of the sNaN to the qNaN.
4411 makeNaN(false, isNegative(), nullptr);
4412 }
4413 break;
4414 case fcZero:
4415 // nextUp(pm 0) = +getSmallest()
4416 makeSmallest(false);
4417 break;
4418 case fcNormal:
4419 // nextUp(-getSmallest()) = -0
4420 if (isSmallest() && isNegative()) {
4421 APInt::tcSet(significandParts(), 0, partCount());
4422 category = fcZero;
4423 exponent = 0;
4424 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
4425 sign = false;
4426 if (!semantics->hasZero)
4428 break;
4429 }
4430
4431 if (isLargest() && !isNegative()) {
4432 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4433 // nextUp(getLargest()) == NAN
4434 makeNaN();
4435 break;
4436 } else if (semantics->nonFiniteBehavior ==
4438 // nextUp(getLargest()) == getLargest()
4439 break;
4440 } else {
4441 // nextUp(getLargest()) == INFINITY
4442 APInt::tcSet(significandParts(), 0, partCount());
4443 category = fcInfinity;
4444 exponent = semantics->maxExponent + 1;
4445 break;
4446 }
4447 }
4448
4449 // nextUp(normal) == normal + inc.
4450 if (isNegative()) {
4451 // If we are negative, we need to decrement the significand.
4452
4453 // We only cross a binade boundary that requires adjusting the exponent
4454 // if:
4455 // 1. exponent != semantics->minExponent. This implies we are not in the
4456 // smallest binade or are dealing with denormals.
4457 // 2. Our significand excluding the integral bit is all zeros.
4458 bool WillCrossBinadeBoundary =
4459 exponent != semantics->minExponent && isSignificandAllZeros();
4460
4461 // Decrement the significand.
4462 //
4463 // We always do this since:
4464 // 1. If we are dealing with a non-binade decrement, by definition we
4465 // just decrement the significand.
4466 // 2. If we are dealing with a normal -> normal binade decrement, since
4467 // we have an explicit integral bit the fact that all bits but the
4468 // integral bit are zero implies that subtracting one will yield a
4469 // significand with 0 integral bit and 1 in all other spots. Thus we
4470 // must just adjust the exponent and set the integral bit to 1.
4471 // 3. If we are dealing with a normal -> denormal binade decrement,
4472 // since we set the integral bit to 0 when we represent denormals, we
4473 // just decrement the significand.
4474 integerPart *Parts = significandParts();
4475 APInt::tcDecrement(Parts, partCount());
4476
4477 if (WillCrossBinadeBoundary) {
4478 // Our result is a normal number. Do the following:
4479 // 1. Set the integral bit to 1.
4480 // 2. Decrement the exponent.
4481 APInt::tcSetBit(Parts, semantics->precision - 1);
4482 exponent--;
4483 }
4484 } else {
4485 // If we are positive, we need to increment the significand.
4486
4487 // We only cross a binade boundary that requires adjusting the exponent if
4488 // the input is not a denormal and all of said input's significand bits
4489 // are set. If all of said conditions are true: clear the significand, set
4490 // the integral bit to 1, and increment the exponent. If we have a
4491 // denormal always increment since moving denormals and the numbers in the
4492 // smallest normal binade have the same exponent in our representation.
4493 // If there are only exponents, any increment always crosses the
4494 // BinadeBoundary.
4495 bool WillCrossBinadeBoundary = !APFloat::hasSignificand(*semantics) ||
4496 (!isDenormal() && isSignificandAllOnes());
4497
4498 if (WillCrossBinadeBoundary) {
4499 integerPart *Parts = significandParts();
4500 APInt::tcSet(Parts, 0, partCount());
4501 APInt::tcSetBit(Parts, semantics->precision - 1);
4502 assert(exponent != semantics->maxExponent &&
4503 "We can not increment an exponent beyond the maxExponent allowed"
4504 " by the given floating point semantics.");
4505 exponent++;
4506 } else {
4507 incrementSignificand();
4508 }
4509 }
4510 break;
4511 }
4512
4513 // If we are performing nextDown, swap sign so we have -nextUp(-x)
4514 if (nextDown)
4515 changeSign();
4516
4517 return result;
4518}
4519
4521 assert(isNaN() && "Can only be called on NaN values");
4522 // Number of bits in the payload, excluding the (maybe implied) integer bit.
4523 unsigned Bits = semantics->precision - 1;
4524 return APInt(Bits, ArrayRef(significandParts(), partCountForBits(Bits)));
4525}
4526
4527APFloatBase::ExponentType IEEEFloat::exponentNaN() const {
4528 return ::exponentNaN(*semantics);
4529}
4530
4531APFloatBase::ExponentType IEEEFloat::exponentInf() const {
4532 return ::exponentInf(*semantics);
4533}
4534
4535APFloatBase::ExponentType IEEEFloat::exponentZero() const {
4536 return ::exponentZero(*semantics);
4537}
4538
4539void IEEEFloat::makeInf(bool Negative) {
4540 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
4541 llvm_unreachable("This floating point format does not support Inf");
4542
4543 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4544 // There is no Inf, so make NaN instead.
4545 makeNaN(false, Negative);
4546 return;
4547 }
4548 category = fcInfinity;
4549 sign = Negative;
4550 exponent = exponentInf();
4551 APInt::tcSet(significandParts(), 0, partCount());
4552}
4553
4554void IEEEFloat::makeZero(bool Negative) {
4555 if (!semantics->hasZero)
4556 llvm_unreachable("This floating point format does not support Zero");
4557
4558 category = fcZero;
4559 sign = Negative;
4560 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
4561 // Merge negative zero to positive because 0b10000...000 is used for NaN
4562 sign = false;
4563 }
4564 exponent = exponentZero();
4565 APInt::tcSet(significandParts(), 0, partCount());
4566}
4567
4569 assert(isNaN());
4570 if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::NanOnly)
4571 APInt::tcSetBit(significandParts(), semantics->precision - 2);
4572}
4573
4574int ilogb(const IEEEFloat &Arg) {
4575 if (Arg.isNaN())
4576 return APFloat::IEK_NaN;
4577 if (Arg.isZero())
4578 return APFloat::IEK_Zero;
4579 if (Arg.isInfinity())
4580 return APFloat::IEK_Inf;
4581 if (!Arg.isDenormal())
4582 return Arg.exponent;
4583
4584 IEEEFloat Normalized(Arg);
4585 int SignificandBits = Arg.getSemantics().precision - 1;
4586
4587 Normalized.exponent += SignificandBits;
4588 Normalized.normalize(APFloat::rmNearestTiesToEven, lfExactlyZero);
4589 return Normalized.exponent - SignificandBits;
4590}
4591
4593 auto MaxExp = X.getSemantics().maxExponent;
4594 auto MinExp = X.getSemantics().minExponent;
4595
4596 // If Exp is wildly out-of-scale, simply adding it to X.exponent will
4597 // overflow; clamp it to a safe range before adding, but ensure that the range
4598 // is large enough that the clamp does not change the result. The range we
4599 // need to support is the difference between the largest possible exponent and
4600 // the normalized exponent of half the smallest denormal.
4601
4602 int SignificandBits = X.getSemantics().precision - 1;
4603 int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
4604
4605 // Clamp to one past the range ends to let normalize handle overlflow.
4606 X.exponent += std::clamp(Exp, -MaxIncrement - 1, MaxIncrement);
4607 X.normalize(RoundingMode, lfExactlyZero);
4608 if (X.isNaN())
4609 X.makeQuiet();
4610 return X;
4611}
4612
4613IEEEFloat frexp(const IEEEFloat &Val, int &Exp, roundingMode RM) {
4614 Exp = ilogb(Val);
4615
4616 // Quiet signalling nans.
4617 if (Exp == APFloat::IEK_NaN) {
4618 IEEEFloat Quiet(Val);
4619 Quiet.makeQuiet();
4620 return Quiet;
4621 }
4622
4623 if (Exp == APFloat::IEK_Inf)
4624 return Val;
4625
4626 // 1 is added because frexp is defined to return a normalized fraction in
4627 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
4628 Exp = Exp == APFloat::IEK_Zero ? 0 : Exp + 1;
4629 return scalbn(Val, -Exp, RM);
4630}
4631
4633 : Semantics(&S),
4634 Floats(new APFloat[2]{APFloat(APFloatBase::semIEEEdouble),
4635 APFloat(APFloatBase::semIEEEdouble)}) {
4636 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4637}
4638
4640 : Semantics(&S), Floats(new APFloat[2]{
4641 APFloat(APFloatBase::semIEEEdouble, uninitialized),
4642 APFloat(APFloatBase::semIEEEdouble, uninitialized)}) {
4643 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4644}
4645
4647 : Semantics(&S),
4648 Floats(new APFloat[2]{APFloat(APFloatBase::semIEEEdouble, I),
4649 APFloat(APFloatBase::semIEEEdouble)}) {
4650 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4651}
4652
4654 : Semantics(&S),
4655 Floats(new APFloat[2]{
4656 APFloat(APFloatBase::semIEEEdouble, APInt(64, I.getRawData()[0])),
4657 APFloat(APFloatBase::semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
4658 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4659}
4660
4662 APFloat &&Second)
4663 : Semantics(&S),
4664 Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
4665 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4666 assert(&Floats[0].getSemantics() == &APFloatBase::semIEEEdouble);
4667 assert(&Floats[1].getSemantics() == &APFloatBase::semIEEEdouble);
4668}
4669
4671 : Semantics(RHS.Semantics),
4672 Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
4673 APFloat(RHS.Floats[1])}
4674 : nullptr) {
4675 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4676}
4677
4679 : Semantics(RHS.Semantics), Floats(RHS.Floats) {
4680 RHS.Semantics = &APFloatBase::semBogus;
4681 RHS.Floats = nullptr;
4682 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4683}
4684
4686 if (Semantics == RHS.Semantics && RHS.Floats) {
4687 Floats[0] = RHS.Floats[0];
4688 Floats[1] = RHS.Floats[1];
4689 } else if (this != &RHS) {
4690 this->~DoubleAPFloat();
4691 new (this) DoubleAPFloat(RHS);
4692 }
4693 return *this;
4694}
4695
4696// Returns a result such that:
4697// 1. abs(Lo) <= ulp(Hi)/2
4698// 2. Hi == RTNE(Hi + Lo)
4699// 3. Hi + Lo == X + Y
4700//
4701// Requires that log2(X) >= log2(Y).
4702static std::pair<APFloat, APFloat> fastTwoSum(APFloat X, APFloat Y) {
4703 if (!X.isFinite())
4704 return {X, APFloat::getZero(X.getSemantics(), /*Negative=*/false)};
4705 APFloat Hi = X + Y;
4706 APFloat Delta = Hi - X;
4707 APFloat Lo = Y - Delta;
4708 return {Hi, Lo};
4709}
4710
4711// Implement addition, subtraction, multiplication and division based on:
4712// "Software for Doubled-Precision Floating-Point Computations",
4713// by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
4714APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
4715 const APFloat &c, const APFloat &cc,
4716 roundingMode RM) {
4717 int Status = opOK;
4718 APFloat z = a;
4719 Status |= z.add(c, RM);
4720 if (!z.isFinite()) {
4721 if (!z.isInfinity()) {
4722 Floats[0] = std::move(z);
4723 Floats[1].makeZero(/* Neg = */ false);
4724 return (opStatus)Status;
4725 }
4726 Status = opOK;
4727 auto AComparedToC = a.compareAbsoluteValue(c);
4728 z = cc;
4729 Status |= z.add(aa, RM);
4730 if (AComparedToC == APFloat::cmpGreaterThan) {
4731 // z = cc + aa + c + a;
4732 Status |= z.add(c, RM);
4733 Status |= z.add(a, RM);
4734 } else {
4735 // z = cc + aa + a + c;
4736 Status |= z.add(a, RM);
4737 Status |= z.add(c, RM);
4738 }
4739 if (!z.isFinite()) {
4740 Floats[0] = std::move(z);
4741 Floats[1].makeZero(/* Neg = */ false);
4742 return (opStatus)Status;
4743 }
4744 Floats[0] = z;
4745 APFloat zz = aa;
4746 Status |= zz.add(cc, RM);
4747 if (AComparedToC == APFloat::cmpGreaterThan) {
4748 // Floats[1] = a - z + c + zz;
4749 Floats[1] = a;
4750 Status |= Floats[1].subtract(z, RM);
4751 Status |= Floats[1].add(c, RM);
4752 Status |= Floats[1].add(zz, RM);
4753 } else {
4754 // Floats[1] = c - z + a + zz;
4755 Floats[1] = c;
4756 Status |= Floats[1].subtract(z, RM);
4757 Status |= Floats[1].add(a, RM);
4758 Status |= Floats[1].add(zz, RM);
4759 }
4760 } else {
4761 // q = a - z;
4762 APFloat q = a;
4763 Status |= q.subtract(z, RM);
4764
4765 // zz = q + c + (a - (q + z)) + aa + cc;
4766 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
4767 auto zz = q;
4768 Status |= zz.add(c, RM);
4769 Status |= q.add(z, RM);
4770 Status |= q.subtract(a, RM);
4771 q.changeSign();
4772 Status |= zz.add(q, RM);
4773 Status |= zz.add(aa, RM);
4774 Status |= zz.add(cc, RM);
4775 if (zz.isZero() && !zz.isNegative()) {
4776 Floats[0] = std::move(z);
4777 Floats[1].makeZero(/* Neg = */ false);
4778 return opOK;
4779 }
4780 Floats[0] = z;
4781 Status |= Floats[0].add(zz, RM);
4782 if (!Floats[0].isFinite()) {
4783 Floats[1].makeZero(/* Neg = */ false);
4784 return (opStatus)Status;
4785 }
4786 Floats[1] = std::move(z);
4787 Status |= Floats[1].subtract(Floats[0], RM);
4788 Status |= Floats[1].add(zz, RM);
4789 }
4790 return (opStatus)Status;
4791}
4792
4793APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
4794 const DoubleAPFloat &RHS,
4795 DoubleAPFloat &Out,
4796 roundingMode RM) {
4797 if (LHS.getCategory() == fcNaN) {
4798 Out = LHS;
4799 return opOK;
4800 }
4801 if (RHS.getCategory() == fcNaN) {
4802 Out = RHS;
4803 return opOK;
4804 }
4805 if (LHS.getCategory() == fcZero) {
4806 Out = RHS;
4807 return opOK;
4808 }
4809 if (RHS.getCategory() == fcZero) {
4810 Out = LHS;
4811 return opOK;
4812 }
4813 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity &&
4814 LHS.isNegative() != RHS.isNegative()) {
4815 Out.makeNaN(false, Out.isNegative(), nullptr);
4816 return opInvalidOp;
4817 }
4818 if (LHS.getCategory() == fcInfinity) {
4819 Out = LHS;
4820 return opOK;
4821 }
4822 if (RHS.getCategory() == fcInfinity) {
4823 Out = RHS;
4824 return opOK;
4825 }
4826 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
4827
4828 APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
4829 CC(RHS.Floats[1]);
4830 assert(&A.getSemantics() == &APFloatBase::semIEEEdouble);
4831 assert(&AA.getSemantics() == &APFloatBase::semIEEEdouble);
4832 assert(&C.getSemantics() == &APFloatBase::semIEEEdouble);
4833 assert(&CC.getSemantics() == &APFloatBase::semIEEEdouble);
4834 assert(&Out.Floats[0].getSemantics() == &APFloatBase::semIEEEdouble);
4835 assert(&Out.Floats[1].getSemantics() == &APFloatBase::semIEEEdouble);
4836 return Out.addImpl(A, AA, C, CC, RM);
4837}
4838
4840 roundingMode RM) {
4841 return addWithSpecial(*this, RHS, *this, RM);
4842}
4843
4845 roundingMode RM) {
4846 changeSign();
4847 auto Ret = add(RHS, RM);
4848 changeSign();
4849 return Ret;
4850}
4851
4854 const auto &LHS = *this;
4855 auto &Out = *this;
4856 /* Interesting observation: For special categories, finding the lowest
4857 common ancestor of the following layered graph gives the correct
4858 return category:
4859
4860 NaN
4861 / \
4862 Zero Inf
4863 \ /
4864 Normal
4865
4866 e.g. NaN * NaN = NaN
4867 Zero * Inf = NaN
4868 Normal * Zero = Zero
4869 Normal * Inf = Inf
4870 */
4871 if (LHS.getCategory() == fcNaN) {
4872 Out = LHS;
4873 return opOK;
4874 }
4875 if (RHS.getCategory() == fcNaN) {
4876 Out = RHS;
4877 return opOK;
4878 }
4879 if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) ||
4880 (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) {
4881 Out.makeNaN(false, false, nullptr);
4882 return opOK;
4883 }
4884 if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) {
4885 Out = LHS;
4886 return opOK;
4887 }
4888 if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) {
4889 Out = RHS;
4890 return opOK;
4891 }
4892 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal &&
4893 "Special cases not handled exhaustively");
4894
4895 int Status = opOK;
4896 APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
4897 // t = a * c
4898 APFloat T = A;
4899 Status |= T.multiply(C, RM);
4900 if (!T.isFiniteNonZero()) {
4901 Floats[0] = std::move(T);
4902 Floats[1].makeZero(/* Neg = */ false);
4903 return (opStatus)Status;
4904 }
4905
4906 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
4907 APFloat Tau = A;
4908 T.changeSign();
4909 Status |= Tau.fusedMultiplyAdd(C, T, RM);
4910 T.changeSign();
4911 {
4912 // v = a * d
4913 APFloat V = A;
4914 Status |= V.multiply(D, RM);
4915 // w = b * c
4916 APFloat W = B;
4917 Status |= W.multiply(C, RM);
4918 Status |= V.add(W, RM);
4919 // tau += v + w
4920 Status |= Tau.add(V, RM);
4921 }
4922 // u = t + tau
4923 APFloat U = T;
4924 Status |= U.add(Tau, RM);
4925
4926 Floats[0] = U;
4927 if (!U.isFinite()) {
4928 Floats[1].makeZero(/* Neg = */ false);
4929 } else {
4930 // Floats[1] = (t - u) + tau
4931 Status |= T.subtract(U, RM);
4932 Status |= T.add(Tau, RM);
4933 Floats[1] = std::move(T);
4934 }
4935 return (opStatus)Status;
4936}
4937
4940 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
4941 "Unexpected Semantics");
4942 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
4943 auto Ret = Tmp.divide(
4944 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
4945 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
4946 return Ret;
4947}
4948
4950 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
4951 "Unexpected Semantics");
4952 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
4953 auto Ret = Tmp.remainder(
4954 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4955 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
4956 return Ret;
4957}
4958
4960 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
4961 "Unexpected Semantics");
4962 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
4963 auto Ret = Tmp.mod(
4964 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4965 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
4966 return Ret;
4967}
4968
4971 const DoubleAPFloat &Addend,
4973 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
4974 "Unexpected Semantics");
4975 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
4976 auto Ret = Tmp.fusedMultiplyAdd(
4977 APFloat(APFloatBase::semPPCDoubleDoubleLegacy,
4978 Multiplicand.bitcastToAPInt()),
4979 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()),
4980 RM);
4981 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
4982 return Ret;
4983}
4984
4986 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
4987 "Unexpected Semantics");
4988 const APFloat &Hi = getFirst();
4989 const APFloat &Lo = getSecond();
4990
4991 APFloat RoundedHi = Hi;
4992 const opStatus HiStatus = RoundedHi.roundToIntegral(RM);
4993
4994 // We can reduce the problem to just the high part if the input:
4995 // 1. Represents a non-finite value.
4996 // 2. Has a component which is zero.
4997 if (!Hi.isFiniteNonZero() || Lo.isZero()) {
4998 Floats[0] = std::move(RoundedHi);
4999 Floats[1].makeZero(/*Neg=*/false);
5000 return HiStatus;
5001 }
5002
5003 // Adjust `Rounded` in the direction of `TieBreaker` if `ToRound` was at a
5004 // halfway point.
5005 auto RoundToNearestHelper = [](APFloat ToRound, APFloat Rounded,
5006 APFloat TieBreaker) {
5007 // RoundingError tells us which direction we rounded:
5008 // - RoundingError > 0: we rounded up.
5009 // - RoundingError < 0: we rounded down.
5010 // Sterbenz' lemma ensures that RoundingError is exact.
5011 const APFloat RoundingError = Rounded - ToRound;
5012 if (TieBreaker.isNonZero() &&
5013 TieBreaker.isNegative() != RoundingError.isNegative() &&
5014 abs(RoundingError).isExactlyValue(0.5))
5015 Rounded.add(
5016 APFloat::getOne(Rounded.getSemantics(), TieBreaker.isNegative()),
5018 return Rounded;
5019 };
5020
5021 // Case 1: Hi is not an integer.
5022 // Special cases are for rounding modes that are sensitive to ties.
5023 if (RoundedHi != Hi) {
5024 // We need to consider the case where Hi was between two integers and the
5025 // rounding mode broke the tie when, in fact, Lo may have had a different
5026 // sign than Hi.
5027 if (RM == rmNearestTiesToAway || RM == rmNearestTiesToEven)
5028 RoundedHi = RoundToNearestHelper(Hi, RoundedHi, Lo);
5029
5030 Floats[0] = std::move(RoundedHi);
5031 Floats[1].makeZero(/*Neg=*/false);
5032 return HiStatus;
5033 }
5034
5035 // Case 2: Hi is an integer.
5036 // Special cases are for rounding modes which are rounding towards or away from zero.
5037 RoundingMode LoRoundingMode;
5038 if (RM == rmTowardZero)
5039 // When our input is positive, we want the Lo component rounded toward
5040 // negative infinity to get the smallest result magnitude. Likewise,
5041 // negative inputs want the Lo component rounded toward positive infinity.
5042 LoRoundingMode = isNegative() ? rmTowardPositive : rmTowardNegative;
5043 else
5044 LoRoundingMode = RM;
5045
5046 APFloat RoundedLo = Lo;
5047 const opStatus LoStatus = RoundedLo.roundToIntegral(LoRoundingMode);
5048 if (LoRoundingMode == rmNearestTiesToAway)
5049 // We need to consider the case where Lo was between two integers and the
5050 // rounding mode broke the tie when, in fact, Hi may have had a different
5051 // sign than Lo.
5052 RoundedLo = RoundToNearestHelper(Lo, RoundedLo, Hi);
5053
5054 // We must ensure that the final result has no overlap between the two APFloat values.
5055 std::tie(RoundedHi, RoundedLo) = fastTwoSum(RoundedHi, RoundedLo);
5056
5057 Floats[0] = std::move(RoundedHi);
5058 Floats[1] = std::move(RoundedLo);
5059 return LoStatus;
5060}
5061
5063 Floats[0].changeSign();
5064 Floats[1].changeSign();
5065}
5066
5069 // Compare absolute values of the high parts.
5070 const cmpResult HiPartCmp = Floats[0].compareAbsoluteValue(RHS.Floats[0]);
5071 if (HiPartCmp != cmpEqual)
5072 return HiPartCmp;
5073
5074 // Zero, regardless of sign, is equal.
5075 if (Floats[1].isZero() && RHS.Floats[1].isZero())
5076 return cmpEqual;
5077
5078 // At this point, |this->Hi| == |RHS.Hi|.
5079 // The magnitude is |Hi+Lo| which is Hi+|Lo| if signs of Hi and Lo are the
5080 // same, and Hi-|Lo| if signs are different.
5081 const bool ThisIsSubtractive =
5082 Floats[0].isNegative() != Floats[1].isNegative();
5083 const bool RHSIsSubtractive =
5084 RHS.Floats[0].isNegative() != RHS.Floats[1].isNegative();
5085
5086 // Case 1: The low part of 'this' is zero.
5087 if (Floats[1].isZero())
5088 // We are comparing |Hi| vs. |Hi| ± |RHS.Lo|.
5089 // If RHS is subtractive, its magnitude is smaller.
5090 // If RHS is additive, its magnitude is larger.
5091 return RHSIsSubtractive ? cmpGreaterThan : cmpLessThan;
5092
5093 // Case 2: The low part of 'RHS' is zero (and we know 'this' is not).
5094 if (RHS.Floats[1].isZero())
5095 // We are comparing |Hi| ± |This.Lo| vs. |Hi|.
5096 // If 'this' is subtractive, its magnitude is smaller.
5097 // If 'this' is additive, its magnitude is larger.
5098 return ThisIsSubtractive ? cmpLessThan : cmpGreaterThan;
5099
5100 // If their natures differ, the additive one is larger.
5101 if (ThisIsSubtractive != RHSIsSubtractive)
5102 return ThisIsSubtractive ? cmpLessThan : cmpGreaterThan;
5103
5104 // Case 3: Both are additive (Hi+|Lo|) or both are subtractive (Hi-|Lo|).
5105 // The comparison now depends on the magnitude of the low parts.
5106 const cmpResult LoPartCmp = Floats[1].compareAbsoluteValue(RHS.Floats[1]);
5107
5108 if (ThisIsSubtractive) {
5109 // Both are subtractive (Hi-|Lo|), so the comparison of |Lo| is inverted.
5110 if (LoPartCmp == cmpLessThan)
5111 return cmpGreaterThan;
5112 if (LoPartCmp == cmpGreaterThan)
5113 return cmpLessThan;
5114 }
5115
5116 // If additive, the comparison of |Lo| is direct.
5117 // If equal, they are equal.
5118 return LoPartCmp;
5119}
5120
5122 return Floats[0].getCategory();
5123}
5124
5125bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
5126
5128 Floats[0].makeInf(Neg);
5129 Floats[1].makeZero(/* Neg = */ false);
5130}
5131
5133 Floats[0].makeZero(Neg);
5134 Floats[1].makeZero(/* Neg = */ false);
5135}
5136
5138 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5139 "Unexpected Semantics");
5140 Floats[0] =
5141 APFloat(APFloatBase::semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
5142 Floats[1] =
5143 APFloat(APFloatBase::semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
5144 if (Neg)
5145 changeSign();
5146}
5147
5149 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5150 "Unexpected Semantics");
5151 Floats[0].makeSmallest(Neg);
5152 Floats[1].makeZero(/* Neg = */ false);
5153}
5154
5156 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5157 "Unexpected Semantics");
5158 Floats[0] =
5159 APFloat(APFloatBase::semIEEEdouble, APInt(64, 0x0360000000000000ull));
5160 if (Neg)
5161 Floats[0].changeSign();
5162 Floats[1].makeZero(/* Neg = */ false);
5163}
5164
5165void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
5166 Floats[0].makeNaN(SNaN, Neg, fill);
5167 Floats[1].makeZero(/* Neg = */ false);
5168}
5169
5171 auto Result = Floats[0].compare(RHS.Floats[0]);
5172 // |Float[0]| > |Float[1]|
5173 if (Result == APFloat::cmpEqual)
5174 return Floats[1].compare(RHS.Floats[1]);
5175 return Result;
5176}
5177
5179 return Floats[0].bitwiseIsEqual(RHS.Floats[0]) &&
5180 Floats[1].bitwiseIsEqual(RHS.Floats[1]);
5181}
5182
5184 if (Arg.Floats)
5185 return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1]));
5186 return hash_combine(Arg.Semantics);
5187}
5188
5190 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5191 "Unexpected Semantics");
5192 uint64_t Data[] = {
5193 Floats[0].bitcastToAPInt().getRawData()[0],
5194 Floats[1].bitcastToAPInt().getRawData()[0],
5195 };
5196 return APInt(128, Data);
5197}
5198
5200 roundingMode RM) {
5201 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5202 "Unexpected Semantics");
5203 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy);
5204 auto Ret = Tmp.convertFromString(S, RM);
5205 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
5206 return Ret;
5207}
5208
5209// The double-double lattice of values corresponds to numbers which obey:
5210// - abs(lo) <= 1/2 * ulp(hi)
5211// - roundTiesToEven(hi + lo) == hi
5212//
5213// nextUp must choose the smallest output > input that follows these rules.
5214// nextDown must choose the largest output < input that follows these rules.
5216 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5217 "Unexpected Semantics");
5218 // nextDown(x) = -nextUp(-x)
5219 if (nextDown) {
5220 changeSign();
5221 APFloat::opStatus Result = next(/*nextDown=*/false);
5222 changeSign();
5223 return Result;
5224 }
5225 switch (getCategory()) {
5226 case fcInfinity:
5227 // nextUp(+inf) = +inf
5228 // nextUp(-inf) = -getLargest()
5229 if (isNegative())
5230 makeLargest(true);
5231 return opOK;
5232
5233 case fcNaN:
5234 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
5235 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
5236 // change the payload.
5237 if (getFirst().isSignaling()) {
5238 // For consistency, propagate the sign of the sNaN to the qNaN.
5239 makeNaN(false, isNegative(), nullptr);
5240 return opInvalidOp;
5241 }
5242 return opOK;
5243
5244 case fcZero:
5245 // nextUp(pm 0) = +getSmallest()
5246 makeSmallest(false);
5247 return opOK;
5248
5249 case fcNormal:
5250 break;
5251 }
5252
5253 const APFloat &HiOld = getFirst();
5254 const APFloat &LoOld = getSecond();
5255
5256 APFloat NextLo = LoOld;
5257 NextLo.next(/*nextDown=*/false);
5258
5259 // We want to admit values where:
5260 // 1. abs(Lo) <= ulp(Hi)/2
5261 // 2. Hi == RTNE(Hi + lo)
5262 auto InLattice = [](const APFloat &Hi, const APFloat &Lo) {
5263 return Hi + Lo == Hi;
5264 };
5265
5266 // Check if (HiOld, nextUp(LoOld)) is in the lattice.
5267 if (InLattice(HiOld, NextLo)) {
5268 // Yes, the result is (HiOld, nextUp(LoOld)).
5269 Floats[1] = std::move(NextLo);
5270
5271 // TODO: Because we currently rely on semPPCDoubleDoubleLegacy, our maximum
5272 // value is defined to have exactly 106 bits of precision. This limitation
5273 // results in semPPCDoubleDouble being unable to reach its maximum canonical
5274 // value.
5275 DoubleAPFloat Largest{*Semantics, uninitialized};
5276 Largest.makeLargest(/*Neg=*/false);
5277 if (compare(Largest) == cmpGreaterThan)
5278 makeInf(/*Neg=*/false);
5279
5280 return opOK;
5281 }
5282
5283 // Now we need to handle the cases where (HiOld, nextUp(LoOld)) is not the
5284 // correct result. We know the new hi component will be nextUp(HiOld) but our
5285 // lattice rules make it a little ambiguous what the correct NextLo must be.
5286 APFloat NextHi = HiOld;
5287 NextHi.next(/*nextDown=*/false);
5288
5289 // nextUp(getLargest()) == INFINITY
5290 if (NextHi.isInfinity()) {
5291 makeInf(/*Neg=*/false);
5292 return opOK;
5293 }
5294
5295 // IEEE 754-2019 5.3.1:
5296 // "If x is the negative number of least magnitude in x's format, nextUp(x) is
5297 // -0."
5298 if (NextHi.isZero()) {
5299 makeZero(/*Neg=*/true);
5300 return opOK;
5301 }
5302
5303 // abs(NextLo) must be <= ulp(NextHi)/2. We want NextLo to be as close to
5304 // negative infinity as possible.
5305 NextLo = neg(scalbn(harrisonUlp(NextHi), -1, rmTowardZero));
5306 if (!InLattice(NextHi, NextLo))
5307 // RTNE may mean that Lo must be < ulp(NextHi) / 2 so we bump NextLo.
5308 NextLo.next(/*nextDown=*/false);
5309
5310 Floats[0] = std::move(NextHi);
5311 Floats[1] = std::move(NextLo);
5312
5313 return opOK;
5314}
5315
/// Convert this double-double value to an integer of `Width` bits written into
/// `Input`, rounding with `RM`.
///
/// When the high part is not finite-nonzero, or the low part is zero, the work
/// is delegated to the high part's convertToInteger. Otherwise the value is
/// first rounded with roundToIntegral, and the two integral components are
/// converted and combined.
///
/// \param Input    destination parts for the integer result.
/// \param Width    bit width of the destination integer.
/// \param IsSigned whether the destination integer is signed.
/// \param RM       rounding mode used for the delegated conversion and for
///                 roundToIntegral.
/// \param IsExact  set to false before rounding; on the paths that combine
///                 both components it is set to whether roundToIntegral
///                 returned opOK. It is dereferenced unconditionally on the
///                 non-delegating path, so it must not be null.
/// \returns opInvalidOp when rounding or a component conversion reports it,
///          when a negative value is requested as unsigned, or when the
///          boundary case below overflows. Otherwise the status of
///          roundToIntegral, except on the path where a rounded component is
///          zero, which returns opInexact (or the high part's non-OK status).
5316APFloat::opStatus DoubleAPFloat::convertToSignExtendedInteger(
5317 MutableArrayRef<integerPart> Input, unsigned int Width, bool IsSigned,
5318 roundingMode RM, bool *IsExact) const {
5319 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5320 "Unexpected Semantics");
5321
5322 // If Hi is not finite, or Lo is zero, the value is entirely represented
5323 // by Hi. Delegate to the simpler single-APFloat conversion.
5324 if (!getFirst().isFiniteNonZero() || getSecond().isZero())
5325 return getFirst().convertToInteger(Input, Width, IsSigned, RM, IsExact);
5326
5327 // First, round the full double-double value to an integral value. This
5328 // simplifies the rest of the function, as we no longer need to consider
5329 // fractional parts.
5330 *IsExact = false;
5331 DoubleAPFloat Integral = *this;
5332 const opStatus RoundStatus = Integral.roundToIntegral(RM);
5333 if (RoundStatus == opInvalidOp)
5334 return opInvalidOp;
5335 const APFloat &IntegralHi = Integral.getFirst();
5336 const APFloat &IntegralLo = Integral.getSecond();
5337
5338 // If rounding results in either component being zero, the sum is trivial.
5339 // Delegate to the simpler single-APFloat conversion.
5340 bool HiIsExact;
5341 if (IntegralHi.isZero() || IntegralLo.isZero()) {
5342 const opStatus HiStatus =
5343 IntegralHi.convertToInteger(Input, Width, IsSigned, RM, &HiIsExact);
5344 // The conversion from an integer-valued float to an APInt may fail if the
5345 // result would be out of range. Regardless, taking this path is only
5346 // possible if rounding occurred during the initial `roundToIntegral`.
5347 return HiStatus == opOK ? opInexact : HiStatus;
5348 }
5349
5350 // A negative number cannot be represented by an unsigned integer.
5351 // Since a double-double is canonical, if Hi is negative, the sum is negative.
5352 if (!IsSigned && IntegralHi.isNegative())
5353 return opInvalidOp;
5354
5355 // Handle the special boundary case where |Hi| is exactly the power of two
5356 // that marks the edge of the integer's range (e.g., 2^63 for int64_t). In
5357 // this situation, Hi itself won't fit, but the sum Hi + Lo might.
5358 // `PositiveOverflowWidth` is the bit number for this boundary (N-1 for
5359 // signed, N for unsigned).
5360 bool LoIsExact;
5361 const int HiExactLog2 = IntegralHi.getExactLog2Abs();
5362 const unsigned PositiveOverflowWidth = IsSigned ? Width - 1 : Width;
5363 if (HiExactLog2 >= 0 &&
5364 static_cast<unsigned>(HiExactLog2) == PositiveOverflowWidth) {
5365 // If Hi and Lo have the same sign, |Hi + Lo| > |Hi|, so the sum is
5366 // guaranteed to overflow. E.g., for uint128_t, (2^128, 1) overflows.
5367 if (IntegralHi.isNegative() == IntegralLo.isNegative())
5368 return opInvalidOp;
5369
5370 // If the signs differ, the sum will fit. We can compute the result using
5371 // properties of two's complement arithmetic without a wide intermediate
5372 // integer. E.g., for uint128_t, (2^128, -1) should be 2^128 - 1.
5373 const opStatus LoStatus = IntegralLo.convertToInteger(
5374 Input, Width, /*IsSigned=*/true, RM, &LoIsExact);
5375 if (LoStatus == opInvalidOp)
5376 return opInvalidOp;
5377
5378 // Adjust the bit pattern of Lo to account for Hi's value:
5379 // - For unsigned (Hi=2^Width): `2^Width + Lo` in `Width`-bit
5380 // arithmetic is equivalent to just `Lo`. The conversion of `Lo` above
5381 // already produced the correct final bit pattern.
5382 // - For signed (Hi=2^(Width-1)): The sum `2^(Width-1) + Lo` (where Lo<0)
5383 // can be computed by taking the two's complement pattern for `Lo` and
5384 // clearing the sign bit.
// NOTE(review): a signed conversion with Hi == -2^(Width-1) and Lo > 0 also
// reaches this point (the exact-log2 test uses the absolute value), and no
// adjustment is applied to Lo's bit pattern in that case — confirm this is
// the intended result.
5385 if (IsSigned && !IntegralHi.isNegative())
5386 APInt::tcClearBit(Input.data(), PositiveOverflowWidth);
5387 *IsExact = RoundStatus == opOK;
5388 return RoundStatus;
5389 }
5390
5391 // Convert Hi into an integer. This may not fit but that is OK: we know that
5392 // Hi + Lo would not fit either in this situation.
5393 const opStatus HiStatus = IntegralHi.convertToInteger(
5394 Input, Width, IsSigned, rmTowardZero, &HiIsExact);
5395 if (HiStatus == opInvalidOp)
5396 return HiStatus;
5397
5398 // Convert Lo into a temporary integer of the same width.
// NOTE(review): LoResult takes its signedness from IsSigned, so for an
// unsigned destination a negative IntegralLo is converted into an unsigned
// integer — presumably that reports opInvalidOp even when Hi + Lo is
// non-negative; verify against the single-APFloat convertToInteger.
5399 APSInt LoResult{Width, /*isUnsigned=*/!IsSigned};
5400 const opStatus LoStatus =
5401 IntegralLo.convertToInteger(LoResult, rmTowardZero, &LoIsExact);
5402 if (LoStatus == opInvalidOp)
5403 return LoStatus;
5404
5405 // Add Lo to Hi. This addition is guaranteed not to overflow because of the
5406 // double-double canonicalization rule (`|Lo| <= ulp(Hi)/2`). The only case
5407 // where the sum could cross the integer type's boundary is when Hi is a
5408 // power of two, which is handled by the special case block above.
5409 APInt::tcAdd(Input.data(), LoResult.getRawData(), /*carry=*/0, Input.size());
5410
5411 *IsExact = RoundStatus == opOK;
5412 return RoundStatus;
5413}
5414
5417 unsigned int Width, bool IsSigned,
5418 roundingMode RM, bool *IsExact) const {
5419 opStatus FS =
5420 convertToSignExtendedInteger(Input, Width, IsSigned, RM, IsExact);
5421
5422 if (FS == opInvalidOp) {
5423 const unsigned DstPartsCount = partCountForBits(Width);
5424 assert(DstPartsCount <= Input.size() && "Integer too big");
5425
5426 unsigned Bits;
5427 if (getCategory() == fcNaN)
5428 Bits = 0;
5429 else if (isNegative())
5430 Bits = IsSigned;
5431 else
5432 Bits = Width - IsSigned;
5433
5434 tcSetLeastSignificantBits(Input.data(), DstPartsCount, Bits);
5435 if (isNegative() && IsSigned)
5436 APInt::tcShiftLeft(Input.data(), DstPartsCount, Width - 1);
5437 }
5438
5439 return FS;
5440}
5441
5442APFloat::opStatus DoubleAPFloat::handleOverflow(roundingMode RM) {
5443 switch (RM) {
5445 makeLargest(/*Neg=*/isNegative());
5446 break;
5448 if (isNegative())
5449 makeInf(/*Neg=*/true);
5450 else
5451 makeLargest(/*Neg=*/false);
5452 break;
5454 if (isNegative())
5455 makeLargest(/*Neg=*/true);
5456 else
5457 makeInf(/*Neg=*/false);
5458 break;
5461 makeInf(/*Neg=*/isNegative());
5462 break;
5463 default:
5464 llvm_unreachable("Invalid rounding mode found");
5465 }
5466 opStatus S = opInexact;
5467 if (!getFirst().isFinite())
5468 S = static_cast<opStatus>(S | opOverflow);
5469 return S;
5470}
5471
5472APFloat::opStatus DoubleAPFloat::convertFromUnsignedParts(
5473 const integerPart *Src, unsigned int SrcCount, roundingMode RM) {
5474 // Find the most significant bit of the source integer. APInt::tcMSB returns
5475 // UINT_MAX for a zero value.
5476 const unsigned SrcMSB = APInt::tcMSB(Src, SrcCount);
5477 if (SrcMSB == UINT_MAX) {
5478 // The source integer is 0.
5479 makeZero(/*Neg=*/false);
5480 return opOK;
5481 }
5482
5483 // Create a minimally-sized APInt to represent the source value.
5484 const unsigned SrcBitWidth = SrcMSB + 1;
5485 APSInt SrcInt{APInt{/*numBits=*/SrcBitWidth, ArrayRef(Src, SrcCount)},
5486 /*isUnsigned=*/true};
5487
5488 // Stage 1: Initial Approximation.
5489 // Convert the source integer SrcInt to the Hi part of the DoubleAPFloat.
5490 // We use round-to-nearest because it minimizes the initial error, which is
5491 // crucial for the subsequent steps.
5493 Hi.convertFromAPInt(SrcInt, /*IsSigned=*/false, rmNearestTiesToEven);
5494
5495 // If the first approximation already overflows, the number is too large.
5496 // NOTE: The underlying semantics are *more* conservative when choosing to
5497 // overflow because their notion of ULP is much larger. As such, it is always
5498 // safe to overflow at the DoubleAPFloat level if the APFloat overflows.
5499 if (!Hi.isFinite())
5500 return handleOverflow(RM);
5501
5502 // Stage 2: Exact Error Calculation.
5503 // Calculate the exact error of the first approximation: Error = SrcInt - Hi.
5504 // This is done by converting Hi back to an integer and subtracting it from
5505 // the original source.
5506 bool HiAsIntIsExact;
5507 // Create an integer representation of Hi. Its width is determined by the
5508 // exponent of Hi, ensuring it's just large enough. This width can exceed
5509 // SrcBitWidth if the conversion to Hi rounded up to a power of two.
5510 // That width lets Hi be represented accurately when converted back to an integer.
5511 APSInt HiAsInt{static_cast<uint32_t>(ilogb(Hi) + 1), /*isUnsigned=*/true};
5512 Hi.convertToInteger(HiAsInt, rmNearestTiesToEven, &HiAsIntIsExact);
5513 const APInt Error = SrcInt.zext(HiAsInt.getBitWidth()) - HiAsInt;
5514
5515 // Stage 3: Error Approximation and Rounding.
5516 // Convert the integer error into the Lo part of the DoubleAPFloat. This step
5517 // captures the remainder of the original number. The rounding mode for this
5518 // conversion (LoRM) may need to be adjusted from the user-requested RM to
5519 // ensure the final sum (Hi + Lo) rounds correctly.
5520 roundingMode LoRM = RM;
5521 // Adjustments are only necessary when the initial approximation Hi was an
5522 // overestimate, making the Error negative.
5523 if (Error.isNegative()) {
5524 if (RM == rmNearestTiesToAway) {
5525 // For rmNearestTiesToAway, a tie should round away from zero. Since
5526 // SrcInt is positive, this means rounding toward +infinity.
5527 // A standard conversion of a negative Error would round ties toward
5528 // -infinity, causing the final sum Hi + Lo to be smaller. To
5529 // counteract this, we detect the tie case and override the rounding
5530 // mode for Lo to rmTowardPositive.
5531 const unsigned ErrorActiveBits = Error.getSignificantBits() - 1;
5532 const unsigned LoPrecision = getSecond().getSemantics().precision;
5533 if (ErrorActiveBits > LoPrecision) {
5534 const unsigned RoundingBoundary = ErrorActiveBits - LoPrecision;
5535 // A tie occurs when the bits to be truncated are of the form 100...0.
5536 // This is detected by checking if the number of trailing zeros is
5537 // exactly one less than the number of bits being truncated.
5538 if (Error.countTrailingZeros() == RoundingBoundary - 1)
5539 LoRM = rmTowardPositive;
5540 }
5541 } else if (RM == rmTowardZero) {
5542 // For rmTowardZero, the final positive result must be truncated (rounded
5543 // down). When Hi is an overestimate, Error is negative. A standard
5544 // rmTowardZero conversion of Error would make it *less* negative,
5545 // effectively rounding the final sum Hi + Lo *up*. To ensure the sum
5546 // rounds down correctly, we force Lo to round toward -infinity.
5547 LoRM = rmTowardNegative;
5548 }
5549 }
5550
5552 opStatus Status = Lo.convertFromAPInt(Error, /*IsSigned=*/true, LoRM);
5553
5554 // Renormalize the pair (Hi, Lo) into a canonical DoubleAPFloat form where the
5555 // components do not overlap. fastTwoSum performs this operation.
5556 std::tie(Hi, Lo) = fastTwoSum(Hi, Lo);
5557 Floats[0] = std::move(Hi);
5558 Floats[1] = std::move(Lo);
5559
5560 // A final check for overflow is needed because fastTwoSum can cause a
5561 // carry-out from Lo that pushes Hi to infinity.
5562 if (!getFirst().isFinite())
5563 return handleOverflow(RM);
5564
5565 // The largest DoubleAPFloat must be canonical. Values which are larger are
5566 // not canonical and are equivalent to overflow.
5567 if (getFirst().isFiniteNonZero() && Floats[0].isLargest()) {
5568 DoubleAPFloat Largest{*Semantics};
5569 Largest.makeLargest(/*Neg=*/false);
5570 if (compare(Largest) == APFloat::cmpGreaterThan)
5571 return handleOverflow(RM);
5572 }
5573
5574 // The final status of the operation is determined by the conversion of the
5575 // error term. If Lo could represent Error exactly, the entire conversion
5576 // is exact. Otherwise, it's inexact.
5577 return Status;
5578}
5579
5581 bool IsSigned,
5582 roundingMode RM) {
5583 const bool NegateInput = IsSigned && Input.isNegative();
5584 APInt API = Input;
5585 if (NegateInput)
5586 API.negate();
5587
5589 convertFromUnsignedParts(API.getRawData(), API.getNumWords(), RM);
5590 if (NegateInput)
5591 changeSign();
5592 return Status;
5593}
5594
5596 unsigned int HexDigits,
5597 bool UpperCase,
5598 roundingMode RM) const {
5599 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5600 "Unexpected Semantics");
5601 return APFloat(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt())
5602 .convertToHexString(DST, HexDigits, UpperCase, RM);
5603}
5604
5606 return getCategory() == fcNormal &&
5607 (Floats[0].isDenormal() || Floats[1].isDenormal() ||
5608 // (double)(Hi + Lo) == Hi defines a normal number.
5609 Floats[0] != Floats[0] + Floats[1]);
5610}
5611
5613 if (getCategory() != fcNormal)
5614 return false;
5615 DoubleAPFloat Tmp(*this);
5616 Tmp.makeSmallest(this->isNegative());
5617 return Tmp.compare(*this) == cmpEqual;
5618}
5619
5621 if (getCategory() != fcNormal)
5622 return false;
5623
5624 DoubleAPFloat Tmp(*this);
5626 return Tmp.compare(*this) == cmpEqual;
5627}
5628
5630 if (getCategory() != fcNormal)
5631 return false;
5632 DoubleAPFloat Tmp(*this);
5633 Tmp.makeLargest(this->isNegative());
5634 return Tmp.compare(*this) == cmpEqual;
5635}
5636
5638 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5639 "Unexpected Semantics");
5640 return Floats[0].isInteger() && Floats[1].isInteger();
5641}
5642
5644 unsigned FormatPrecision,
5645 unsigned FormatMaxPadding,
5646 bool TruncateZero) const {
5647 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5648 "Unexpected Semantics");
5649 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt())
5650 .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
5651}
5652
5654 // In order for Hi + Lo to be a power of two, the following must be true:
5655 // 1. Hi must be a power of two.
5656 // 2. Lo must be zero.
5657 if (getSecond().isNonZero())
5658 return INT_MIN;
5659 return getFirst().getExactLog2Abs();
5660}
5661
5662int ilogb(const DoubleAPFloat &Arg) {
5663 const APFloat &Hi = Arg.getFirst();
5664 const APFloat &Lo = Arg.getSecond();
5665 int IlogbResult = ilogb(Hi);
5666 // Zero and non-finite values can delegate to ilogb(Hi).
5667 if (Arg.getCategory() != fcNormal)
5668 return IlogbResult;
5669 // If Lo can't change the binade, we can delegate to ilogb(Hi).
5670 if (Lo.isZero() || Hi.isNegative() == Lo.isNegative())
5671 return IlogbResult;
5672 if (Hi.getExactLog2Abs() == INT_MIN)
5673 return IlogbResult;
5674 // Numbers of the form 2^a - 2^b or -2^a + 2^b are almost powers of two but
5675 // get nudged out of the binade by the low component.
5676 return IlogbResult - 1;
5677}
5678
5681 assert(Arg.Semantics == &APFloatBase::PPCDoubleDouble() &&
5682 "Unexpected Semantics");
5684 scalbn(Arg.Floats[0], Exp, RM),
5685 scalbn(Arg.Floats[1], Exp, RM));
5686}
5687
5688DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
5690 assert(Arg.Semantics == &APFloatBase::PPCDoubleDouble() &&
5691 "Unexpected Semantics");
5692
5693 // Get the unbiased exponent e of the number, where |Arg| = m * 2^e for m in
5694 // [1.0, 2.0).
5695 Exp = ilogb(Arg);
5696
5697 // For NaNs, quiet any signaling NaN and return the result, as per standard
5698 // practice.
5699 if (Exp == APFloat::IEK_NaN) {
5700 DoubleAPFloat Quiet{Arg};
5701 Quiet.getFirst() = Quiet.getFirst().makeQuiet();
5702 return Quiet;
5703 }
5704
5705 // For infinity, return it unchanged. The exponent remains IEK_Inf.
5706 if (Exp == APFloat::IEK_Inf)
5707 return Arg;
5708
5709 // For zero, the fraction is zero and the standard requires the exponent be 0.
5710 if (Exp == APFloat::IEK_Zero) {
5711 Exp = 0;
5712 return Arg;
5713 }
5714
5715 const APFloat &Hi = Arg.getFirst();
5716 const APFloat &Lo = Arg.getSecond();
5717
5718 // frexp requires the fraction's absolute value to be in [0.5, 1.0).
5719 // ilogb provides an exponent for an absolute value in [1.0, 2.0).
5720 // Increment the exponent to ensure the fraction is in the correct range.
5721 ++Exp;
5722
5723 const bool SignsDisagree = Hi.isNegative() != Lo.isNegative();
5724 APFloat Second = Lo;
5725 if (Arg.getCategory() == APFloat::fcNormal && Lo.isFiniteNonZero()) {
5726 roundingMode LoRoundingMode;
5727 // The interpretation of rmTowardZero depends on the sign of the combined
5728 // Arg rather than the sign of the component.
5729 if (RM == rmTowardZero)
5730 LoRoundingMode = Arg.isNegative() ? rmTowardPositive : rmTowardNegative;
5731 // For rmNearestTiesToAway, we face a similar problem. If signs disagree,
5732 // Lo is a correction *toward* zero relative to Hi. Rounding Lo
5733 // "away from zero" based on its own sign would move the value in the
5734 // wrong direction. As a safe proxy, we use rmNearestTiesToEven, which is
5735 // direction-agnostic. We only need to bother with this if Lo is scaled
5736 // down.
5737 else if (RM == rmNearestTiesToAway && SignsDisagree && Exp > 0)
5738 LoRoundingMode = rmNearestTiesToEven;
5739 else
5740 LoRoundingMode = RM;
5741 Second = scalbn(Lo, -Exp, LoRoundingMode);
5742 // The rmNearestTiesToEven proxy is correct most of the time, but it
5743 // differs from rmNearestTiesToAway when the scaled value of Lo is an
5744 // exact midpoint.
5745 // NOTE: This is morally equivalent to roundTiesTowardZero.
5746 if (RM == rmNearestTiesToAway && LoRoundingMode == rmNearestTiesToEven) {
5747 // Re-scale the result back to check if rounding occurred.
5748 const APFloat RecomposedLo = scalbn(Second, Exp, rmNearestTiesToEven);
5749 if (RecomposedLo != Lo) {
5750 // RoundingError tells us which direction we rounded:
5751 // - RoundingError > 0: we rounded up.
5752 // - RoundingError < 0: we rounded down.
5753 const APFloat RoundingError = RecomposedLo - Lo;
5754 // Determine if scalbn(Lo, -Exp) landed exactly on a midpoint.
5755 // We do this by checking if the absolute rounding error is exactly
5756 // half a ULP of the result.
5757 const APFloat UlpOfSecond = harrisonUlp(Second);
5758 const APFloat ScaledUlpOfSecond =
5759 scalbn(UlpOfSecond, Exp - 1, rmNearestTiesToEven);
5760 const bool IsMidpoint = abs(RoundingError) == ScaledUlpOfSecond;
5761 const bool RoundedLoAway =
5762 Second.isNegative() == RoundingError.isNegative();
5763 // The sign of Hi and Lo disagree and we rounded Lo away: we must
5764 // decrease the magnitude of Second to increase the magnitude
5765 // First+Second.
5766 if (IsMidpoint && RoundedLoAway)
5767 Second.next(/*nextDown=*/!Second.isNegative());
5768 }
5769 }
5770 // Handle a tricky edge case where Arg is slightly less than a power of two
5771 // (e.g., Arg = 2^k - epsilon). In this situation:
5772 // 1. Hi is 2^k, and Lo is a small negative value -epsilon.
5773 // 2. ilogb(Arg) correctly returns k-1.
5774 // 3. Our initial Exp becomes (k-1) + 1 = k.
5775 // 4. Scaling Hi (2^k) by 2^-k would yield a magnitude of 1.0 and
5776 // scaling Lo by 2^-k would yield zero. This would make the result 1.0
5777 // which is an invalid fraction, as the required interval is [0.5, 1.0).
5778 // We detect this specific case by checking if Hi is a power of two and if
5779 // the scaled Lo underflowed to zero. The fix: Increment Exp to k+1. This
5780 // adjusts the scale factor, causing Hi to be scaled to 0.5, which is a
5781 // valid fraction.
5782 if (Second.isZero() && SignsDisagree && Hi.getExactLog2Abs() != INT_MIN)
5783 ++Exp;
5784 }
5785
5786 APFloat First = scalbn(Hi, -Exp, RM);
5788 std::move(Second));
5789}
5790
5791APInt DoubleAPFloat::getNaNPayload() const { return Floats[0].getNaNPayload(); }
5792} // namespace detail
5793
5794APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
5795 if (usesLayout<IEEEFloat>(Semantics)) {
5796 new (&IEEE) IEEEFloat(std::move(F));
5797 return;
5798 }
5799 if (usesLayout<DoubleAPFloat>(Semantics)) {
5800 const fltSemantics& S = F.getSemantics();
5801 new (&Double) DoubleAPFloat(Semantics, APFloat(std::move(F), S),
5803 return;
5804 }
5805 llvm_unreachable("Unexpected semantics");
5806}
5807
5812
5813hash_code hash_value(const APFloat &Arg) {
5814 if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics()))
5815 return hash_value(Arg.U.IEEE);
5816 if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics()))
5817 return hash_value(Arg.U.Double);
5818 llvm_unreachable("Unexpected semantics");
5819}
5820
5822 : APFloat(Semantics) {
5823 auto StatusOrErr = convertFromString(S, rmNearestTiesToEven);
5824 assert(StatusOrErr && "Invalid floating point representation");
5825 consumeError(StatusOrErr.takeError());
5826}
5827
5829 if (isZero())
5830 return isNegative() ? fcNegZero : fcPosZero;
5831 if (isNormal())
5832 return isNegative() ? fcNegNormal : fcPosNormal;
5833 if (isDenormal())
5835 if (isInfinity())
5836 return isNegative() ? fcNegInf : fcPosInf;
5837 assert(isNaN() && "Other class of FP constant");
5838 return isSignaling() ? fcSNan : fcQNan;
5839}
5840
5841bool APFloat::getExactInverse(APFloat *Inv) const {
5842 // Only finite, non-zero numbers can have a useful, representable inverse.
5843 // This check filters out +/- zero, +/- infinity, and NaN.
5844 if (!isFiniteNonZero())
5845 return false;
5846
5847 // Historically, this function rejects subnormal inputs. One reason why this
5848 // might be important is that subnormals may behave differently under FTZ/DAZ
5849 // runtime behavior.
5850 if (isDenormal())
5851 return false;
5852
5853 // A number has an exact, representable inverse if and only if it is a power
5854 // of two.
5855 //
5856 // Mathematical Rationale:
5857 // 1. A binary floating-point number x is a dyadic rational, meaning it can
5858 // be written as x = M / 2^k for integers M (the significand) and k.
5859 // 2. The inverse is 1/x = 2^k / M.
5860 // 3. For 1/x to also be a dyadic rational (and thus exactly representable
5861 // in binary), its denominator M must also be a power of two.
5862 // Let's say M = 2^m.
5863 // 4. Substituting this back into the formula for x, we get
5864 // x = (2^m) / (2^k) = 2^(m-k).
5865 //
5866 // This proves that x must be a power of two.
5867
5868 // getExactLog2Abs() returns the integer exponent if the number is a power of
5869 // two or INT_MIN if it is not.
5870 const int Exp = getExactLog2Abs();
5871 if (Exp == INT_MIN)
5872 return false;
5873
5874 // The inverse of +/- 2^Exp is +/- 2^(-Exp). We can compute this by
5875 // scaling 1.0 by the negated exponent.
5876 APFloat Reciprocal =
5877 scalbn(APFloat::getOne(getSemantics(), /*Negative=*/isNegative()), -Exp,
5878 rmTowardZero);
5879
5880 // scalbn might round if the resulting exponent -Exp is outside the
5881 // representable range, causing overflow (to infinity) or underflow. We
5882 // must verify that the result is still the exact power of two we expect.
5883 if (Reciprocal.getExactLog2Abs() != -Exp)
5884 return false;
5885
5886 // Avoid multiplication with a subnormal, it is not safe on all platforms and
5887 // may be slower than a normal division.
5888 if (Reciprocal.isDenormal())
5889 return false;
5890
5891 assert(Reciprocal.isFiniteNonZero());
5892
5893 if (Inv)
5894 *Inv = std::move(Reciprocal);
5895
5896 return true;
5897}
5898
5900 roundingMode RM, bool *losesInfo) {
5901 if (&getSemantics() == &ToSemantics) {
5902 *losesInfo = false;
5903 return opOK;
5904 }
5905 if (usesLayout<IEEEFloat>(getSemantics()) &&
5906 usesLayout<IEEEFloat>(ToSemantics))
5907 return U.IEEE.convert(ToSemantics, RM, losesInfo);
5908 if (usesLayout<IEEEFloat>(getSemantics()) &&
5909 usesLayout<DoubleAPFloat>(ToSemantics)) {
5910 assert(&ToSemantics == &APFloatBase::semPPCDoubleDouble);
5911 auto Ret =
5912 U.IEEE.convert(APFloatBase::semPPCDoubleDoubleLegacy, RM, losesInfo);
5913 *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
5914 return Ret;
5915 }
5916 if (usesLayout<DoubleAPFloat>(getSemantics()) &&
5917 usesLayout<IEEEFloat>(ToSemantics)) {
5918 auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo);
5919 *this = APFloat(std::move(getIEEE()), ToSemantics);
5920 return Ret;
5921 }
5922 llvm_unreachable("Unexpected semantics");
5923}
5924
5928
5930 SmallVector<char, 16> Buffer;
5931 toString(Buffer);
5932 OS << Buffer;
5933}
5934
5935#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
5937 print(dbgs());
5938 dbgs() << '\n';
5939}
5940#endif
5941
5943 NID.Add(bitcastToAPInt());
5944}
5945
5947 roundingMode rounding_mode,
5948 bool *isExact) const {
5949 unsigned bitWidth = result.getBitWidth();
5950 SmallVector<uint64_t, 4> parts(result.getNumWords());
5951 opStatus status = convertToInteger(parts, bitWidth, result.isSigned(),
5952 rounding_mode, isExact);
5953 // Keeps the original signed-ness.
5954 result = APInt(bitWidth, parts);
5955 return status;
5956}
5957
5959 if (&getSemantics() == &APFloatBase::semIEEEdouble)
5960 return getIEEE().convertToDouble();
5961 assert(isRepresentableBy(getSemantics(), semIEEEdouble) &&
5962 "Float semantics is not representable by IEEEdouble");
5963 APFloat Temp = *this;
5964 bool LosesInfo;
5965 [[maybe_unused]] opStatus St =
5966 Temp.convert(APFloatBase::semIEEEdouble, rmNearestTiesToEven, &LosesInfo);
5967 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
5968 return Temp.getIEEE().convertToDouble();
5969}
5970
5971#ifdef HAS_IEE754_FLOAT128
5972float128 APFloat::convertToQuad() const {
5973 if (&getSemantics() == &APFloatBase::semIEEEquad)
5974 return getIEEE().convertToQuad();
5975 assert(isRepresentableBy(getSemantics(), semIEEEquad) &&
5976 "Float semantics is not representable by IEEEquad");
5977 APFloat Temp = *this;
5978 bool LosesInfo;
5979 [[maybe_unused]] opStatus St =
5980 Temp.convert(APFloatBase::semIEEEquad, rmNearestTiesToEven, &LosesInfo);
5981 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
5982 return Temp.getIEEE().convertToQuad();
5983}
5984#endif
5985
5987 if (&getSemantics() == &APFloatBase::semIEEEsingle)
5988 return getIEEE().convertToFloat();
5989 assert(isRepresentableBy(getSemantics(), semIEEEsingle) &&
5990 "Float semantics is not representable by IEEEsingle");
5991 APFloat Temp = *this;
5992 bool LosesInfo;
5993 [[maybe_unused]] opStatus St =
5994 Temp.convert(APFloatBase::semIEEEsingle, rmNearestTiesToEven, &LosesInfo);
5995 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
5996 return Temp.getIEEE().convertToFloat();
5997}
5998
6000 static constexpr StringLiteral ValidFormats[] = {
6001 "Float8E5M2", "Float8E5M2FNUZ", "Float8E4M3", "Float8E4M3FN",
6002 "Float8E4M3FNUZ", "Float8E4M3B11FNUZ", "Float8E3M4", "Float8E8M0FNU",
6003 "Float6E3M2FN", "Float6E2M3FN", "Float4E2M1FN"};
6004 return llvm::is_contained(ValidFormats, Format);
6005}
6006
6008 // TODO: extend to remaining arbitrary FP types: Float8E4M3, Float8E3M4,
6009 // Float8E5M2FNUZ, Float8E4M3FNUZ, Float8E4M3B11FNUZ, Float8E8M0FNU.
6011 .Case("Float8E5M2", &semFloat8E5M2)
6012 .Case("Float8E4M3FN", &semFloat8E4M3FN)
6013 .Case("Float4E2M1FN", &semFloat4E2M1FN)
6014 .Case("Float6E3M2FN", &semFloat6E3M2FN)
6015 .Case("Float6E2M3FN", &semFloat6E2M3FN)
6016 .Default(nullptr);
6017}
6018
6019APFloat::Storage::~Storage() {
6020 if (usesLayout<IEEEFloat>(*semantics)) {
6021 IEEE.~IEEEFloat();
6022 return;
6023 }
6024 if (usesLayout<DoubleAPFloat>(*semantics)) {
6025 Double.~DoubleAPFloat();
6026 return;
6027 }
6028 llvm_unreachable("Unexpected semantics");
6029}
6030
6031APFloat::Storage::Storage(const APFloat::Storage &RHS) {
6032 if (usesLayout<IEEEFloat>(*RHS.semantics)) {
6033 new (this) IEEEFloat(RHS.IEEE);
6034 return;
6035 }
6036 if (usesLayout<DoubleAPFloat>(*RHS.semantics)) {
6037 new (this) DoubleAPFloat(RHS.Double);
6038 return;
6039 }
6040 llvm_unreachable("Unexpected semantics");
6041}
6042
6043APFloat::Storage::Storage(APFloat::Storage &&RHS) {
6044 if (usesLayout<IEEEFloat>(*RHS.semantics)) {
6045 new (this) IEEEFloat(std::move(RHS.IEEE));
6046 return;
6047 }
6048 if (usesLayout<DoubleAPFloat>(*RHS.semantics)) {
6049 new (this) DoubleAPFloat(std::move(RHS.Double));
6050 return;
6051 }
6052 llvm_unreachable("Unexpected semantics");
6053}
6054
6055APFloat::Storage &APFloat::Storage::operator=(const APFloat::Storage &RHS) {
6056 if (usesLayout<IEEEFloat>(*semantics) &&
6057 usesLayout<IEEEFloat>(*RHS.semantics)) {
6058 IEEE = RHS.IEEE;
6059 } else if (usesLayout<DoubleAPFloat>(*semantics) &&
6060 usesLayout<DoubleAPFloat>(*RHS.semantics)) {
6061 Double = RHS.Double;
6062 } else if (this != &RHS) {
6063 this->~Storage();
6064 new (this) Storage(RHS);
6065 }
6066 return *this;
6067}
6068
6069APFloat::Storage &APFloat::Storage::operator=(APFloat::Storage &&RHS) {
6070 if (usesLayout<IEEEFloat>(*semantics) &&
6071 usesLayout<IEEEFloat>(*RHS.semantics)) {
6072 IEEE = std::move(RHS.IEEE);
6073 } else if (usesLayout<DoubleAPFloat>(*semantics) &&
6074 usesLayout<DoubleAPFloat>(*RHS.semantics)) {
6075 Double = std::move(RHS.Double);
6076 } else if (this != &RHS) {
6077 this->~Storage();
6078 new (this) Storage(std::move(RHS));
6079 }
6080 return *this;
6081}
6082
6083} // namespace llvm
6084
6085#undef APFLOAT_DISPATCH_ON_SEMANTICS
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define PackCategoriesIntoKey(_lhs, _rhs)
A macro used to combine two fcCategory enums into one key which can be used in a switch statement to ...
Definition APFloat.cpp:49
This file declares a class to represent arbitrary precision floating point values and provide a varie...
#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL)
Definition APFloat.h:26
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
Function Alias Analysis false
#define X(NUM, ENUM, NAME)
Definition ELF.h:851
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
static bool isNeg(Value *V)
Returns true if the operation is a negation of V, and it works for both integers and floats.
static bool isSigned(unsigned Opcode)
Utilities for dealing with flags related to floating point properties and mode controls.
This file defines a hash set that can be used to remove duplication of nodes in a graph.
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define T
#define P(N)
if(PassOpts->AAPipeline)
This file contains some templates that are useful if you are working with the STL at all.
This file contains some functions that are useful when dealing with strings.
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, const llvm::StringTable &StandardNames, VectorLibrary VecLib)
Initialize the set of available library functions based on the specified target triple.
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static const fltSemantics & Float8E4M3FN()
Definition APFloat.h:306
static LLVM_ABI const llvm::fltSemantics & EnumToSemantics(Semantics S)
Definition APFloat.cpp:98
static LLVM_ABI bool semanticsHasInf(const fltSemantics &)
Definition APFloat.cpp:247
cmpResult
IEEE-754R 5.11: Floating Point Comparison Relations.
Definition APFloat.h:334
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static LLVM_ABI ExponentType semanticsMinExponent(const fltSemantics &)
Definition APFloat.cpp:222
llvm::RoundingMode roundingMode
IEEE-754R 4.3: Rounding-direction attributes.
Definition APFloat.h:342
static const fltSemantics & BFloat()
Definition APFloat.h:295
static const fltSemantics & IEEEquad()
Definition APFloat.h:298
static LLVM_ABI unsigned int semanticsSizeInBits(const fltSemantics &)
Definition APFloat.cpp:225
static const fltSemantics & Float8E8M0FNU()
Definition APFloat.h:313
static LLVM_ABI bool semanticsHasSignedRepr(const fltSemantics &)
Definition APFloat.cpp:243
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static LLVM_ABI unsigned getSizeInBits(const fltSemantics &Sem)
Returns the size of the floating point number (in bits) in the given semantics.
Definition APFloat.cpp:278
static const fltSemantics & x87DoubleExtended()
Definition APFloat.h:317
static constexpr roundingMode rmTowardNegative
Definition APFloat.h:347
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static LLVM_ABI bool isValidArbitraryFPFormat(StringRef Format)
Returns true if the given string is a valid arbitrary floating-point format interpretation for llvm....
Definition APFloat.cpp:5999
static LLVM_ABI bool hasSignBitInMSB(const fltSemantics &)
Definition APFloat.cpp:260
static LLVM_ABI ExponentType semanticsMaxExponent(const fltSemantics &)
Definition APFloat.cpp:218
friend class APFloat
Definition APFloat.h:291
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Definition APFloat.cpp:214
static LLVM_ABI bool semanticsHasNaN(const fltSemantics &)
Definition APFloat.cpp:251
static LLVM_ABI Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
Definition APFloat.cpp:145
int32_t ExponentType
A signed type to represent a floating point numbers unbiased exponent.
Definition APFloat.h:155
static constexpr unsigned integerPartWidth
Definition APFloat.h:152
static const fltSemantics & PPCDoubleDoubleLegacy()
Definition APFloat.h:300
APInt::WordType integerPart
Definition APFloat.h:151
static LLVM_ABI bool semanticsHasZero(const fltSemantics &)
Definition APFloat.cpp:239
static LLVM_ABI bool isRepresentableAsNormalIn(const fltSemantics &Src, const fltSemantics &Dst)
Definition APFloat.cpp:264
static const fltSemantics & Float8E5M2FNUZ()
Definition APFloat.h:304
static const fltSemantics & Float8E4M3FNUZ()
Definition APFloat.h:307
static constexpr roundingMode rmTowardPositive
Definition APFloat.h:346
static const fltSemantics & IEEEhalf()
Definition APFloat.h:294
static const fltSemantics & Float4E2M1FN()
Definition APFloat.h:316
static const fltSemantics & Float6E2M3FN()
Definition APFloat.h:315
static const fltSemantics & Float8E4M3()
Definition APFloat.h:305
static const fltSemantics & Float8E4M3B11FNUZ()
Definition APFloat.h:308
static LLVM_ABI bool isRepresentableBy(const fltSemantics &A, const fltSemantics &B)
Definition APFloat.cpp:190
static const fltSemantics & Float8E3M4()
Definition APFloat.h:311
static LLVM_ABI bool isIEEELikeFP(const fltSemantics &)
Definition APFloat.cpp:255
static const fltSemantics & Float8E5M2()
Definition APFloat.h:303
fltCategory
Category of internally-represented number.
Definition APFloat.h:370
static constexpr roundingMode rmNearestTiesToAway
Definition APFloat.h:349
static const fltSemantics & PPCDoubleDouble()
Definition APFloat.h:299
static const fltSemantics & Float6E3M2FN()
Definition APFloat.h:314
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:360
static LLVM_ABI const fltSemantics * getArbitraryFPSemantics(StringRef Format)
Returns the fltSemantics for a given arbitrary FP format string, or nullptr if invalid.
Definition APFloat.cpp:6007
static const fltSemantics & FloatTF32()
Definition APFloat.h:312
static LLVM_ABI unsigned int semanticsIntSizeInBits(const fltSemantics &, bool)
Definition APFloat.cpp:228
static APFloat getQNaN(const fltSemantics &Sem, bool Negative=false, const APInt *payload=nullptr)
Factory for QNaN values.
Definition APFloat.h:1179
LLVM_ABI void Profile(FoldingSetNodeID &NID) const
Used to insert APFloat objects, or objects that contain APFloat objects, into FoldingSets.
Definition APFloat.cpp:5942
opStatus divide(const APFloat &RHS, roundingMode RM)
Definition APFloat.h:1267
bool isFiniteNonZero() const
Definition APFloat.h:1548
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:5899
LLVM_READONLY int getExactLog2Abs() const
Definition APFloat.h:1594
bool isNegative() const
Definition APFloat.h:1538
LLVM_ABI bool getExactInverse(APFloat *Inv) const
If this value is normal and has an exact, normal, multiplicative inverse, store it in inv and return ...
Definition APFloat.cpp:5841
cmpResult compareAbsoluteValue(const APFloat &RHS) const
Definition APFloat.h:1493
friend DoubleAPFloat
Definition APFloat.h:1610
LLVM_ABI double convertToDouble() const
Converts this APFloat to host double value.
Definition APFloat.cpp:5958
void toString(SmallVectorImpl< char > &Str, unsigned FormatPrecision=0, unsigned FormatMaxPadding=3, bool TruncateZero=true) const
Definition APFloat.h:1575
bool isNormal() const
Definition APFloat.h:1542
bool isDenormal() const
Definition APFloat.h:1539
opStatus add(const APFloat &RHS, roundingMode RM)
Definition APFloat.h:1240
static LLVM_ABI APFloat getAllOnesValue(const fltSemantics &Semantics)
Returns a float which is bitcasted from an all one value int.
Definition APFloat.cpp:5925
LLVM_ABI friend hash_code hash_value(const APFloat &Arg)
See friend declarations above.
Definition APFloat.cpp:5813
const fltSemantics & getSemantics() const
Definition APFloat.h:1546
bool isFinite() const
Definition APFloat.h:1543
bool isNaN() const
Definition APFloat.h:1536
static APFloat getOne(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative One.
Definition APFloat.h:1147
unsigned int convertToHexString(char *DST, unsigned int HexDigits, bool UpperCase, roundingMode RM) const
Definition APFloat.h:1528
LLVM_ABI float convertToFloat() const
Converts this APFloat to host float value.
Definition APFloat.cpp:5986
bool isSignaling() const
Definition APFloat.h:1540
opStatus fusedMultiplyAdd(const APFloat &Multiplicand, const APFloat &Addend, roundingMode RM)
Definition APFloat.h:1294
opStatus remainder(const APFloat &RHS)
Definition APFloat.h:1276
bool isZero() const
Definition APFloat.h:1534
APInt bitcastToAPInt() const
Definition APFloat.h:1430
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.h:1391
opStatus next(bool nextDown)
Definition APFloat.h:1313
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition APFloat.h:1157
friend APFloat scalbn(APFloat X, int Exp, roundingMode RM)
static APFloat getSmallest(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) finite number in the given semantics.
Definition APFloat.h:1207
LLVM_ABI FPClassTest classify() const
Return the FPClassTest which will return true for the value.
Definition APFloat.cpp:5828
opStatus mod(const APFloat &RHS)
Definition APFloat.h:1285
Expected< opStatus > convertFromString(StringRef, roundingMode)
Fill this APFloat with the result of a string conversion.
Definition APFloat.cpp:5808
friend IEEEFloat
Definition APFloat.h:1609
LLVM_DUMP_METHOD void dump() const
Definition APFloat.cpp:5936
LLVM_ABI void print(raw_ostream &) const
Definition APFloat.cpp:5929
opStatus roundToIntegral(roundingMode RM)
Definition APFloat.h:1307
static bool hasSignificand(const fltSemantics &Sem)
Returns true if the given semantics has actual significand.
Definition APFloat.h:1232
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
Definition APFloat.h:1138
bool isInfinity() const
Definition APFloat.h:1535
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt udiv(const APInt &RHS) const
Unsigned division operation.
Definition APInt.cpp:1616
static LLVM_ABI void tcSetBit(WordType *, unsigned bit)
Set the given bit of a bignum. Zero-based.
Definition APInt.cpp:2420
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
static LLVM_ABI void tcSet(WordType *, WordType, unsigned)
Sets the least significant part of a bignum to the input value, and zeroes out higher parts.
Definition APInt.cpp:2392
static LLVM_ABI void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition APInt.cpp:1810
static LLVM_ABI int tcExtractBit(const WordType *, unsigned bit)
Extract the given bit of a bignum; returns 0 or 1. Zero-based.
Definition APInt.cpp:2415
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1055
static LLVM_ABI WordType tcAdd(WordType *, const WordType *, WordType carry, unsigned)
DST += RHS + CARRY where CARRY is zero or one. Returns the carry flag.
Definition APInt.cpp:2494
static LLVM_ABI void tcExtract(WordType *, unsigned dstCount, const WordType *, unsigned srcBits, unsigned srcLSB)
Copy the bit vector of width srcBITS from SRC, starting at bit srcLSB, to DST, of dstCOUNT parts,...
Definition APInt.cpp:2464
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1535
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:968
static LLVM_ABI int tcCompare(const WordType *, const WordType *, unsigned)
Comparison (unsigned) of two bignums.
Definition APInt.cpp:2804
static APInt floatToBits(float V)
Converts a float to APInt bits.
Definition APInt.h:1775
uint64_t WordType
Definition APInt.h:80
static LLVM_ABI void tcAssign(WordType *, const WordType *, unsigned)
Assign one bignum to another.
Definition APInt.cpp:2400
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
static LLVM_ABI void tcShiftRight(WordType *, unsigned Words, unsigned Count)
Shift a bignum right Count bits.
Definition APInt.cpp:2778
static LLVM_ABI void tcFullMultiply(WordType *, const WordType *, const WordType *, unsigned, unsigned)
DST = LHS * RHS, where DST has width the sum of the widths of the operands.
Definition APInt.cpp:2684
unsigned getNumWords() const
Get the number of words.
Definition APInt.h:1518
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
static LLVM_ABI void tcClearBit(WordType *, unsigned bit)
Clear the given bit of a bignum. Zero-based.
Definition APInt.cpp:2425
void negate()
Negate this APInt in place.
Definition APInt.h:1491
static WordType tcDecrement(WordType *dst, unsigned parts)
Decrement a bignum in-place. Return the borrow flag.
Definition APInt.h:1941
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1662
static LLVM_ABI unsigned tcLSB(const WordType *, unsigned n)
Returns the bit number of the least or most significant set bit of a number.
Definition APInt.cpp:2431
static LLVM_ABI void tcShiftLeft(WordType *, unsigned Words, unsigned Count)
Shift a bignum left Count bits.
Definition APInt.cpp:2751
static LLVM_ABI bool tcIsZero(const WordType *, unsigned)
Returns true if a bignum is zero, false otherwise.
Definition APInt.cpp:2406
static LLVM_ABI unsigned tcMSB(const WordType *parts, unsigned n)
Returns the bit number of the most significant set bit of a number.
Definition APInt.cpp:2444
float bitsToFloat() const
Converts APInt bits to a float.
Definition APInt.h:1759
static LLVM_ABI int tcMultiplyPart(WordType *dst, const WordType *src, WordType multiplier, WordType carry, unsigned srcParts, unsigned dstParts, bool add)
DST += SRC * MULTIPLIER + PART if add is true DST = SRC * MULTIPLIER + PART if add is false.
Definition APInt.cpp:2582
static constexpr unsigned APINT_BITS_PER_WORD
Bits in a word.
Definition APInt.h:86
static LLVM_ABI WordType tcSubtract(WordType *, const WordType *, WordType carry, unsigned)
DST -= RHS + CARRY where CARRY is zero or one. Returns the carry flag.
Definition APInt.cpp:2529
static LLVM_ABI void tcNegate(WordType *, unsigned)
Negate a bignum in-place.
Definition APInt.cpp:2568
static APInt doubleToBits(double V)
Converts a double to APInt bits.
Definition APInt.h:1767
static WordType tcIncrement(WordType *dst, unsigned parts)
Increment a bignum in-place. Return the carry flag.
Definition APInt.h:1936
double bitsToDouble() const
Converts APInt bits to a double.
Definition APInt.h:1745
const uint64_t * getRawData() const
This function returns a pointer to the internal storage of the APInt.
Definition APInt.h:576
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:865
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24
bool isSigned() const
Definition APSInt.h:78
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static ErrorSuccess success()
Create a success value.
Definition Error.h:336
Tagged union holding either a T or a Error.
Definition Error.h:485
This class is used to gather all the unique data bits of a node.
Definition FoldingSet.h:208
void Add(const T &x)
Definition FoldingSet.h:248
Represent a mutable reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:294
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator erase(const_iterator CI)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:882
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition StringRef.h:490
const char * iterator
Definition StringRef.h:60
constexpr bool empty() const
Check if the string is empty.
Definition StringRef.h:141
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition StringRef.h:629
iterator begin() const
Definition StringRef.h:114
char back() const
Get the last character in the string.
Definition StringRef.h:153
StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition StringRef.h:714
constexpr size_t size() const
Get the string size.
Definition StringRef.h:144
char front() const
Get the first character in the string.
Definition StringRef.h:147
iterator end() const
Definition StringRef.h:116
bool consume_front(char Prefix)
Returns true if this StringRef has the given prefix and removes that prefix.
Definition StringRef.h:655
bool consume_front_insensitive(StringRef Prefix)
Returns true if this StringRef has the given prefix, ignoring case, and removes that prefix.
Definition StringRef.h:675
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI void makeSmallestNormalized(bool Neg)
Definition APFloat.cpp:5155
LLVM_ABI DoubleAPFloat & operator=(const DoubleAPFloat &RHS)
Definition APFloat.cpp:4685
LLVM_ABI void changeSign()
Definition APFloat.cpp:5062
LLVM_ABI bool isLargest() const
Definition APFloat.cpp:5629
LLVM_ABI opStatus remainder(const DoubleAPFloat &RHS)
Definition APFloat.cpp:4949
LLVM_ABI opStatus multiply(const DoubleAPFloat &RHS, roundingMode RM)
Definition APFloat.cpp:4852
LLVM_ABI fltCategory getCategory() const
Definition APFloat.cpp:5121
LLVM_ABI bool bitwiseIsEqual(const DoubleAPFloat &RHS) const
Definition APFloat.cpp:5178
LLVM_ABI LLVM_READONLY int getExactLog2Abs() const
Definition APFloat.cpp:5653
LLVM_ABI opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.cpp:5580
LLVM_ABI APInt bitcastToAPInt() const
Definition APFloat.cpp:5189
LLVM_ABI Expected< opStatus > convertFromString(StringRef, roundingMode)
Definition APFloat.cpp:5199
LLVM_ABI bool isSmallest() const
Definition APFloat.cpp:5612
LLVM_ABI opStatus subtract(const DoubleAPFloat &RHS, roundingMode RM)
Definition APFloat.cpp:4844
LLVM_ABI friend hash_code hash_value(const DoubleAPFloat &Arg)
Definition APFloat.cpp:5183
LLVM_ABI cmpResult compareAbsoluteValue(const DoubleAPFloat &RHS) const
Definition APFloat.cpp:5068
LLVM_ABI bool isDenormal() const
Definition APFloat.cpp:5605
LLVM_ABI opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.cpp:5416
LLVM_ABI void makeSmallest(bool Neg)
Definition APFloat.cpp:5148
LLVM_ABI friend int ilogb(const DoubleAPFloat &X)
Definition APFloat.cpp:5662
LLVM_ABI opStatus next(bool nextDown)
Definition APFloat.cpp:5215
LLVM_ABI void makeInf(bool Neg)
Definition APFloat.cpp:5127
LLVM_ABI bool isInteger() const
Definition APFloat.cpp:5637
LLVM_ABI void makeZero(bool Neg)
Definition APFloat.cpp:5132
LLVM_ABI opStatus divide(const DoubleAPFloat &RHS, roundingMode RM)
Definition APFloat.cpp:4938
LLVM_ABI bool isSmallestNormalized() const
Definition APFloat.cpp:5620
LLVM_ABI opStatus mod(const DoubleAPFloat &RHS)
Definition APFloat.cpp:4959
LLVM_ABI DoubleAPFloat(const fltSemantics &S)
Definition APFloat.cpp:4632
LLVM_ABI void toString(SmallVectorImpl< char > &Str, unsigned FormatPrecision, unsigned FormatMaxPadding, bool TruncateZero=true) const
Definition APFloat.cpp:5643
LLVM_ABI void makeLargest(bool Neg)
Definition APFloat.cpp:5137
LLVM_ABI cmpResult compare(const DoubleAPFloat &RHS) const
Definition APFloat.cpp:5170
LLVM_ABI friend DoubleAPFloat scalbn(const DoubleAPFloat &X, int Exp, roundingMode)
LLVM_ABI opStatus roundToIntegral(roundingMode RM)
Definition APFloat.cpp:4985
LLVM_ABI opStatus fusedMultiplyAdd(const DoubleAPFloat &Multiplicand, const DoubleAPFloat &Addend, roundingMode RM)
Definition APFloat.cpp:4970
LLVM_ABI unsigned int convertToHexString(char *DST, unsigned int HexDigits, bool UpperCase, roundingMode RM) const
Definition APFloat.cpp:5595
LLVM_ABI bool isNegative() const
Definition APFloat.cpp:5125
LLVM_ABI opStatus add(const DoubleAPFloat &RHS, roundingMode RM)
Definition APFloat.cpp:4839
LLVM_ABI void makeNaN(bool SNaN, bool Neg, const APInt *fill)
Definition APFloat.cpp:5165
LLVM_ABI unsigned int convertToHexString(char *dst, unsigned int hexDigits, bool upperCase, roundingMode) const
Write out a hexadecimal representation of the floating point value to DST, which must be of sufficien...
Definition APFloat.cpp:3175
LLVM_ABI cmpResult compareAbsoluteValue(const IEEEFloat &) const
Definition APFloat.cpp:1424
LLVM_ABI opStatus mod(const IEEEFloat &)
C fmod, or llvm frem.
Definition APFloat.cpp:2180
fltCategory getCategory() const
Definition APFloat.h:582
LLVM_ABI opStatus convertFromAPInt(const APInt &, bool, roundingMode)
Definition APFloat.cpp:2735
APInt getNaNPayload() const
Definition APFloat.cpp:4520
bool isFiniteNonZero() const
Definition APFloat.h:585
bool needsCleanup() const
Returns whether this instance allocated memory.
Definition APFloat.h:472
LLVM_ABI void makeLargest(bool Neg=false)
Make this number the largest magnitude normal number in the given semantics.
Definition APFloat.cpp:3947
LLVM_ABI LLVM_READONLY int getExactLog2Abs() const
Definition APFloat.cpp:4342
LLVM_ABI APInt bitcastToAPInt() const
Definition APFloat.cpp:3573
LLVM_ABI friend IEEEFloat scalbn(IEEEFloat X, int Exp, roundingMode)
Definition APFloat.cpp:4592
LLVM_ABI cmpResult compare(const IEEEFloat &) const
IEEE comparison with another floating point number (NaNs compare unordered, 0==-0).
Definition APFloat.cpp:2348
bool isNegative() const
IEEE-754R isSignMinus: Returns true if and only if the current value is negative.
Definition APFloat.h:547
LLVM_ABI opStatus divide(const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2054
bool isNaN() const
Returns true if and only if the float is a quiet or signaling NaN.
Definition APFloat.h:572
LLVM_ABI opStatus remainder(const IEEEFloat &)
IEEE remainder.
Definition APFloat.cpp:2072
LLVM_ABI double convertToDouble() const
Definition APFloat.cpp:3643
LLVM_ABI float convertToFloat() const
Definition APFloat.cpp:3636
LLVM_ABI opStatus subtract(const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2030
LLVM_ABI void toString(SmallVectorImpl< char > &Str, unsigned FormatPrecision=0, unsigned FormatMaxPadding=3, bool TruncateZero=true) const
Converts this value into a decimal string.
Definition APFloat.cpp:4298
LLVM_ABI void makeSmallest(bool Neg=false)
Make this number the smallest magnitude denormal number in the given semantics.
Definition APFloat.cpp:3979
LLVM_ABI void makeInf(bool Neg=false)
Definition APFloat.cpp:4539
LLVM_ABI bool isSmallestNormalized() const
Returns true if this is the smallest (by magnitude) normalized finite number in the given semantics.
Definition APFloat.cpp:945
LLVM_ABI void makeQuiet()
Definition APFloat.cpp:4568
LLVM_ABI bool isLargest() const
Returns true if and only if the number has the largest possible finite magnitude in the current seman...
Definition APFloat.cpp:1047
LLVM_ABI opStatus add(const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2024
bool isFinite() const
Returns true if and only if the current value is zero, subnormal, or normal.
Definition APFloat.h:559
LLVM_ABI Expected< opStatus > convertFromString(StringRef, roundingMode)
Definition APFloat.cpp:3118
LLVM_ABI void makeNaN(bool SNaN=false, bool Neg=false, const APInt *fill=nullptr)
Definition APFloat.cpp:834
LLVM_ABI opStatus multiply(const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2036
LLVM_ABI opStatus roundToIntegral(roundingMode)
Definition APFloat.cpp:2263
LLVM_ABI IEEEFloat & operator=(const IEEEFloat &)
Definition APFloat.cpp:906
LLVM_ABI bool bitwiseIsEqual(const IEEEFloat &) const
Bitwise comparison for equality (QNaNs compare equal, 0!=-0).
Definition APFloat.cpp:1072
LLVM_ABI void makeSmallestNormalized(bool Negative=false)
Make this number the smallest (by magnitude) normalized finite number in the given semantics.
Definition APFloat.cpp:3993
LLVM_ABI bool isInteger() const
Returns true if and only if the number is an exact integer.
Definition APFloat.cpp:1064
LLVM_ABI IEEEFloat(const fltSemantics &)
Definition APFloat.cpp:1099
LLVM_ABI opStatus fusedMultiplyAdd(const IEEEFloat &, const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2217
LLVM_ABI friend int ilogb(const IEEEFloat &Arg)
Definition APFloat.cpp:4574
LLVM_ABI opStatus next(bool nextDown)
IEEE-754R 5.3.1: nextUp/nextDown.
Definition APFloat.cpp:4387
bool isInfinity() const
IEEE-754R isInfinite(): Returns true if and only if the float is infinity.
Definition APFloat.h:569
const fltSemantics & getSemantics() const
Definition APFloat.h:583
bool isZero() const
Returns true if and only if the float is plus or minus zero.
Definition APFloat.h:562
LLVM_ABI bool isSignaling() const
Returns true if and only if the float is a signaling NaN.
Definition APFloat.cpp:4371
LLVM_ABI void makeZero(bool Neg=false)
Definition APFloat.cpp:4554
LLVM_ABI opStatus convert(const fltSemantics &, roundingMode, bool *)
IEEEFloat::convert - convert a value of one floating point type to another.
Definition APFloat.cpp:2424
LLVM_ABI void changeSign()
Definition APFloat.cpp:1982
LLVM_ABI bool isDenormal() const
IEEE-754R isSubnormal(): Returns true if and only if the float is a denormal.
Definition APFloat.cpp:931
LLVM_ABI opStatus convertToInteger(MutableArrayRef< integerPart >, unsigned int, bool, roundingMode, bool *) const
Definition APFloat.cpp:2680
LLVM_ABI bool isSmallest() const
Returns true if and only if the number has the smallest possible non-zero magnitude in the current semantics.
Definition APFloat.cpp:937
An opaque object representing a hash code.
Definition Hashing.h:78
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
static constexpr opStatus opInexact
Definition APFloat.h:448
LLVM_ABI SlowDynamicAPInt abs(const SlowDynamicAPInt &X)
Redeclarations of the friend declarations above to make them discoverable by lookups.
static constexpr fltCategory fcNaN
Definition APFloat.h:450
static constexpr opStatus opDivByZero
Definition APFloat.h:445
static constexpr opStatus opOverflow
Definition APFloat.h:446
static constexpr cmpResult cmpLessThan
Definition APFloat.h:440
const char unit< Period >::value[]
Definition Chrono.h:104
static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts, unsigned bits)
Definition APFloat.cpp:1447
static constexpr roundingMode rmTowardPositive
Definition APFloat.h:436
static constexpr uninitializedTag uninitialized
Definition APFloat.h:430
static constexpr fltCategory fcZero
Definition APFloat.h:452
static constexpr opStatus opOK
Definition APFloat.h:443
static constexpr cmpResult cmpGreaterThan
Definition APFloat.h:441
static constexpr unsigned integerPartWidth
Definition APFloat.h:438
LLVM_ABI hash_code hash_value(const IEEEFloat &Arg)
Definition APFloat.cpp:3315
APFloatBase::ExponentType ExponentType
Definition APFloat.h:429
static constexpr fltCategory fcNormal
Definition APFloat.h:451
static constexpr opStatus opInvalidOp
Definition APFloat.h:444
APFloatBase::opStatus opStatus
Definition APFloat.h:426
LLVM_ABI IEEEFloat frexp(const IEEEFloat &Val, int &Exp, roundingMode RM)
Definition APFloat.cpp:4613
APFloatBase::uninitializedTag uninitializedTag
Definition APFloat.h:424
static constexpr cmpResult cmpUnordered
Definition APFloat.h:442
static constexpr roundingMode rmTowardNegative
Definition APFloat.h:435
APFloatBase::roundingMode roundingMode
Definition APFloat.h:425
APFloatBase::cmpResult cmpResult
Definition APFloat.h:427
static constexpr fltCategory fcInfinity
Definition APFloat.h:449
static constexpr roundingMode rmNearestTiesToAway
Definition APFloat.h:433
static constexpr roundingMode rmTowardZero
Definition APFloat.h:437
static constexpr opStatus opUnderflow
Definition APFloat.h:447
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:431
LLVM_ABI int ilogb(const IEEEFloat &Arg)
Definition APFloat.cpp:4574
static constexpr cmpResult cmpEqual
Definition APFloat.h:439
LLVM_ABI IEEEFloat scalbn(IEEEFloat X, int Exp, roundingMode)
Definition APFloat.cpp:4592
static std::pair< APFloat, APFloat > fastTwoSum(APFloat X, APFloat Y)
Definition APFloat.cpp:4702
APFloatBase::integerPart integerPart
Definition APFloat.h:423
LLVM_ABI std::error_code status(const Twine &path, file_status &result, bool follow=true)
Get file status as if by POSIX stat().
This is an optimization pass for GlobalISel generic memory operations.
static unsigned int partAsHex(char *dst, APFloatBase::integerPart part, unsigned int count, const char *hexDigitChars)
Definition APFloat.cpp:731
void fill(R &&Range, T &&Value)
Provide wrappers to std::fill which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1758
static const char infinityL[]
Definition APFloat.cpp:722
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
static constexpr unsigned int partCountForBits(unsigned int bits)
Definition APFloat.cpp:309
static const char NaNU[]
Definition APFloat.cpp:725
static unsigned int HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
Definition APFloat.cpp:607
static unsigned int powerOf5(APFloatBase::integerPart *dst, unsigned int power)
Definition APFloat.cpp:666
unsigned hexDigitValue(char C)
Interpret the given character C as a hexadecimal digit and return its value.
static APFloat harrisonUlp(const APFloat &X)
Definition APFloat.cpp:778
static constexpr APFloatBase::ExponentType exponentZero(const fltSemantics &semantics)
Definition APFloat.cpp:283
static Expected< int > totalExponent(StringRef::iterator p, StringRef::iterator end, int exponentAdjustment)
Definition APFloat.cpp:366
LLVM_ABI std::error_code inconvertibleErrorCode()
The value returned by this function can be returned from convertToErrorCode for Error values where no sensible translation to std::error_code exists.
Definition Error.cpp:94
const unsigned int maxPowerOfFiveExponent
Definition APFloat.cpp:209
int ilogb(const APFloat &Arg)
Returns the exponent of the internal representation of the APFloat.
Definition APFloat.h:1631
static char * writeUnsignedDecimal(char *dst, unsigned int n)
Definition APFloat.cpp:748
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like all_of.
Definition STLExtras.h:2172
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:156
const unsigned int maxPrecision
Definition APFloat.cpp:208
APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM)
Equivalent of C standard library function.
Definition APFloat.h:1652
static const char NaNL[]
Definition APFloat.cpp:724
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
Definition bit.h:204
static const char infinityU[]
Definition APFloat.cpp:723
lostFraction
Enum that represents what fraction of the LSB truncated bits of an fp number represent.
Definition APFloat.h:50
@ lfMoreThanHalf
Definition APFloat.h:54
@ lfLessThanHalf
Definition APFloat.h:52
@ lfExactlyHalf
Definition APFloat.h:53
@ lfExactlyZero
Definition APFloat.h:51
static Error interpretDecimal(StringRef::iterator begin, StringRef::iterator end, decimalInfo *D)
Definition APFloat.cpp:456
LLVM_ABI bool isFinite(const Loop *L)
Return true if this loop can be assumed to run for a finite number of iterations.
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
const unsigned int maxPowerOfFiveParts
Definition APFloat.cpp:210
APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM)
Returns: X * 2^Exp for integral exponents.
Definition APFloat.h:1640
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
static constexpr APFloatBase::ExponentType exponentNaN(const fltSemantics &semantics)
Definition APFloat.cpp:293
static Error createError(const Twine &Err)
Definition APFloat.cpp:305
static lostFraction shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
Definition APFloat.cpp:575
Error make_error(ArgTs &&... Args)
Make an Error instance representing failure using the given error info type.
Definition Error.h:340
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
static const char hexDigitsUpper[]
Definition APFloat.cpp:721
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:221
const unsigned int maxExponent
Definition APFloat.cpp:207
static unsigned int decDigitValue(unsigned int c)
Definition APFloat.cpp:316
fltNonfiniteBehavior
Definition APFloat.h:952
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the given range.
Definition STLExtras.h:2011
static lostFraction combineLostFractions(lostFraction moreSignificant, lostFraction lessSignificant)
Definition APFloat.cpp:586
static Expected< StringRef::iterator > skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end, StringRef::iterator *dot)
Definition APFloat.cpp:416
RoundingMode
Rounding mode.
ArrayRef(const T &OneElt) -> ArrayRef< T >
static constexpr APFloatBase::ExponentType exponentInf(const fltSemantics &semantics)
Definition APFloat.cpp:288
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
static lostFraction lostFractionThroughTruncation(const APFloatBase::integerPart *parts, unsigned int partCount, unsigned int bits)
Definition APFloat.cpp:555
APFloat neg(APFloat X)
Returns the negated value of the argument.
Definition APFloat.h:1666
static APFloatBase::integerPart ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits, bool isNearest)
Definition APFloat.cpp:621
static char * writeSignedDecimal(char *dst, int value)
Definition APFloat.cpp:764
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition Hashing.h:325
static Expected< lostFraction > trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, unsigned int digitValue)
Definition APFloat.cpp:526
void consumeError(Error Err)
Consume an Error without doing anything.
Definition Error.h:1106
static Expected< int > readExponent(StringRef::iterator begin, StringRef::iterator end)
Definition APFloat.cpp:326
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition Hashing.h:305
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
static const char hexDigitsLower[]
Definition APFloat.cpp:720
#define N
const char * lastSigDigit
Definition APFloat.cpp:451
const char * firstSigDigit
Definition APFloat.cpp:450
APFloatBase::ExponentType maxExponent
Definition APFloat.h:1000
fltNonfiniteBehavior nonFiniteBehavior
Definition APFloat.h:1013
APFloatBase::ExponentType minExponent
Definition APFloat.h:1004
unsigned int sizeInBits
Definition APFloat.h:1011
unsigned int precision
Definition APFloat.h:1008
fltNanEncoding nanEncoding
Definition APFloat.h:1015