LLVM 17.0.0git
APFloat.cpp
Go to the documentation of this file.
1//===-- APFloat.cpp - Implement APFloat class -----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a class to represent arbitrary precision floating
10// point values and provide a variety of arithmetic operations on them.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/ADT/APFloat.h"
15#include "llvm/ADT/APSInt.h"
16#include "llvm/ADT/ArrayRef.h"
17#include "llvm/ADT/FoldingSet.h"
18#include "llvm/ADT/Hashing.h"
20#include "llvm/ADT/StringRef.h"
21#include "llvm/Config/llvm-config.h"
22#include "llvm/Support/Debug.h"
23#include "llvm/Support/Error.h"
26#include <cstring>
27#include <limits.h>
28
29#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL) \
30 do { \
31 if (usesLayout<IEEEFloat>(getSemantics())) \
32 return U.IEEE.METHOD_CALL; \
33 if (usesLayout<DoubleAPFloat>(getSemantics())) \
34 return U.Double.METHOD_CALL; \
35 llvm_unreachable("Unexpected semantics"); \
36 } while (false)
37
38using namespace llvm;
39
40/// A macro used to combine two fcCategory enums into one key which can be used
41/// in a switch statement to classify how the interaction of two APFloat's
42/// categories affects an operation.
43///
44/// TODO: If clang source code is ever allowed to use constexpr in its own
45/// codebase, change this into a static inline function.
46#define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
47
48/* Assumed in hexadecimal significand parsing, and conversion to
49 hexadecimal strings. */
50static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
51
52namespace llvm {
53
54 // How the nonfinite values Inf and NaN are represented.
56 // Represents standard IEEE 754 behavior. A value is nonfinite if the
57 // exponent field is all 1s. In such cases, a value is Inf if the
58 // significand bits are all zero, and NaN otherwise
59 IEEE754,
60
61 // Only the Float8E4M3FN has this behavior. There is no Inf representation. A
62 // value is NaN if the exponent field and the mantissa field are all 1s.
63 // This behavior matches the FP8 E4M3 type described in
64 // https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs
65 // as non-signalling, although the paper does not state whether the NaN
66 // values are signalling or not.
67 NanOnly,
68 };
69
70 /* Represents floating point arithmetic semantics. */
71 struct fltSemantics {
72 /* The largest E such that 2^E is representable; this matches the
73 definition of IEEE 754. */
75
76 /* The smallest E such that 2^E is a normalized number; this
77 matches the definition of IEEE 754. */
79
80 /* Number of bits in the significand. This includes the integer
81 bit. */
82 unsigned int precision;
83
84 /* Number of bits actually used in the semantics. */
85 unsigned int sizeInBits;
86
88
89 // Returns true if any number described by this semantics can be precisely
90 // represented by the specified semantics. Does not take into account
91 // the value of fltNonfiniteBehavior.
92 bool isRepresentableBy(const fltSemantics &S) const {
93 return maxExponent <= S.maxExponent && minExponent >= S.minExponent &&
95 }
96 };
97
98 static const fltSemantics semIEEEhalf = {15, -14, 11, 16};
99 static const fltSemantics semBFloat = {127, -126, 8, 16};
100 static const fltSemantics semIEEEsingle = {127, -126, 24, 32};
101 static const fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
102 static const fltSemantics semIEEEquad = {16383, -16382, 113, 128};
103 static const fltSemantics semFloat8E5M2 = {15, -14, 3, 8};
104 static const fltSemantics semFloat8E4M3FN = {8, -6, 4, 8,
106 static const fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
107 static const fltSemantics semBogus = {0, 0, 0, 0};
108
109 /* The IBM double-double semantics. Such a number consists of a pair of IEEE
110 64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
111 (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
112 Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
113 to each other, and two 11-bit exponents.
114
115 Note: we need to make the value different from semBogus as otherwise
116 an unsafe optimization may collapse both values to a single address,
117 and we heavily rely on them having distinct addresses. */
118 static const fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128};
119
120 /* These are legacy semantics for the fallback, inaccurate implementation of
121 IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
122 operation. It's equivalent to having an IEEE number with consecutive 106
123 bits of mantissa and 11 bits of exponent.
124
125 It's not equivalent to IBM double-double. For example, a legit IBM
126 double-double, 1 + epsilon:
127
128 1 + epsilon = 1 + (1 >> 1076)
129
130 is not representable by a consecutive 106 bits of mantissa.
131
132 Currently, these semantics are used in the following way:
133
134 semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
135 (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
136 semPPCDoubleDoubleLegacy -> IEEE operations
137
138 We use bitcastToAPInt() to get the bit representation (in APInt) of the
139 underlying IEEEdouble, then use the APInt constructor to construct the
140 legacy IEEE float.
141
142 TODO: Implement all operations in semPPCDoubleDouble, and delete these
143 semantics. */
144 static const fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53,
145 53 + 53, 128};
146
148 switch (S) {
149 case S_IEEEhalf:
150 return IEEEhalf();
151 case S_BFloat:
152 return BFloat();
153 case S_IEEEsingle:
154 return IEEEsingle();
155 case S_IEEEdouble:
156 return IEEEdouble();
157 case S_IEEEquad:
158 return IEEEquad();
160 return PPCDoubleDouble();
161 case S_Float8E5M2:
162 return Float8E5M2();
163 case S_Float8E4M3FN:
164 return Float8E4M3FN();
166 return x87DoubleExtended();
167 }
168 llvm_unreachable("Unrecognised floating semantics");
169 }
170
173 if (&Sem == &llvm::APFloat::IEEEhalf())
174 return S_IEEEhalf;
175 else if (&Sem == &llvm::APFloat::BFloat())
176 return S_BFloat;
177 else if (&Sem == &llvm::APFloat::IEEEsingle())
178 return S_IEEEsingle;
179 else if (&Sem == &llvm::APFloat::IEEEdouble())
180 return S_IEEEdouble;
181 else if (&Sem == &llvm::APFloat::IEEEquad())
182 return S_IEEEquad;
183 else if (&Sem == &llvm::APFloat::PPCDoubleDouble())
184 return S_PPCDoubleDouble;
185 else if (&Sem == &llvm::APFloat::Float8E5M2())
186 return S_Float8E5M2;
187 else if (&Sem == &llvm::APFloat::Float8E4M3FN())
188 return S_Float8E4M3FN;
189 else if (&Sem == &llvm::APFloat::x87DoubleExtended())
190 return S_x87DoubleExtended;
191 else
192 llvm_unreachable("Unknown floating semantics");
193 }
194
196 return semIEEEhalf;
197 }
199 return semBFloat;
200 }
202 return semIEEEsingle;
203 }
205 return semIEEEdouble;
206 }
209 return semPPCDoubleDouble;
210 }
215 }
217
223
224 /* A tight upper bound on number of parts required to hold the value
225 pow(5, power) is
226
227 power * 815 / (351 * integerPartWidth) + 1
228
229 However, whilst the result may require only this many parts,
230 because we are multiplying two values to get it, the
231 multiplication may require an extra part with the excess part
232 being zero (consider the trivial case of 1 * 1, tcFullMultiply
233 requires two parts to hold the single-part result). So we add an
234 extra one to guarantee enough space whilst multiplying. */
235 const unsigned int maxExponent = 16383;
236 const unsigned int maxPrecision = 113;
239
240 unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
241 return semantics.precision;
242 }
245 return semantics.maxExponent;
246 }
249 return semantics.minExponent;
250 }
251 unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
252 return semantics.sizeInBits;
253 }
255 bool isSigned) {
256 // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need
257 // at least one more bit than the MaxExponent to hold the max FP value.
258 unsigned int MinBitWidth = semanticsMaxExponent(semantics) + 1;
259 // Extra sign bit needed.
260 if (isSigned)
261 ++MinBitWidth;
262 return MinBitWidth;
263 }
264
266 return Sem.sizeInBits;
267}
268
269/* A bunch of private, handy routines. */
270
271static inline Error createError(const Twine &Err) {
272 return make_error<StringError>(Err, inconvertibleErrorCode());
273}
274
275static inline unsigned int
277{
279}
280
/* Map the character code C to its decimal digit value.  The subtraction
   is done in unsigned arithmetic, so '0'..'9' give 0U-9U and every other
   code gives a value >= 10U, which callers treat as "not a digit". */
static inline unsigned int
decDigitValue(unsigned int c)
{
  const unsigned int zeroCode = '0';
  return c - zeroCode;
}
287
288/* Return the value of a decimal exponent of the form
289 [+-]ddddddd.
290
291 If the exponent overflows, returns a large exponent with the
292 appropriate sign. */
295 bool isNegative;
296 unsigned int absExponent;
297 const unsigned int overlargeExponent = 24000; /* FIXME. */
298 StringRef::iterator p = begin;
299
300 // Treat no exponent as 0 to match binutils
301 if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) {
302 return 0;
303 }
304
305 isNegative = (*p == '-');
306 if (*p == '-' || *p == '+') {
307 p++;
308 if (p == end)
309 return createError("Exponent has no digits");
310 }
311
312 absExponent = decDigitValue(*p++);
313 if (absExponent >= 10U)
314 return createError("Invalid character in exponent");
315
316 for (; p != end; ++p) {
317 unsigned int value;
318
319 value = decDigitValue(*p);
320 if (value >= 10U)
321 return createError("Invalid character in exponent");
322
323 absExponent = absExponent * 10U + value;
324 if (absExponent >= overlargeExponent) {
325 absExponent = overlargeExponent;
326 break;
327 }
328 }
329
330 if (isNegative)
331 return -(int) absExponent;
332 else
333 return (int) absExponent;
334}
335
336/* This is ugly and needs cleaning up, but I don't immediately see
337 how whilst remaining safe. */
340 int exponentAdjustment) {
341 int unsignedExponent;
342 bool negative, overflow;
343 int exponent = 0;
344
345 if (p == end)
346 return createError("Exponent has no digits");
347
348 negative = *p == '-';
349 if (*p == '-' || *p == '+') {
350 p++;
351 if (p == end)
352 return createError("Exponent has no digits");
353 }
354
355 unsignedExponent = 0;
356 overflow = false;
357 for (; p != end; ++p) {
358 unsigned int value;
359
360 value = decDigitValue(*p);
361 if (value >= 10U)
362 return createError("Invalid character in exponent");
363
364 unsignedExponent = unsignedExponent * 10 + value;
365 if (unsignedExponent > 32767) {
366 overflow = true;
367 break;
368 }
369 }
370
371 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
372 overflow = true;
373
374 if (!overflow) {
375 exponent = unsignedExponent;
376 if (negative)
377 exponent = -exponent;
378 exponent += exponentAdjustment;
379 if (exponent > 32767 || exponent < -32768)
380 overflow = true;
381 }
382
383 if (overflow)
384 exponent = negative ? -32768: 32767;
385
386 return exponent;
387}
388
391 StringRef::iterator *dot) {
392 StringRef::iterator p = begin;
393 *dot = end;
394 while (p != end && *p == '0')
395 p++;
396
397 if (p != end && *p == '.') {
398 *dot = p++;
399
400 if (end - begin == 1)
401 return createError("Significand has no digits");
402
403 while (p != end && *p == '0')
404 p++;
405 }
406
407 return p;
408}
409
410/* Given a normal decimal floating point number of the form
411
412 dddd.dddd[eE][+-]ddd
413
414 where the decimal point and exponent are optional, fill out the
415 structure D. Exponent is appropriate if the significand is
416 treated as an integer, and normalizedExponent if the significand
417 is taken to have the decimal point after a single leading
418 non-zero digit.
419
420 If the value is zero, V->firstSigDigit points to a non-digit, and
421 the return exponent is zero.
422*/
424 const char *firstSigDigit;
425 const char *lastSigDigit;
428};
429
432 StringRef::iterator dot = end;
433
434 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
435 if (!PtrOrErr)
436 return PtrOrErr.takeError();
437 StringRef::iterator p = *PtrOrErr;
438
439 D->firstSigDigit = p;
440 D->exponent = 0;
441 D->normalizedExponent = 0;
442
443 for (; p != end; ++p) {
444 if (*p == '.') {
445 if (dot != end)
446 return createError("String contains multiple dots");
447 dot = p++;
448 if (p == end)
449 break;
450 }
451 if (decDigitValue(*p) >= 10U)
452 break;
453 }
454
455 if (p != end) {
456 if (*p != 'e' && *p != 'E')
457 return createError("Invalid character in significand");
458 if (p == begin)
459 return createError("Significand has no digits");
460 if (dot != end && p - begin == 1)
461 return createError("Significand has no digits");
462
463 /* p points to the first non-digit in the string */
464 auto ExpOrErr = readExponent(p + 1, end);
465 if (!ExpOrErr)
466 return ExpOrErr.takeError();
467 D->exponent = *ExpOrErr;
468
469 /* Implied decimal point? */
470 if (dot == end)
471 dot = p;
472 }
473
474 /* If number is all zeroes accept any exponent. */
475 if (p != D->firstSigDigit) {
476 /* Drop insignificant trailing zeroes. */
477 if (p != begin) {
478 do
479 do
480 p--;
481 while (p != begin && *p == '0');
482 while (p != begin && *p == '.');
483 }
484
485 /* Adjust the exponents for any decimal point. */
486 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
487 D->normalizedExponent = (D->exponent +
488 static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
489 - (dot > D->firstSigDigit && dot < p)));
490 }
491
492 D->lastSigDigit = p;
493 return Error::success();
494}
495
496/* Return the trailing fraction of a hexadecimal number.
497 DIGITVALUE is the first hex digit of the fraction, P points to
498 the next digit. */
501 unsigned int digitValue) {
502 unsigned int hexDigit;
503
504 /* If the first trailing digit isn't 0 or 8 we can work out the
505 fraction immediately. */
506 if (digitValue > 8)
507 return lfMoreThanHalf;
508 else if (digitValue < 8 && digitValue > 0)
509 return lfLessThanHalf;
510
511 // Otherwise we need to find the first non-zero digit.
512 while (p != end && (*p == '0' || *p == '.'))
513 p++;
514
515 if (p == end)
516 return createError("Invalid trailing hexadecimal fraction!");
517
518 hexDigit = hexDigitValue(*p);
519
520 /* If we ran off the end it is exactly zero or one-half, otherwise
521 a little more. */
522 if (hexDigit == -1U)
523 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
524 else
525 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
526}
527
528/* Return the fraction lost were a bignum truncated losing the least
529 significant BITS bits. */
530static lostFraction
532 unsigned int partCount,
533 unsigned int bits)
534{
535 unsigned int lsb;
536
537 lsb = APInt::tcLSB(parts, partCount);
538
539 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
540 if (bits <= lsb)
541 return lfExactlyZero;
542 if (bits == lsb + 1)
543 return lfExactlyHalf;
544 if (bits <= partCount * APFloatBase::integerPartWidth &&
545 APInt::tcExtractBit(parts, bits - 1))
546 return lfMoreThanHalf;
547
548 return lfLessThanHalf;
549}
550
551/* Shift DST right BITS bits noting lost fraction. */
552static lostFraction
553shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
554{
555 lostFraction lost_fraction;
556
557 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
558
559 APInt::tcShiftRight(dst, parts, bits);
560
561 return lost_fraction;
562}
563
564/* Combine the effect of two lost fractions. */
565static lostFraction
567 lostFraction lessSignificant)
568{
569 if (lessSignificant != lfExactlyZero) {
570 if (moreSignificant == lfExactlyZero)
571 moreSignificant = lfLessThanHalf;
572 else if (moreSignificant == lfExactlyHalf)
573 moreSignificant = lfMoreThanHalf;
574 }
575
576 return moreSignificant;
577}
578
/* Bound, in half-ulps, on the error of a product of two floating point
   numbers whose own errors are at most HUERR1 and HUERR2 half-ulps.
   INEXACTMULTIPLY says whether the multiplication itself rounded.  The
   true error is strictly less than the returned value.

   See "How to Read Floating Point Numbers Accurately" by William D
   Clinger. */
static unsigned int
HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
{
  const unsigned int inputError = HUerr1 + HUerr2;
  assert(HUerr1 < 2 || HUerr2 < 2 || (inputError < 8));

  const unsigned int roundingError = inexactMultiply ? 1U : 0U;

  /* With inexact inputs the rounding error of the multiply is added on
     top of twice their combined error. */
  if (inputError != 0)
    return roundingError + 2 * inputError;

  /* Exact inputs: only the multiply itself can contribute. */
  return roundingError * 2;
}
596
597/* The number of ulps from the boundary (zero, or half if ISNEAREST)
598 when the least significant BITS are truncated. BITS cannot be
599 zero. */
602 bool isNearest) {
603 unsigned int count, partBits;
604 APFloatBase::integerPart part, boundary;
605
606 assert(bits != 0);
607
608 bits--;
610 partBits = bits % APFloatBase::integerPartWidth + 1;
611
612 part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits));
613
614 if (isNearest)
615 boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
616 else
617 boundary = 0;
618
619 if (count == 0) {
620 if (part - boundary <= boundary - part)
621 return part - boundary;
622 else
623 return boundary - part;
624 }
625
626 if (part == boundary) {
627 while (--count)
628 if (parts[count])
629 return ~(APFloatBase::integerPart) 0; /* A lot. */
630
631 return parts[0];
632 } else if (part == boundary - 1) {
633 while (--count)
634 if (~parts[count])
635 return ~(APFloatBase::integerPart) 0; /* A lot. */
636
637 return -parts[0];
638 }
639
640 return ~(APFloatBase::integerPart) 0; /* A lot. */
641}
642
643/* Place pow(5, power) in DST, and return the number of parts used.
644 DST must be at least one part larger than size of the answer. */
645static unsigned int
646powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
647 static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
649 pow5s[0] = 78125 * 5;
650
651 unsigned int partsCount[16] = { 1 };
652 APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
653 unsigned int result;
654 assert(power <= maxExponent);
655
656 p1 = dst;
657 p2 = scratch;
658
659 *p1 = firstEightPowers[power & 7];
660 power >>= 3;
661
662 result = 1;
663 pow5 = pow5s;
664
665 for (unsigned int n = 0; power; power >>= 1, n++) {
666 unsigned int pc;
667
668 pc = partsCount[n];
669
670 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
671 if (pc == 0) {
672 pc = partsCount[n - 1];
673 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
674 pc *= 2;
675 if (pow5[pc - 1] == 0)
676 pc--;
677 partsCount[n] = pc;
678 }
679
680 if (power & 1) {
682
683 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
684 result += pc;
685 if (p2[result - 1] == 0)
686 result--;
687
688 /* Now result is in p1 with partsCount parts and p2 is scratch
689 space. */
690 tmp = p1;
691 p1 = p2;
692 p2 = tmp;
693 }
694
695 pow5 += pc;
696 }
697
698 if (p1 != dst)
699 APInt::tcAssign(dst, p1, result);
700
701 return result;
702}
703
704/* Zero at the end to avoid modular arithmetic when adding one; used
705 when rounding up during hexadecimal output. */
706static const char hexDigitsLower[] = "0123456789abcdef0";
707static const char hexDigitsUpper[] = "0123456789ABCDEF0";
708static const char infinityL[] = "infinity";
709static const char infinityU[] = "INFINITY";
710static const char NaNL[] = "nan";
711static const char NaNU[] = "NAN";
712
713/* Write out an integerPart in hexadecimal, starting with the most
714 significant nibble. Write out exactly COUNT hexdigits, return
715 COUNT. */
716static unsigned int
717partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
718 const char *hexDigitChars)
719{
720 unsigned int result = count;
721
723
724 part >>= (APFloatBase::integerPartWidth - 4 * count);
725 while (count--) {
726 dst[count] = hexDigitChars[part & 0xf];
727 part >>= 4;
728 }
729
730 return result;
731}
732
/* Write N to DST as an unsigned decimal number without a terminator.
   Returns a pointer just past the last character written. */
static char *
writeUnsignedDecimal (char *dst, unsigned int n)
{
  char digits[40];
  unsigned int used = 0;

  /* Peel digits off least significant first; the do-while makes sure
     zero still produces a single '0'. */
  do {
    digits[used++] = '0' + n % 10;
    n /= 10;
  } while (n != 0);

  /* Copy them out in reverse to get the most significant digit first. */
  while (used != 0)
    *dst++ = digits[--used];

  return dst;
}
750
751/* Write out a signed decimal integer. */
752static char *
753writeSignedDecimal (char *dst, int value)
754{
755 if (value < 0) {
756 *dst++ = '-';
757 dst = writeUnsignedDecimal(dst, -(unsigned) value);
758 } else
759 dst = writeUnsignedDecimal(dst, value);
760
761 return dst;
762}
763
764namespace detail {
765/* Constructors. */
766void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
767 unsigned int count;
768
769 semantics = ourSemantics;
770 count = partCount();
771 if (count > 1)
772 significand.parts = new integerPart[count];
773}
774
775void IEEEFloat::freeSignificand() {
776 if (needsCleanup())
777 delete [] significand.parts;
778}
779
780void IEEEFloat::assign(const IEEEFloat &rhs) {
781 assert(semantics == rhs.semantics);
782
783 sign = rhs.sign;
784 category = rhs.category;
785 exponent = rhs.exponent;
786 if (isFiniteNonZero() || category == fcNaN)
787 copySignificand(rhs);
788}
789
790void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
791 assert(isFiniteNonZero() || category == fcNaN);
792 assert(rhs.partCount() >= partCount());
793
794 APInt::tcAssign(significandParts(), rhs.significandParts(),
795 partCount());
796}
797
798/* Make this number a NaN, with an arbitrary but deterministic value
799 for the significand. If double or longer, this is a signalling NaN,
800 which may not be ideal. If float, this is QNaN(0). */
801void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
802 category = fcNaN;
803 sign = Negative;
804 exponent = exponentNaN();
805
806 integerPart *significand = significandParts();
807 unsigned numParts = partCount();
808
809 APInt fill_storage;
811 // The only NaN representation is where the mantissa is all 1s, which is
812 // non-signalling.
813 SNaN = false;
814 fill_storage = APInt::getAllOnes(semantics->precision - 1);
815 fill = &fill_storage;
816 }
817
818 // Set the significand bits to the fill.
819 if (!fill || fill->getNumWords() < numParts)
820 APInt::tcSet(significand, 0, numParts);
821 if (fill) {
822 APInt::tcAssign(significand, fill->getRawData(),
823 std::min(fill->getNumWords(), numParts));
824
825 // Zero out the excess bits of the significand.
826 unsigned bitsToPreserve = semantics->precision - 1;
827 unsigned part = bitsToPreserve / 64;
828 bitsToPreserve %= 64;
829 significand[part] &= ((1ULL << bitsToPreserve) - 1);
830 for (part++; part != numParts; ++part)
831 significand[part] = 0;
832 }
833
834 unsigned QNaNBit = semantics->precision - 2;
835
836 if (SNaN) {
837 // We always have to clear the QNaN bit to make it an SNaN.
838 APInt::tcClearBit(significand, QNaNBit);
839
840 // If there are no bits set in the payload, we have to set
841 // *something* to make it a NaN instead of an infinity;
842 // conventionally, this is the next bit down from the QNaN bit.
843 if (APInt::tcIsZero(significand, numParts))
844 APInt::tcSetBit(significand, QNaNBit - 1);
845 } else {
846 // We always have to set the QNaN bit to make it a QNaN.
847 APInt::tcSetBit(significand, QNaNBit);
848 }
849
850 // For x87 extended precision, we want to make a NaN, not a
851 // pseudo-NaN. Maybe we should expose the ability to make
852 // pseudo-NaNs?
853 if (semantics == &semX87DoubleExtended)
854 APInt::tcSetBit(significand, QNaNBit + 1);
855}
856
858 if (this != &rhs) {
859 if (semantics != rhs.semantics) {
860 freeSignificand();
861 initialize(rhs.semantics);
862 }
863 assign(rhs);
864 }
865
866 return *this;
867}
868
870 freeSignificand();
871
872 semantics = rhs.semantics;
873 significand = rhs.significand;
874 exponent = rhs.exponent;
875 category = rhs.category;
876 sign = rhs.sign;
877
878 rhs.semantics = &semBogus;
879 return *this;
880}
881
883 return isFiniteNonZero() && (exponent == semantics->minExponent) &&
884 (APInt::tcExtractBit(significandParts(),
885 semantics->precision - 1) == 0);
886}
887
889 // The smallest number by magnitude in our format will be the smallest
890 // denormal, i.e. the floating point number with exponent being minimum
891 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
892 return isFiniteNonZero() && exponent == semantics->minExponent &&
893 significandMSB() == 0;
894}
895
897 return getCategory() == fcNormal && exponent == semantics->minExponent &&
898 isSignificandAllZerosExceptMSB();
899}
900
901bool IEEEFloat::isSignificandAllOnes() const {
902 // Test if the significand excluding the integral bit is all ones. This allows
903 // us to test for binade boundaries.
904 const integerPart *Parts = significandParts();
905 const unsigned PartCount = partCountForBits(semantics->precision);
906 for (unsigned i = 0; i < PartCount - 1; i++)
907 if (~Parts[i])
908 return false;
909
910 // Set the unused high bits to all ones when we compare.
911 const unsigned NumHighBits =
912 PartCount*integerPartWidth - semantics->precision + 1;
913 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
914 "Can not have more high bits to fill than integerPartWidth");
915 const integerPart HighBitFill =
916 ~integerPart(0) << (integerPartWidth - NumHighBits);
917 if (~(Parts[PartCount - 1] | HighBitFill))
918 return false;
919
920 return true;
921}
922
923bool IEEEFloat::isSignificandAllOnesExceptLSB() const {
924 // Test if the significand excluding the integral bit is all ones except for
925 // the least significant bit.
926 const integerPart *Parts = significandParts();
927
928 if (Parts[0] & 1)
929 return false;
930
931 const unsigned PartCount = partCountForBits(semantics->precision);
932 for (unsigned i = 0; i < PartCount - 1; i++) {
933 if (~Parts[i] & ~unsigned{!i})
934 return false;
935 }
936
937 // Set the unused high bits to all ones when we compare.
938 const unsigned NumHighBits =
939 PartCount * integerPartWidth - semantics->precision + 1;
940 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
941 "Can not have more high bits to fill than integerPartWidth");
942 const integerPart HighBitFill = ~integerPart(0)
943 << (integerPartWidth - NumHighBits);
944 if (~(Parts[PartCount - 1] | HighBitFill | 0x1))
945 return false;
946
947 return true;
948}
949
950bool IEEEFloat::isSignificandAllZeros() const {
951 // Test if the significand excluding the integral bit is all zeros. This
952 // allows us to test for binade boundaries.
953 const integerPart *Parts = significandParts();
954 const unsigned PartCount = partCountForBits(semantics->precision);
955
956 for (unsigned i = 0; i < PartCount - 1; i++)
957 if (Parts[i])
958 return false;
959
960 // Compute how many bits are used in the final word.
961 const unsigned NumHighBits =
962 PartCount*integerPartWidth - semantics->precision + 1;
963 assert(NumHighBits < integerPartWidth && "Can not have more high bits to "
964 "clear than integerPartWidth");
965 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
966
967 if (Parts[PartCount - 1] & HighBitMask)
968 return false;
969
970 return true;
971}
972
973bool IEEEFloat::isSignificandAllZerosExceptMSB() const {
974 const integerPart *Parts = significandParts();
975 const unsigned PartCount = partCountForBits(semantics->precision);
976
977 for (unsigned i = 0; i < PartCount - 1; i++) {
978 if (Parts[i])
979 return false;
980 }
981
982 const unsigned NumHighBits =
983 PartCount * integerPartWidth - semantics->precision + 1;
984 return Parts[PartCount - 1] == integerPart(1)
985 << (integerPartWidth - NumHighBits);
986}
987
990 // The largest number by magnitude in our format will be the floating point
991 // number with maximum exponent and with significand that is all ones except
992 // the LSB.
993 return isFiniteNonZero() && exponent == semantics->maxExponent &&
994 isSignificandAllOnesExceptLSB();
995 } else {
996 // The largest number by magnitude in our format will be the floating point
997 // number with maximum exponent and with significand that is all ones.
998 return isFiniteNonZero() && exponent == semantics->maxExponent &&
999 isSignificandAllOnes();
1000 }
1001}
1002
1004 // This could be made more efficient; I'm going for obviously correct.
1005 if (!isFinite()) return false;
1006 IEEEFloat truncated = *this;
1007 truncated.roundToIntegral(rmTowardZero);
1008 return compare(truncated) == cmpEqual;
1009}
1010
1011bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {
1012 if (this == &rhs)
1013 return true;
1014 if (semantics != rhs.semantics ||
1015 category != rhs.category ||
1016 sign != rhs.sign)
1017 return false;
1018 if (category==fcZero || category==fcInfinity)
1019 return true;
1020
1021 if (isFiniteNonZero() && exponent != rhs.exponent)
1022 return false;
1023
1024 return std::equal(significandParts(), significandParts() + partCount(),
1025 rhs.significandParts());
1026}
1027
1029 initialize(&ourSemantics);
1030 sign = 0;
1031 category = fcNormal;
1032 zeroSignificand();
1033 exponent = ourSemantics.precision - 1;
1034 significandParts()[0] = value;
1036}
1037
1039 initialize(&ourSemantics);
1040 makeZero(false);
1041}
1042
1043// Delegate to the previous constructor, because a later copy constructor may
1044// actually inspect the category, which must not be garbage.
1046 : IEEEFloat(ourSemantics) {}
1047
1049 initialize(rhs.semantics);
1050 assign(rhs);
1051}
1052
1054 *this = std::move(rhs);
1055}
1056
1057IEEEFloat::~IEEEFloat() { freeSignificand(); }
1058
1059unsigned int IEEEFloat::partCount() const {
1060 return partCountForBits(semantics->precision + 1);
1061}
1062
1063const IEEEFloat::integerPart *IEEEFloat::significandParts() const {
1064 return const_cast<IEEEFloat *>(this)->significandParts();
1065}
1066
1067IEEEFloat::integerPart *IEEEFloat::significandParts() {
1068 if (partCount() > 1)
1069 return significand.parts;
1070 else
1071 return &significand.part;
1072}
1073
1074void IEEEFloat::zeroSignificand() {
1075 APInt::tcSet(significandParts(), 0, partCount());
1076}
1077
1078/* Increment an fcNormal floating point number's significand. */
1079void IEEEFloat::incrementSignificand() {
1080 integerPart carry;
1081
1082 carry = APInt::tcIncrement(significandParts(), partCount());
1083
1084 /* Our callers should never cause us to overflow. */
1085 assert(carry == 0);
1086 (void)carry;
1087}
1088
1089/* Add the significand of the RHS. Returns the carry flag. */
1090IEEEFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
1091 integerPart *parts;
1092
1093 parts = significandParts();
1094
1095 assert(semantics == rhs.semantics);
1096 assert(exponent == rhs.exponent);
1097
1098 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
1099}
1100
1101/* Subtract the significand of the RHS with a borrow flag. Returns
1102 the borrow flag. */
1103IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
1104 integerPart borrow) {
1105 integerPart *parts;
1106
1107 parts = significandParts();
1108
1109 assert(semantics == rhs.semantics);
1110 assert(exponent == rhs.exponent);
1111
1112 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
1113 partCount());
1114}
1115
1116/* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
1117 on to the full-precision result of the multiplication. Returns the
1118 lost fraction. */
1119lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
1120 IEEEFloat addend) {
1121 unsigned int omsb; // One, not zero, based MSB.
1122 unsigned int partsCount, newPartsCount, precision;
1123 integerPart *lhsSignificand;
1124 integerPart scratch[4];
1125 integerPart *fullSignificand;
1126 lostFraction lost_fraction;
1127 bool ignored;
1128
1129 assert(semantics == rhs.semantics);
1130
1131 precision = semantics->precision;
1132
1133 // Allocate space for twice as many bits as the original significand, plus one
1134 // extra bit for the addition to overflow into.
1135 newPartsCount = partCountForBits(precision * 2 + 1);
1136
1137 if (newPartsCount > 4)
1138 fullSignificand = new integerPart[newPartsCount];
1139 else
1140 fullSignificand = scratch;
1141
1142 lhsSignificand = significandParts();
1143 partsCount = partCount();
1144
1145 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
1146 rhs.significandParts(), partsCount, partsCount);
1147
1148 lost_fraction = lfExactlyZero;
1149 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1150 exponent += rhs.exponent;
1151
1152 // Assume the operands involved in the multiplication are single-precision
1153 // FP, and the two multiplicants are:
1154 // *this = a23 . a22 ... a0 * 2^e1
1155 // rhs = b23 . b22 ... b0 * 2^e2
1156 // the result of multiplication is:
1157 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
1158 // Note that there are three significant bits at the left-hand side of the
1159 // radix point: two for the multiplication, and an overflow bit for the
1160 // addition (that will always be zero at this point). Move the radix point
1161 // toward left by two bits, and adjust exponent accordingly.
1162 exponent += 2;
1163
1164 if (addend.isNonZero()) {
1165 // The intermediate result of the multiplication has "2 * precision"
1166 // signicant bit; adjust the addend to be consistent with mul result.
1167 //
1168 Significand savedSignificand = significand;
1169 const fltSemantics *savedSemantics = semantics;
1170 fltSemantics extendedSemantics;
1172 unsigned int extendedPrecision;
1173
1174 // Normalize our MSB to one below the top bit to allow for overflow.
1175 extendedPrecision = 2 * precision + 1;
1176 if (omsb != extendedPrecision - 1) {
1177 assert(extendedPrecision > omsb);
1178 APInt::tcShiftLeft(fullSignificand, newPartsCount,
1179 (extendedPrecision - 1) - omsb);
1180 exponent -= (extendedPrecision - 1) - omsb;
1181 }
1182
1183 /* Create new semantics. */
1184 extendedSemantics = *semantics;
1185 extendedSemantics.precision = extendedPrecision;
1186
1187 if (newPartsCount == 1)
1188 significand.part = fullSignificand[0];
1189 else
1190 significand.parts = fullSignificand;
1191 semantics = &extendedSemantics;
1192
1193 // Make a copy so we can convert it to the extended semantics.
1194 // Note that we cannot convert the addend directly, as the extendedSemantics
1195 // is a local variable (which we take a reference to).
1196 IEEEFloat extendedAddend(addend);
1197 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
1198 assert(status == opOK);
1199 (void)status;
1200
1201 // Shift the significand of the addend right by one bit. This guarantees
1202 // that the high bit of the significand is zero (same as fullSignificand),
1203 // so the addition will overflow (if it does overflow at all) into the top bit.
1204 lost_fraction = extendedAddend.shiftSignificandRight(1);
1205 assert(lost_fraction == lfExactlyZero &&
1206 "Lost precision while shifting addend for fused-multiply-add.");
1207
1208 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
1209
1210 /* Restore our state. */
1211 if (newPartsCount == 1)
1212 fullSignificand[0] = significand.part;
1213 significand = savedSignificand;
1214 semantics = savedSemantics;
1215
1216 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1217 }
1218
1219 // Convert the result having "2 * precision" significant-bits back to the one
1220 // having "precision" significant-bits. First, move the radix point from
1221 // poision "2*precision - 1" to "precision - 1". The exponent need to be
1222 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
1223 exponent -= precision + 1;
1224
1225 // In case MSB resides at the left-hand side of radix point, shift the
1226 // mantissa right by some amount to make sure the MSB reside right before
1227 // the radix point (i.e. "MSB . rest-significant-bits").
1228 //
1229 // Note that the result is not normalized when "omsb < precision". So, the
1230 // caller needs to call IEEEFloat::normalize() if normalized value is
1231 // expected.
1232 if (omsb > precision) {
1233 unsigned int bits, significantParts;
1234 lostFraction lf;
1235
1236 bits = omsb - precision;
1237 significantParts = partCountForBits(omsb);
1238 lf = shiftRight(fullSignificand, significantParts, bits);
1239 lost_fraction = combineLostFractions(lf, lost_fraction);
1240 exponent += bits;
1241 }
1242
1243 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
1244
1245 if (newPartsCount > 4)
1246 delete [] fullSignificand;
1247
1248 return lost_fraction;
1249}
1250
1251lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) {
1252 return multiplySignificand(rhs, IEEEFloat(*semantics));
1253}
1254
1255/* Multiply the significands of LHS and RHS to DST. */
1256lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
1257 unsigned int bit, i, partsCount;
1258 const integerPart *rhsSignificand;
1259 integerPart *lhsSignificand, *dividend, *divisor;
1260 integerPart scratch[4];
1261 lostFraction lost_fraction;
1262
1263 assert(semantics == rhs.semantics);
1264
1265 lhsSignificand = significandParts();
1266 rhsSignificand = rhs.significandParts();
1267 partsCount = partCount();
1268
1269 if (partsCount > 2)
1270 dividend = new integerPart[partsCount * 2];
1271 else
1272 dividend = scratch;
1273
1274 divisor = dividend + partsCount;
1275
1276 /* Copy the dividend and divisor as they will be modified in-place. */
1277 for (i = 0; i < partsCount; i++) {
1278 dividend[i] = lhsSignificand[i];
1279 divisor[i] = rhsSignificand[i];
1280 lhsSignificand[i] = 0;
1281 }
1282
1283 exponent -= rhs.exponent;
1284
1285 unsigned int precision = semantics->precision;
1286
1287 /* Normalize the divisor. */
1288 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1289 if (bit) {
1290 exponent += bit;
1291 APInt::tcShiftLeft(divisor, partsCount, bit);
1292 }
1293
1294 /* Normalize the dividend. */
1295 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1296 if (bit) {
1297 exponent -= bit;
1298 APInt::tcShiftLeft(dividend, partsCount, bit);
1299 }
1300
1301 /* Ensure the dividend >= divisor initially for the loop below.
1302 Incidentally, this means that the division loop below is
1303 guaranteed to set the integer bit to one. */
1304 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1305 exponent--;
1306 APInt::tcShiftLeft(dividend, partsCount, 1);
1307 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1308 }
1309
1310 /* Long division. */
1311 for (bit = precision; bit; bit -= 1) {
1312 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1313 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1314 APInt::tcSetBit(lhsSignificand, bit - 1);
1315 }
1316
1317 APInt::tcShiftLeft(dividend, partsCount, 1);
1318 }
1319
1320 /* Figure out the lost fraction. */
1321 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1322
1323 if (cmp > 0)
1324 lost_fraction = lfMoreThanHalf;
1325 else if (cmp == 0)
1326 lost_fraction = lfExactlyHalf;
1327 else if (APInt::tcIsZero(dividend, partsCount))
1328 lost_fraction = lfExactlyZero;
1329 else
1330 lost_fraction = lfLessThanHalf;
1331
1332 if (partsCount > 2)
1333 delete [] dividend;
1334
1335 return lost_fraction;
1336}
1337
1338unsigned int IEEEFloat::significandMSB() const {
1339 return APInt::tcMSB(significandParts(), partCount());
1340}
1341
1342unsigned int IEEEFloat::significandLSB() const {
1343 return APInt::tcLSB(significandParts(), partCount());
1344}
1345
1346/* Note that a zero result is NOT normalized to fcZero. */
1347lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
1348 /* Our exponent should not overflow. */
1349 assert((ExponentType) (exponent + bits) >= exponent);
1350
1351 exponent += bits;
1352
1353 return shiftRight(significandParts(), partCount(), bits);
1354}
1355
1356/* Shift the significand left BITS bits, subtract BITS from its exponent. */
1357void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
1358 assert(bits < semantics->precision);
1359
1360 if (bits) {
1361 unsigned int partsCount = partCount();
1362
1363 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1364 exponent -= bits;
1365
1366 assert(!APInt::tcIsZero(significandParts(), partsCount));
1367 }
1368}
1369
1372 int compare;
1373
1374 assert(semantics == rhs.semantics);
1376 assert(rhs.isFiniteNonZero());
1377
1378 compare = exponent - rhs.exponent;
1379
1380 /* If exponents are equal, do an unsigned bignum comparison of the
1381 significands. */
1382 if (compare == 0)
1383 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1384 partCount());
1385
1386 if (compare > 0)
1387 return cmpGreaterThan;
1388 else if (compare < 0)
1389 return cmpLessThan;
1390 else
1391 return cmpEqual;
1392}
1393
1394/* Set the least significant BITS bits of a bignum, clear the
1395 rest. */
1396static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts,
1397 unsigned bits) {
1398 unsigned i = 0;
1400 dst[i++] = ~(APInt::WordType)0;
1402 }
1403
1404 if (bits)
1405 dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits);
1406
1407 while (i < parts)
1408 dst[i++] = 0;
1409}
1410
1411/* Handle overflow. Sign is preserved. We either become infinity or
1412 the largest finite number. */
1413IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
1414 /* Infinity? */
1415 if (rounding_mode == rmNearestTiesToEven ||
1416 rounding_mode == rmNearestTiesToAway ||
1417 (rounding_mode == rmTowardPositive && !sign) ||
1418 (rounding_mode == rmTowardNegative && sign)) {
1420 makeNaN(false, sign);
1421 else
1422 category = fcInfinity;
1423 return (opStatus) (opOverflow | opInexact);
1424 }
1425
1426 /* Otherwise we become the largest finite number. */
1427 category = fcNormal;
1428 exponent = semantics->maxExponent;
1429 tcSetLeastSignificantBits(significandParts(), partCount(),
1430 semantics->precision);
1432 APInt::tcClearBit(significandParts(), 0);
1433
1434 return opInexact;
1435}
1436
1437/* Returns TRUE if, when truncating the current number, with BIT the
1438 new LSB, with the given lost fraction and rounding mode, the result
1439 would need to be rounded away from zero (i.e., by increasing the
1440 signficand). This routine must work for fcZero of both signs, and
1441 fcNormal numbers. */
1442bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
1443 lostFraction lost_fraction,
1444 unsigned int bit) const {
1445 /* NaNs and infinities should not have lost fractions. */
1446 assert(isFiniteNonZero() || category == fcZero);
1447
1448 /* Current callers never pass this so we don't handle it. */
1449 assert(lost_fraction != lfExactlyZero);
1450
1451 switch (rounding_mode) {
1453 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1454
1456 if (lost_fraction == lfMoreThanHalf)
1457 return true;
1458
1459 /* Our zeroes don't have a significand to test. */
1460 if (lost_fraction == lfExactlyHalf && category != fcZero)
1461 return APInt::tcExtractBit(significandParts(), bit);
1462
1463 return false;
1464
1465 case rmTowardZero:
1466 return false;
1467
1468 case rmTowardPositive:
1469 return !sign;
1470
1471 case rmTowardNegative:
1472 return sign;
1473
1474 default:
1475 break;
1476 }
1477 llvm_unreachable("Invalid rounding mode found");
1478}
1479
1480IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
1481 lostFraction lost_fraction) {
1482 unsigned int omsb; /* One, not zero, based MSB. */
1483 int exponentChange;
1484
1485 if (!isFiniteNonZero())
1486 return opOK;
1487
1488 /* Before rounding normalize the exponent of fcNormal numbers. */
1489 omsb = significandMSB() + 1;
1490
1491 if (omsb) {
1492 /* OMSB is numbered from 1. We want to place it in the integer
1493 bit numbered PRECISION if possible, with a compensating change in
1494 the exponent. */
1495 exponentChange = omsb - semantics->precision;
1496
1497 /* If the resulting exponent is too high, overflow according to
1498 the rounding mode. */
1499 if (exponent + exponentChange > semantics->maxExponent)
1500 return handleOverflow(rounding_mode);
1501
1502 /* Subnormal numbers have exponent minExponent, and their MSB
1503 is forced based on that. */
1504 if (exponent + exponentChange < semantics->minExponent)
1505 exponentChange = semantics->minExponent - exponent;
1506
1507 /* Shifting left is easy as we don't lose precision. */
1508 if (exponentChange < 0) {
1509 assert(lost_fraction == lfExactlyZero);
1510
1511 shiftSignificandLeft(-exponentChange);
1512
1513 return opOK;
1514 }
1515
1516 if (exponentChange > 0) {
1517 lostFraction lf;
1518
1519 /* Shift right and capture any new lost fraction. */
1520 lf = shiftSignificandRight(exponentChange);
1521
1522 lost_fraction = combineLostFractions(lf, lost_fraction);
1523
1524 /* Keep OMSB up-to-date. */
1525 if (omsb > (unsigned) exponentChange)
1526 omsb -= exponentChange;
1527 else
1528 omsb = 0;
1529 }
1530 }
1531
1533 exponent == semantics->maxExponent && isSignificandAllOnes())
1534 return handleOverflow(rounding_mode);
1535
1536 /* Now round the number according to rounding_mode given the lost
1537 fraction. */
1538
1539 /* As specified in IEEE 754, since we do not trap we do not report
1540 underflow for exact results. */
1541 if (lost_fraction == lfExactlyZero) {
1542 /* Canonicalize zeroes. */
1543 if (omsb == 0)
1544 category = fcZero;
1545
1546 return opOK;
1547 }
1548
1549 /* Increment the significand if we're rounding away from zero. */
1550 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1551 if (omsb == 0)
1552 exponent = semantics->minExponent;
1553
1554 incrementSignificand();
1555 omsb = significandMSB() + 1;
1556
1557 /* Did the significand increment overflow? */
1558 if (omsb == (unsigned) semantics->precision + 1) {
1559 /* Renormalize by incrementing the exponent and shifting our
1560 significand right one. However if we already have the
1561 maximum exponent we overflow to infinity. */
1562 if (exponent == semantics->maxExponent) {
1563 category = fcInfinity;
1564
1565 return (opStatus) (opOverflow | opInexact);
1566 }
1567
1568 shiftSignificandRight(1);
1569
1570 return opInexact;
1571 }
1572
1574 exponent == semantics->maxExponent && isSignificandAllOnes())
1575 return handleOverflow(rounding_mode);
1576 }
1577
1578 /* The normal case - we were and are not denormal, and any
1579 significand increment above didn't overflow. */
1580 if (omsb == semantics->precision)
1581 return opInexact;
1582
1583 /* We have a non-zero denormal. */
1584 assert(omsb < semantics->precision);
1585
1586 /* Canonicalize zeroes. */
1587 if (omsb == 0)
1588 category = fcZero;
1589
1590 /* The fcZero case is a denormal that underflowed to zero. */
1591 return (opStatus) (opUnderflow | opInexact);
1592}
1593
1594IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
1595 bool subtract) {
1596 switch (PackCategoriesIntoKey(category, rhs.category)) {
1597 default:
1598 llvm_unreachable(nullptr);
1599
1603 assign(rhs);
1604 [[fallthrough]];
1609 if (isSignaling()) {
1610 makeQuiet();
1611 return opInvalidOp;
1612 }
1613 return rhs.isSignaling() ? opInvalidOp : opOK;
1614
1618 return opOK;
1619
1622 category = fcInfinity;
1623 sign = rhs.sign ^ subtract;
1624 return opOK;
1625
1627 assign(rhs);
1628 sign = rhs.sign ^ subtract;
1629 return opOK;
1630
1632 /* Sign depends on rounding mode; handled by caller. */
1633 return opOK;
1634
1636 /* Differently signed infinities can only be validly
1637 subtracted. */
1638 if (((sign ^ rhs.sign)!=0) != subtract) {
1639 makeNaN();
1640 return opInvalidOp;
1641 }
1642
1643 return opOK;
1644
1646 return opDivByZero;
1647 }
1648}
1649
1650/* Add or subtract two normal numbers. */
1651lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
1652 bool subtract) {
1653 integerPart carry;
1654 lostFraction lost_fraction;
1655 int bits;
1656
1657 /* Determine if the operation on the absolute values is effectively
1658 an addition or subtraction. */
1659 subtract ^= static_cast<bool>(sign ^ rhs.sign);
1660
1661 /* Are we bigger exponent-wise than the RHS? */
1662 bits = exponent - rhs.exponent;
1663
1664 /* Subtraction is more subtle than one might naively expect. */
1665 if (subtract) {
1666 IEEEFloat temp_rhs(rhs);
1667
1668 if (bits == 0)
1669 lost_fraction = lfExactlyZero;
1670 else if (bits > 0) {
1671 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1672 shiftSignificandLeft(1);
1673 } else {
1674 lost_fraction = shiftSignificandRight(-bits - 1);
1675 temp_rhs.shiftSignificandLeft(1);
1676 }
1677
1678 // Should we reverse the subtraction.
1679 if (compareAbsoluteValue(temp_rhs) == cmpLessThan) {
1680 carry = temp_rhs.subtractSignificand
1681 (*this, lost_fraction != lfExactlyZero);
1682 copySignificand(temp_rhs);
1683 sign = !sign;
1684 } else {
1685 carry = subtractSignificand
1686 (temp_rhs, lost_fraction != lfExactlyZero);
1687 }
1688
1689 /* Invert the lost fraction - it was on the RHS and
1690 subtracted. */
1691 if (lost_fraction == lfLessThanHalf)
1692 lost_fraction = lfMoreThanHalf;
1693 else if (lost_fraction == lfMoreThanHalf)
1694 lost_fraction = lfLessThanHalf;
1695
1696 /* The code above is intended to ensure that no borrow is
1697 necessary. */
1698 assert(!carry);
1699 (void)carry;
1700 } else {
1701 if (bits > 0) {
1702 IEEEFloat temp_rhs(rhs);
1703
1704 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1705 carry = addSignificand(temp_rhs);
1706 } else {
1707 lost_fraction = shiftSignificandRight(-bits);
1708 carry = addSignificand(rhs);
1709 }
1710
1711 /* We have a guard bit; generating a carry cannot happen. */
1712 assert(!carry);
1713 (void)carry;
1714 }
1715
1716 return lost_fraction;
1717}
1718
1719IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
1720 switch (PackCategoriesIntoKey(category, rhs.category)) {
1721 default:
1722 llvm_unreachable(nullptr);
1723
1727 assign(rhs);
1728 sign = false;
1729 [[fallthrough]];
1734 sign ^= rhs.sign; // restore the original sign
1735 if (isSignaling()) {
1736 makeQuiet();
1737 return opInvalidOp;
1738 }
1739 return rhs.isSignaling() ? opInvalidOp : opOK;
1740
1744 category = fcInfinity;
1745 return opOK;
1746
1750 category = fcZero;
1751 return opOK;
1752
1755 makeNaN();
1756 return opInvalidOp;
1757
1759 return opOK;
1760 }
1761}
1762
1763IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
1764 switch (PackCategoriesIntoKey(category, rhs.category)) {
1765 default:
1766 llvm_unreachable(nullptr);
1767
1771 assign(rhs);
1772 sign = false;
1773 [[fallthrough]];
1778 sign ^= rhs.sign; // restore the original sign
1779 if (isSignaling()) {
1780 makeQuiet();
1781 return opInvalidOp;
1782 }
1783 return rhs.isSignaling() ? opInvalidOp : opOK;
1784
1789 return opOK;
1790
1792 category = fcZero;
1793 return opOK;
1794
1796 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
1797 makeNaN(false, sign);
1798 else
1799 category = fcInfinity;
1800 return opDivByZero;
1801
1804 makeNaN();
1805 return opInvalidOp;
1806
1808 return opOK;
1809 }
1810}
1811
1812IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
1813 switch (PackCategoriesIntoKey(category, rhs.category)) {
1814 default:
1815 llvm_unreachable(nullptr);
1816
1820 assign(rhs);
1821 [[fallthrough]];
1826 if (isSignaling()) {
1827 makeQuiet();
1828 return opInvalidOp;
1829 }
1830 return rhs.isSignaling() ? opInvalidOp : opOK;
1831
1835 return opOK;
1836
1842 makeNaN();
1843 return opInvalidOp;
1844
1846 return opOK;
1847 }
1848}
1849
1850IEEEFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) {
1851 switch (PackCategoriesIntoKey(category, rhs.category)) {
1852 default:
1853 llvm_unreachable(nullptr);
1854
1858 assign(rhs);
1859 [[fallthrough]];
1864 if (isSignaling()) {
1865 makeQuiet();
1866 return opInvalidOp;
1867 }
1868 return rhs.isSignaling() ? opInvalidOp : opOK;
1869
1873 return opOK;
1874
1880 makeNaN();
1881 return opInvalidOp;
1882
1884 return opDivByZero; // fake status, indicating this is not a special case
1885 }
1886}
1887
1888/* Change sign. */
1890 /* Look mummy, this one's easy. */
1891 sign = !sign;
1892}
1893
1894/* Normalized addition or subtraction. */
1895IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
1896 roundingMode rounding_mode,
1897 bool subtract) {
1898 opStatus fs;
1899
1900 fs = addOrSubtractSpecials(rhs, subtract);
1901
1902 /* This return code means it was not a simple case. */
1903 if (fs == opDivByZero) {
1904 lostFraction lost_fraction;
1905
1906 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1907 fs = normalize(rounding_mode, lost_fraction);
1908
1909 /* Can only be zero if we lost no fraction. */
1910 assert(category != fcZero || lost_fraction == lfExactlyZero);
1911 }
1912
1913 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1914 positive zero unless rounding to minus infinity, except that
1915 adding two like-signed zeroes gives that zero. */
1916 if (category == fcZero) {
1917 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
1918 sign = (rounding_mode == rmTowardNegative);
1919 }
1920
1921 return fs;
1922}
1923
1924/* Normalized addition. */
1926 roundingMode rounding_mode) {
1927 return addOrSubtract(rhs, rounding_mode, false);
1928}
1929
1930/* Normalized subtraction. */
1932 roundingMode rounding_mode) {
1933 return addOrSubtract(rhs, rounding_mode, true);
1934}
1935
1936/* Normalized multiply. */
1938 roundingMode rounding_mode) {
1939 opStatus fs;
1940
1941 sign ^= rhs.sign;
1942 fs = multiplySpecials(rhs);
1943
1944 if (isFiniteNonZero()) {
1945 lostFraction lost_fraction = multiplySignificand(rhs);
1946 fs = normalize(rounding_mode, lost_fraction);
1947 if (lost_fraction != lfExactlyZero)
1948 fs = (opStatus) (fs | opInexact);
1949 }
1950
1951 return fs;
1952}
1953
1954/* Normalized divide. */
1956 roundingMode rounding_mode) {
1957 opStatus fs;
1958
1959 sign ^= rhs.sign;
1960 fs = divideSpecials(rhs);
1961
1962 if (isFiniteNonZero()) {
1963 lostFraction lost_fraction = divideSignificand(rhs);
1964 fs = normalize(rounding_mode, lost_fraction);
1965 if (lost_fraction != lfExactlyZero)
1966 fs = (opStatus) (fs | opInexact);
1967 }
1968
1969 return fs;
1970}
1971
1972/* Normalized remainder. */
1974 opStatus fs;
1975 unsigned int origSign = sign;
1976
1977 // First handle the special cases.
1978 fs = remainderSpecials(rhs);
1979 if (fs != opDivByZero)
1980 return fs;
1981
1982 fs = opOK;
1983
1984 // Make sure the current value is less than twice the denom. If the addition
1985 // did not succeed (an overflow has happened), which means that the finite
1986 // value we currently posses must be less than twice the denom (as we are
1987 // using the same semantics).
1988 IEEEFloat P2 = rhs;
1989 if (P2.add(rhs, rmNearestTiesToEven) == opOK) {
1990 fs = mod(P2);
1991 assert(fs == opOK);
1992 }
1993
1994 // Lets work with absolute numbers.
1995 IEEEFloat P = rhs;
1996 P.sign = false;
1997 sign = false;
1998
1999 //
2000 // To calculate the remainder we use the following scheme.
2001 //
2002 // The remainder is defained as follows:
2003 //
2004 // remainder = numer - rquot * denom = x - r * p
2005 //
2006 // Where r is the result of: x/p, rounded toward the nearest integral value
2007 // (with halfway cases rounded toward the even number).
2008 //
2009 // Currently, (after x mod 2p):
2010 // r is the number of 2p's present inside x, which is inherently, an even
2011 // number of p's.
2012 //
2013 // We may split the remaining calculation into 4 options:
2014 // - if x < 0.5p then we round to the nearest number with is 0, and are done.
2015 // - if x == 0.5p then we round to the nearest even number which is 0, and we
2016 // are done as well.
2017 // - if 0.5p < x < p then we round to nearest number which is 1, and we have
2018 // to subtract 1p at least once.
2019 // - if x >= p then we must subtract p at least once, as x must be a
2020 // remainder.
2021 //
2022 // By now, we were done, or we added 1 to r, which in turn, now an odd number.
2023 //
2024 // We can now split the remaining calculation to the following 3 options:
2025 // - if x < 0.5p then we round to the nearest number with is 0, and are done.
2026 // - if x == 0.5p then we round to the nearest even number. As r is odd, we
2027 // must round up to the next even number. so we must subtract p once more.
2028 // - if x > 0.5p (and inherently x < p) then we must round r up to the next
2029 // integral, and subtract p once more.
2030 //
2031
2032 // Extend the semantics to prevent an overflow/underflow or inexact result.
2033 bool losesInfo;
2034 fltSemantics extendedSemantics = *semantics;
2035 extendedSemantics.maxExponent++;
2036 extendedSemantics.minExponent--;
2037 extendedSemantics.precision += 2;
2038
2039 IEEEFloat VEx = *this;
2040 fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2041 assert(fs == opOK && !losesInfo);
2042 IEEEFloat PEx = P;
2043 fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2044 assert(fs == opOK && !losesInfo);
2045
2046 // It is simpler to work with 2x instead of 0.5p, and we do not need to lose
2047 // any fraction.
2048 fs = VEx.add(VEx, rmNearestTiesToEven);
2049 assert(fs == opOK);
2050
2051 if (VEx.compare(PEx) == cmpGreaterThan) {
2053 assert(fs == opOK);
2054
2055 // Make VEx = this.add(this), but because we have different semantics, we do
2056 // not want to `convert` again, so we just subtract PEx twice (which equals
2057 // to the desired value).
2058 fs = VEx.subtract(PEx, rmNearestTiesToEven);
2059 assert(fs == opOK);
2060 fs = VEx.subtract(PEx, rmNearestTiesToEven);
2061 assert(fs == opOK);
2062
2063 cmpResult result = VEx.compare(PEx);
2064 if (result == cmpGreaterThan || result == cmpEqual) {
2066 assert(fs == opOK);
2067 }
2068 }
2069
2070 if (isZero())
2071 sign = origSign; // IEEE754 requires this
2072 else
2073 sign ^= origSign;
2074 return fs;
2075}
2076
2077/* Normalized llvm frem (C fmod). */
2079 opStatus fs;
2080 fs = modSpecials(rhs);
2081 unsigned int origSign = sign;
2082
2083 while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
2085 int Exp = ilogb(*this) - ilogb(rhs);
2086 IEEEFloat V = scalbn(rhs, Exp, rmNearestTiesToEven);
2087 // V can overflow to NaN with fltNonfiniteBehavior::NanOnly, so explicitly
2088 // check for it.
2089 if (V.isNaN() || compareAbsoluteValue(V) == cmpLessThan)
2090 V = scalbn(rhs, Exp - 1, rmNearestTiesToEven);
2091 V.sign = sign;
2092
2094 assert(fs==opOK);
2095 }
2096 if (isZero())
2097 sign = origSign; // fmod requires this
2098 return fs;
2099}
2100
2101/* Normalized fused-multiply-add. */
2103 const IEEEFloat &addend,
2104 roundingMode rounding_mode) {
2105 opStatus fs;
2106
2107 /* Post-multiplication sign, before addition. */
2108 sign ^= multiplicand.sign;
2109
2110 /* If and only if all arguments are normal do we need to do an
2111 extended-precision calculation. */
2112 if (isFiniteNonZero() &&
2113 multiplicand.isFiniteNonZero() &&
2114 addend.isFinite()) {
2115 lostFraction lost_fraction;
2116
2117 lost_fraction = multiplySignificand(multiplicand, addend);
2118 fs = normalize(rounding_mode, lost_fraction);
2119 if (lost_fraction != lfExactlyZero)
2120 fs = (opStatus) (fs | opInexact);
2121
2122 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2123 positive zero unless rounding to minus infinity, except that
2124 adding two like-signed zeroes gives that zero. */
2125 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign)
2126 sign = (rounding_mode == rmTowardNegative);
2127 } else {
2128 fs = multiplySpecials(multiplicand);
2129
2130 /* FS can only be opOK or opInvalidOp. There is no more work
2131 to do in the latter case. The IEEE-754R standard says it is
2132 implementation-defined in this case whether, if ADDEND is a
2133 quiet NaN, we raise invalid op; this implementation does so.
2134
2135 If we need to do the addition we can do so with normal
2136 precision. */
2137 if (fs == opOK)
2138 fs = addOrSubtract(addend, rounding_mode, false);
2139 }
2140
2141 return fs;
2142}
2143
2144/* Rounding-mode correct round to integral value. */
2146 opStatus fs;
2147
2148 if (isInfinity())
2149 // [IEEE Std 754-2008 6.1]:
2150 // The behavior of infinity in floating-point arithmetic is derived from the
2151 // limiting cases of real arithmetic with operands of arbitrarily
2152 // large magnitude, when such a limit exists.
2153 // ...
2154 // Operations on infinite operands are usually exact and therefore signal no
2155 // exceptions ...
2156 return opOK;
2157
2158 if (isNaN()) {
2159 if (isSignaling()) {
2160 // [IEEE Std 754-2008 6.2]:
2161 // Under default exception handling, any operation signaling an invalid
2162 // operation exception and for which a floating-point result is to be
2163 // delivered shall deliver a quiet NaN.
2164 makeQuiet();
2165 // [IEEE Std 754-2008 6.2]:
2166 // Signaling NaNs shall be reserved operands that, under default exception
2167 // handling, signal the invalid operation exception(see 7.2) for every
2168 // general-computational and signaling-computational operation except for
2169 // the conversions described in 5.12.
2170 return opInvalidOp;
2171 } else {
2172 // [IEEE Std 754-2008 6.2]:
2173 // For an operation with quiet NaN inputs, other than maximum and minimum
2174 // operations, if a floating-point result is to be delivered the result
2175 // shall be a quiet NaN which should be one of the input NaNs.
2176 // ...
2177 // Every general-computational and quiet-computational operation involving
2178 // one or more input NaNs, none of them signaling, shall signal no
2179 // exception, except fusedMultiplyAdd might signal the invalid operation
2180 // exception(see 7.2).
2181 return opOK;
2182 }
2183 }
2184
2185 if (isZero()) {
2186 // [IEEE Std 754-2008 6.3]:
2187 // ... the sign of the result of conversions, the quantize operation, the
2188 // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is
2189 // the sign of the first or only operand.
2190 return opOK;
2191 }
2192
2193 // If the exponent is large enough, we know that this value is already
2194 // integral, and the arithmetic below would potentially cause it to saturate
2195 // to +/-Inf. Bail out early instead.
2196 if (exponent+1 >= (int)semanticsPrecision(*semantics))
2197 return opOK;
2198
2199 // The algorithm here is quite simple: we add 2^(p-1), where p is the
2200 // precision of our format, and then subtract it back off again. The choice
2201 // of rounding modes for the addition/subtraction determines the rounding mode
2202 // for our integral rounding as well.
2203 // NOTE: When the input value is negative, we do subtraction followed by
2204 // addition instead.
2205 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
2206 IntegerConstant <<= semanticsPrecision(*semantics)-1;
2207 IEEEFloat MagicConstant(*semantics);
2208 fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
2210 assert(fs == opOK);
2211 MagicConstant.sign = sign;
2212
2213 // Preserve the input sign so that we can handle the case of zero result
2214 // correctly.
2215 bool inputSign = isNegative();
2216
2217 fs = add(MagicConstant, rounding_mode);
2218
2219 // Current value and 'MagicConstant' are both integers, so the result of the
2220 // subtraction is always exact according to Sterbenz' lemma.
2221 subtract(MagicConstant, rounding_mode);
2222
2223 // Restore the input sign.
2224 if (inputSign != isNegative())
2225 changeSign();
2226
2227 return fs;
2228}
2229
2230
2231/* Comparison requires normalized numbers. */
2233 cmpResult result;
2234
2235 assert(semantics == rhs.semantics);
2236
2237 switch (PackCategoriesIntoKey(category, rhs.category)) {
2238 default:
2239 llvm_unreachable(nullptr);
2240
2248 return cmpUnordered;
2249
2253 if (sign)
2254 return cmpLessThan;
2255 else
2256 return cmpGreaterThan;
2257
2261 if (rhs.sign)
2262 return cmpGreaterThan;
2263 else
2264 return cmpLessThan;
2265
2267 if (sign == rhs.sign)
2268 return cmpEqual;
2269 else if (sign)
2270 return cmpLessThan;
2271 else
2272 return cmpGreaterThan;
2273
2275 return cmpEqual;
2276
2278 break;
2279 }
2280
2281 /* Two normal numbers. Do they have the same sign? */
2282 if (sign != rhs.sign) {
2283 if (sign)
2284 result = cmpLessThan;
2285 else
2286 result = cmpGreaterThan;
2287 } else {
2288 /* Compare absolute values; invert result if negative. */
2289 result = compareAbsoluteValue(rhs);
2290
2291 if (sign) {
2292 if (result == cmpLessThan)
2293 result = cmpGreaterThan;
2294 else if (result == cmpGreaterThan)
2295 result = cmpLessThan;
2296 }
2297 }
2298
2299 return result;
2300}
2301
2302/// IEEEFloat::convert - convert a value of one floating point type to another.
2303/// The return value corresponds to the IEEE754 exceptions. *losesInfo
2304/// records whether the transformation lost information, i.e. whether
2305/// converting the result back to the original type will produce the
2306/// original value (this is almost the same as return value==fsOK, but there
2307/// are edge cases where this is not so).
2308
2310 roundingMode rounding_mode,
2311 bool *losesInfo) {
2313 unsigned int newPartCount, oldPartCount;
2314 opStatus fs;
2315 int shift;
2316 const fltSemantics &fromSemantics = *semantics;
2317 bool is_signaling = isSignaling();
2318
2320 newPartCount = partCountForBits(toSemantics.precision + 1);
2321 oldPartCount = partCount();
2322 shift = toSemantics.precision - fromSemantics.precision;
2323
2324 bool X86SpecialNan = false;
2325 if (&fromSemantics == &semX87DoubleExtended &&
2326 &toSemantics != &semX87DoubleExtended && category == fcNaN &&
2327 (!(*significandParts() & 0x8000000000000000ULL) ||
2328 !(*significandParts() & 0x4000000000000000ULL))) {
2329 // x86 has some unusual NaNs which cannot be represented in any other
2330 // format; note them here.
2331 X86SpecialNan = true;
2332 }
2333
2334 // If this is a truncation of a denormal number, and the target semantics
2335 // has larger exponent range than the source semantics (this can happen
2336 // when truncating from PowerPC double-double to double format), the
2337 // right shift could lose result mantissa bits. Adjust exponent instead
2338 // of performing excessive shift.
2339 // Also do a similar trick in case shifting denormal would produce zero
2340 // significand as this case isn't handled correctly by normalize.
2341 if (shift < 0 && isFiniteNonZero()) {
2342 int omsb = significandMSB() + 1;
2343 int exponentChange = omsb - fromSemantics.precision;
2344 if (exponent + exponentChange < toSemantics.minExponent)
2345 exponentChange = toSemantics.minExponent - exponent;
2346 if (exponentChange < shift)
2347 exponentChange = shift;
2348 if (exponentChange < 0) {
2349 shift -= exponentChange;
2350 exponent += exponentChange;
2351 } else if (omsb <= -shift) {
2352 exponentChange = omsb + shift - 1; // leave at least one bit set
2353 shift -= exponentChange;
2354 exponent += exponentChange;
2355 }
2356 }
2357
2358 // If this is a truncation, perform the shift before we narrow the storage.
2359 if (shift < 0 && (isFiniteNonZero() ||
2360 (category == fcNaN && semantics->nonFiniteBehavior !=
2362 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
2363
2364 // Fix the storage so it can hold to new value.
2365 if (newPartCount > oldPartCount) {
2366 // The new type requires more storage; make it available.
2367 integerPart *newParts;
2368 newParts = new integerPart[newPartCount];
2369 APInt::tcSet(newParts, 0, newPartCount);
2370 if (isFiniteNonZero() || category==fcNaN)
2371 APInt::tcAssign(newParts, significandParts(), oldPartCount);
2372 freeSignificand();
2373 significand.parts = newParts;
2374 } else if (newPartCount == 1 && oldPartCount != 1) {
2375 // Switch to built-in storage for a single part.
2376 integerPart newPart = 0;
2377 if (isFiniteNonZero() || category==fcNaN)
2378 newPart = significandParts()[0];
2379 freeSignificand();
2380 significand.part = newPart;
2381 }
2382
2383 // Now that we have the right storage, switch the semantics.
2384 semantics = &toSemantics;
2385
2386 // If this is an extension, perform the shift now that the storage is
2387 // available.
2388 if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
2389 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2390
2391 if (isFiniteNonZero()) {
2392 fs = normalize(rounding_mode, lostFraction);
2393 *losesInfo = (fs != opOK);
2394 } else if (category == fcNaN) {
2396 *losesInfo =
2398 makeNaN(false, sign);
2399 return is_signaling ? opInvalidOp : opOK;
2400 }
2401
2402 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2403
2404 // For x87 extended precision, we want to make a NaN, not a special NaN if
2405 // the input wasn't special either.
2406 if (!X86SpecialNan && semantics == &semX87DoubleExtended)
2407 APInt::tcSetBit(significandParts(), semantics->precision - 1);
2408
2409 // Convert of sNaN creates qNaN and raises an exception (invalid op).
2410 // This also guarantees that a sNaN does not become Inf on a truncation
2411 // that loses all payload bits.
2412 if (is_signaling) {
2413 makeQuiet();
2414 fs = opInvalidOp;
2415 } else {
2416 fs = opOK;
2417 }
2418 } else if (category == fcInfinity &&
2420 makeNaN(false, sign);
2421 *losesInfo = true;
2422 fs = opInexact;
2423 } else {
2424 *losesInfo = false;
2425 fs = opOK;
2426 }
2427
2428 return fs;
2429}
2430
2431/* Convert a floating point number to an integer according to the
2432 rounding mode. If the rounded integer value is out of range this
2433 returns an invalid operation exception and the contents of the
2434 destination parts are unspecified. If the rounded value is in
2435 range but the floating point number is not the exact integer, the C
2436 standard doesn't require an inexact exception to be raised. IEEE
2437 854 does require it so we do that.
2438
2439 Note that for conversions to integer type the C standard requires
2440 round-to-zero to always be used. */
2441IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
2442 MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
2443 roundingMode rounding_mode, bool *isExact) const {
2444 lostFraction lost_fraction;
2445 const integerPart *src;
2446 unsigned int dstPartsCount, truncatedBits;
2447
2448 *isExact = false;
2449
2450 /* Handle the three special cases first. */
2451 if (category == fcInfinity || category == fcNaN)
2452 return opInvalidOp;
2453
2454 dstPartsCount = partCountForBits(width);
2455 assert(dstPartsCount <= parts.size() && "Integer too big");
2456
2457 if (category == fcZero) {
2458 APInt::tcSet(parts.data(), 0, dstPartsCount);
2459 // Negative zero can't be represented as an int.
2460 *isExact = !sign;
2461 return opOK;
2462 }
2463
2464 src = significandParts();
2465
2466 /* Step 1: place our absolute value, with any fraction truncated, in
2467 the destination. */
2468 if (exponent < 0) {
2469 /* Our absolute value is less than one; truncate everything. */
2470 APInt::tcSet(parts.data(), 0, dstPartsCount);
2471 /* For exponent -1 the integer bit represents .5, look at that.
2472 For smaller exponents leftmost truncated bit is 0. */
2473 truncatedBits = semantics->precision -1U - exponent;
2474 } else {
2475 /* We want the most significant (exponent + 1) bits; the rest are
2476 truncated. */
2477 unsigned int bits = exponent + 1U;
2478
2479 /* Hopelessly large in magnitude? */
2480 if (bits > width)
2481 return opInvalidOp;
2482
2483 if (bits < semantics->precision) {
2484 /* We truncate (semantics->precision - bits) bits. */
2485 truncatedBits = semantics->precision - bits;
2486 APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
2487 } else {
2488 /* We want at least as many bits as are available. */
2489 APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
2490 0);
2491 APInt::tcShiftLeft(parts.data(), dstPartsCount,
2492 bits - semantics->precision);
2493 truncatedBits = 0;
2494 }
2495 }
2496
2497 /* Step 2: work out any lost fraction, and increment the absolute
2498 value if we would round away from zero. */
2499 if (truncatedBits) {
2500 lost_fraction = lostFractionThroughTruncation(src, partCount(),
2501 truncatedBits);
2502 if (lost_fraction != lfExactlyZero &&
2503 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2504 if (APInt::tcIncrement(parts.data(), dstPartsCount))
2505 return opInvalidOp; /* Overflow. */
2506 }
2507 } else {
2508 lost_fraction = lfExactlyZero;
2509 }
2510
2511 /* Step 3: check if we fit in the destination. */
2512 unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;
2513
2514 if (sign) {
2515 if (!isSigned) {
2516 /* Negative numbers cannot be represented as unsigned. */
2517 if (omsb != 0)
2518 return opInvalidOp;
2519 } else {
2520 /* It takes omsb bits to represent the unsigned integer value.
2521 We lose a bit for the sign, but care is needed as the
2522 maximally negative integer is a special case. */
2523 if (omsb == width &&
2524 APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
2525 return opInvalidOp;
2526
2527 /* This case can happen because of rounding. */
2528 if (omsb > width)
2529 return opInvalidOp;
2530 }
2531
2532 APInt::tcNegate (parts.data(), dstPartsCount);
2533 } else {
2534 if (omsb >= width + !isSigned)
2535 return opInvalidOp;
2536 }
2537
2538 if (lost_fraction == lfExactlyZero) {
2539 *isExact = true;
2540 return opOK;
2541 } else
2542 return opInexact;
2543}
2544
2545/* Same as convertToSignExtendedInteger, except we provide
2546 deterministic values in case of an invalid operation exception,
2547 namely zero for NaNs and the minimal or maximal value respectively
2548 for underflow or overflow.
2549 The *isExact output tells whether the result is exact, in the sense
2550 that converting it back to the original floating point type produces
2551 the original value. This is almost equivalent to result==opOK,
2552 except for negative zeroes.
2553*/
2556 unsigned int width, bool isSigned,
2557 roundingMode rounding_mode, bool *isExact) const {
2558 opStatus fs;
2559
2560 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2561 isExact);
2562
2563 if (fs == opInvalidOp) {
2564 unsigned int bits, dstPartsCount;
2565
2566 dstPartsCount = partCountForBits(width);
2567 assert(dstPartsCount <= parts.size() && "Integer too big");
2568
2569 if (category == fcNaN)
2570 bits = 0;
2571 else if (sign)
2572 bits = isSigned;
2573 else
2574 bits = width - isSigned;
2575
2576 tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
2577 if (sign && isSigned)
2578 APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
2579 }
2580
2581 return fs;
2582}
2583
2584/* Convert an unsigned integer SRC to a floating point number,
2585 rounding according to ROUNDING_MODE. The sign of the floating
2586 point number is not modified. */
2587IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts(
2588 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
2589 unsigned int omsb, precision, dstCount;
2590 integerPart *dst;
2591 lostFraction lost_fraction;
2592
2593 category = fcNormal;
2594 omsb = APInt::tcMSB(src, srcCount) + 1;
2595 dst = significandParts();
2596 dstCount = partCount();
2597 precision = semantics->precision;
2598
2599 /* We want the most significant PRECISION bits of SRC. There may not
2600 be that many; extract what we can. */
2601 if (precision <= omsb) {
2602 exponent = omsb - 1;
2603 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2604 omsb - precision);
2605 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2606 } else {
2607 exponent = precision - 1;
2608 lost_fraction = lfExactlyZero;
2609 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2610 }
2611
2612 return normalize(rounding_mode, lost_fraction);
2613}
2614
2616 roundingMode rounding_mode) {
2617 unsigned int partCount = Val.getNumWords();
2618 APInt api = Val;
2619
2620 sign = false;
2621 if (isSigned && api.isNegative()) {
2622 sign = true;
2623 api = -api;
2624 }
2625
2626 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2627}
2628
2629/* Convert a two's complement integer SRC to a floating point number,
2630 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2631 integer is signed, in which case it must be sign-extended. */
2634 unsigned int srcCount, bool isSigned,
2635 roundingMode rounding_mode) {
2636 opStatus status;
2637
2638 if (isSigned &&
2639 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2641
2642 /* If we're signed and negative negate a copy. */
2643 sign = true;
2644 copy = new integerPart[srcCount];
2645 APInt::tcAssign(copy, src, srcCount);
2646 APInt::tcNegate(copy, srcCount);
2647 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2648 delete [] copy;
2649 } else {
2650 sign = false;
2651 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2652 }
2653
2654 return status;
2655}
2656
2657/* FIXME: should this just take a const APInt reference? */
2660 unsigned int width, bool isSigned,
2661 roundingMode rounding_mode) {
2662 unsigned int partCount = partCountForBits(width);
2663 APInt api = APInt(width, ArrayRef(parts, partCount));
2664
2665 sign = false;
2666 if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2667 sign = true;
2668 api = -api;
2669 }
2670
2671 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2672}
2673
2675IEEEFloat::convertFromHexadecimalString(StringRef s,
2676 roundingMode rounding_mode) {
2677 lostFraction lost_fraction = lfExactlyZero;
2678
2679 category = fcNormal;
2680 zeroSignificand();
2681 exponent = 0;
2682
2683 integerPart *significand = significandParts();
2684 unsigned partsCount = partCount();
2685 unsigned bitPos = partsCount * integerPartWidth;
2686 bool computedTrailingFraction = false;
2687
2688 // Skip leading zeroes and any (hexa)decimal point.
2689 StringRef::iterator begin = s.begin();
2690 StringRef::iterator end = s.end();
2692 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2693 if (!PtrOrErr)
2694 return PtrOrErr.takeError();
2695 StringRef::iterator p = *PtrOrErr;
2696 StringRef::iterator firstSignificantDigit = p;
2697
2698 while (p != end) {
2699 integerPart hex_value;
2700
2701 if (*p == '.') {
2702 if (dot != end)
2703 return createError("String contains multiple dots");
2704 dot = p++;
2705 continue;
2706 }
2707
2708 hex_value = hexDigitValue(*p);
2709 if (hex_value == -1U)
2710 break;
2711
2712 p++;
2713
2714 // Store the number while we have space.
2715 if (bitPos) {
2716 bitPos -= 4;
2717 hex_value <<= bitPos % integerPartWidth;
2718 significand[bitPos / integerPartWidth] |= hex_value;
2719 } else if (!computedTrailingFraction) {
2720 auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value);
2721 if (!FractOrErr)
2722 return FractOrErr.takeError();
2723 lost_fraction = *FractOrErr;
2724 computedTrailingFraction = true;
2725 }
2726 }
2727
2728 /* Hex floats require an exponent but not a hexadecimal point. */
2729 if (p == end)
2730 return createError("Hex strings require an exponent");
2731 if (*p != 'p' && *p != 'P')
2732 return createError("Invalid character in significand");
2733 if (p == begin)
2734 return createError("Significand has no digits");
2735 if (dot != end && p - begin == 1)
2736 return createError("Significand has no digits");
2737
2738 /* Ignore the exponent if we are zero. */
2739 if (p != firstSignificantDigit) {
2740 int expAdjustment;
2741
2742 /* Implicit hexadecimal point? */
2743 if (dot == end)
2744 dot = p;
2745
2746 /* Calculate the exponent adjustment implicit in the number of
2747 significant digits. */
2748 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2749 if (expAdjustment < 0)
2750 expAdjustment++;
2751 expAdjustment = expAdjustment * 4 - 1;
2752
2753 /* Adjust for writing the significand starting at the most
2754 significant nibble. */
2755 expAdjustment += semantics->precision;
2756 expAdjustment -= partsCount * integerPartWidth;
2757
2758 /* Adjust for the given exponent. */
2759 auto ExpOrErr = totalExponent(p + 1, end, expAdjustment);
2760 if (!ExpOrErr)
2761 return ExpOrErr.takeError();
2762 exponent = *ExpOrErr;
2763 }
2764
2765 return normalize(rounding_mode, lost_fraction);
2766}
2767
2769IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2770 unsigned sigPartCount, int exp,
2771 roundingMode rounding_mode) {
2772 unsigned int parts, pow5PartCount;
2773 fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
2775 bool isNearest;
2776
2777 isNearest = (rounding_mode == rmNearestTiesToEven ||
2778 rounding_mode == rmNearestTiesToAway);
2779
2780 parts = partCountForBits(semantics->precision + 11);
2781
2782 /* Calculate pow(5, abs(exp)). */
2783 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2784
2785 for (;; parts *= 2) {
2786 opStatus sigStatus, powStatus;
2787 unsigned int excessPrecision, truncatedBits;
2788
2789 calcSemantics.precision = parts * integerPartWidth - 1;
2790 excessPrecision = calcSemantics.precision - semantics->precision;
2791 truncatedBits = excessPrecision;
2792
2793 IEEEFloat decSig(calcSemantics, uninitialized);
2794 decSig.makeZero(sign);
2795 IEEEFloat pow5(calcSemantics);
2796
2797 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2799 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2801 /* Add exp, as 10^n = 5^n * 2^n. */
2802 decSig.exponent += exp;
2803
2804 lostFraction calcLostFraction;
2805 integerPart HUerr, HUdistance;
2806 unsigned int powHUerr;
2807
2808 if (exp >= 0) {
2809 /* multiplySignificand leaves the precision-th bit set to 1. */
2810 calcLostFraction = decSig.multiplySignificand(pow5);
2811 powHUerr = powStatus != opOK;
2812 } else {
2813 calcLostFraction = decSig.divideSignificand(pow5);
2814 /* Denormal numbers have less precision. */
2815 if (decSig.exponent < semantics->minExponent) {
2816 excessPrecision += (semantics->minExponent - decSig.exponent);
2817 truncatedBits = excessPrecision;
2818 if (excessPrecision > calcSemantics.precision)
2819 excessPrecision = calcSemantics.precision;
2820 }
2821 /* Extra half-ulp lost in reciprocal of exponent. */
2822 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2823 }
2824
2825 /* Both multiplySignificand and divideSignificand return the
2826 result with the integer bit set. */
2828 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2829
2830 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2831 powHUerr);
2832 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2833 excessPrecision, isNearest);
2834
2835 /* Are we guaranteed to round correctly if we truncate? */
2836 if (HUdistance >= HUerr) {
2837 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2838 calcSemantics.precision - excessPrecision,
2839 excessPrecision);
2840 /* Take the exponent of decSig. If we tcExtract-ed less bits
2841 above we must adjust our exponent to compensate for the
2842 implicit right shift. */
2843 exponent = (decSig.exponent + semantics->precision
2844 - (calcSemantics.precision - excessPrecision));
2845 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2846 decSig.partCount(),
2847 truncatedBits);
2848 return normalize(rounding_mode, calcLostFraction);
2849 }
2850 }
2851}
2852
2854IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
2855 decimalInfo D;
2856 opStatus fs;
2857
2858 /* Scan the text. */
2859 StringRef::iterator p = str.begin();
2860 if (Error Err = interpretDecimal(p, str.end(), &D))
2861 return std::move(Err);
2862
2863 /* Handle the quick cases. First the case of no significant digits,
2864 i.e. zero, and then exponents that are obviously too large or too
2865 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2866 definitely overflows if
2867
2868 (exp - 1) * L >= maxExponent
2869
2870 and definitely underflows to zero where
2871
2872 (exp + 1) * L <= minExponent - precision
2873
2874 With integer arithmetic the tightest bounds for L are
2875
2876 93/28 < L < 196/59 [ numerator <= 256 ]
2877 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2878 */
2879
2880 // Test if we have a zero number allowing for strings with no null terminators
2881 // and zero decimals with non-zero exponents.
2882 //
2883 // We computed firstSigDigit by ignoring all zeros and dots. Thus if
2884 // D->firstSigDigit equals str.end(), every digit must be a zero and there can
2885 // be at most one dot. On the other hand, if we have a zero with a non-zero
2886 // exponent, then we know that D.firstSigDigit will be non-numeric.
2887 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
2888 category = fcZero;
2889 fs = opOK;
2890
2891 /* Check whether the normalized exponent is high enough to overflow
2892 max during the log-rebasing in the max-exponent check below. */
2893 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2894 fs = handleOverflow(rounding_mode);
2895
2896 /* If it wasn't, then it also wasn't high enough to overflow max
2897 during the log-rebasing in the min-exponent check. Check that it
2898 won't overflow min in either check, then perform the min-exponent
2899 check. */
2900 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2901 (D.normalizedExponent + 1) * 28738 <=
2902 8651 * (semantics->minExponent - (int) semantics->precision)) {
2903 /* Underflow to zero and round. */
2904 category = fcNormal;
2905 zeroSignificand();
2906 fs = normalize(rounding_mode, lfLessThanHalf);
2907
2908 /* We can finally safely perform the max-exponent check. */
2909 } else if ((D.normalizedExponent - 1) * 42039
2910 >= 12655 * semantics->maxExponent) {
2911 /* Overflow and round. */
2912 fs = handleOverflow(rounding_mode);
2913 } else {
2914 integerPart *decSignificand;
2915 unsigned int partCount;
2916
2917 /* A tight upper bound on number of bits required to hold an
2918 N-digit decimal integer is N * 196 / 59. Allocate enough space
2919 to hold the full significand, and an extra part required by
2920 tcMultiplyPart. */
2921 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2922 partCount = partCountForBits(1 + 196 * partCount / 59);
2923 decSignificand = new integerPart[partCount + 1];
2924 partCount = 0;
2925
2926 /* Convert to binary efficiently - we do almost all multiplication
2927 in an integerPart. When this would overflow do we do a single
2928 bignum multiplication, and then revert again to multiplication
2929 in an integerPart. */
2930 do {
2931 integerPart decValue, val, multiplier;
2932
2933 val = 0;
2934 multiplier = 1;
2935
2936 do {
2937 if (*p == '.') {
2938 p++;
2939 if (p == str.end()) {
2940 break;
2941 }
2942 }
2943 decValue = decDigitValue(*p++);
2944 if (decValue >= 10U) {
2945 delete[] decSignificand;
2946 return createError("Invalid character in significand");
2947 }
2948 multiplier *= 10;
2949 val = val * 10 + decValue;
2950 /* The maximum number that can be multiplied by ten with any
2951 digit added without overflowing an integerPart. */
2952 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2953
2954 /* Multiply out the current part. */
2955 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2956 partCount, partCount + 1, false);
2957
2958 /* If we used another part (likely but not guaranteed), increase
2959 the count. */
2960 if (decSignificand[partCount])
2961 partCount++;
2962 } while (p <= D.lastSigDigit);
2963
2964 category = fcNormal;
2965 fs = roundSignificandWithExponent(decSignificand, partCount,
2966 D.exponent, rounding_mode);
2967
2968 delete [] decSignificand;
2969 }
2970
2971 return fs;
2972}
2973
2974bool IEEEFloat::convertFromStringSpecials(StringRef str) {
2975 const size_t MIN_NAME_SIZE = 3;
2976
2977 if (str.size() < MIN_NAME_SIZE)
2978 return false;
2979
2980 if (str.equals("inf") || str.equals("INFINITY") || str.equals("+Inf")) {
2981 makeInf(false);
2982 return true;
2983 }
2984
2985 bool IsNegative = str.front() == '-';
2986 if (IsNegative) {
2987 str = str.drop_front();
2988 if (str.size() < MIN_NAME_SIZE)
2989 return false;
2990
2991 if (str.equals("inf") || str.equals("INFINITY") || str.equals("Inf")) {
2992 makeInf(true);
2993 return true;
2994 }
2995 }
2996
2997 // If we have a 's' (or 'S') prefix, then this is a Signaling NaN.
2998 bool IsSignaling = str.front() == 's' || str.front() == 'S';
2999 if (IsSignaling) {
3000 str = str.drop_front();
3001 if (str.size() < MIN_NAME_SIZE)
3002 return false;
3003 }
3004
3005 if (str.startswith("nan") || str.startswith("NaN")) {
3006 str = str.drop_front(3);
3007
3008 // A NaN without payload.
3009 if (str.empty()) {
3010 makeNaN(IsSignaling, IsNegative);
3011 return true;
3012 }
3013
3014 // Allow the payload to be inside parentheses.
3015 if (str.front() == '(') {
3016 // Parentheses should be balanced (and not empty).
3017 if (str.size() <= 2 || str.back() != ')')
3018 return false;
3019
3020 str = str.slice(1, str.size() - 1);
3021 }
3022
3023 // Determine the payload number's radix.
3024 unsigned Radix = 10;
3025 if (str[0] == '0') {
3026 if (str.size() > 1 && tolower(str[1]) == 'x') {
3027 str = str.drop_front(2);
3028 Radix = 16;
3029 } else
3030 Radix = 8;
3031 }
3032
3033 // Parse the payload and make the NaN.
3034 APInt Payload;
3035 if (!str.getAsInteger(Radix, Payload)) {
3036 makeNaN(IsSignaling, IsNegative, &Payload);
3037 return true;
3038 }
3039 }
3040
3041 return false;
3042}
3043
3046 if (str.empty())
3047 return createError("Invalid string length");
3048
3049 // Handle special cases.
3050 if (convertFromStringSpecials(str))
3051 return opOK;
3052
3053 /* Handle a leading minus sign. */
3054 StringRef::iterator p = str.begin();
3055 size_t slen = str.size();
3056 sign = *p == '-' ? 1 : 0;
3057 if (*p == '-' || *p == '+') {
3058 p++;
3059 slen--;
3060 if (!slen)
3061 return createError("String has no digits");
3062 }
3063
3064 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
3065 if (slen == 2)
3066 return createError("Invalid string");
3067 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
3068 rounding_mode);
3069 }
3070
3071 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
3072}
3073
3074/* Write out a hexadecimal representation of the floating point value
3075 to DST, which must be of sufficient size, in the C99 form
3076 [-]0xh.hhhhp[+-]d. Return the number of characters written,
3077 excluding the terminating NUL.
3078
3079 If UPPERCASE, the output is in upper case, otherwise in lower case.
3080
3081 HEXDIGITS digits appear altogether, rounding the value if
3082 necessary. If HEXDIGITS is 0, the minimal precision to display the
3083 number precisely is used instead. If nothing would appear after
3084 the decimal point it is suppressed.
3085
3086 The decimal exponent is always printed and has at least one digit.
3087 Zero values display an exponent of zero. Infinities and NaNs
3088 appear as "infinity" or "nan" respectively.
3089
3090 The above rules are as specified by C99. There is ambiguity about
3091 what the leading hexadecimal digit should be. This implementation
3092 uses whatever is necessary so that the exponent is displayed as
3093 stored. This implies the exponent will fall within the IEEE format
3094 range, and the leading hexadecimal digit will be 0 (for denormals),
3095 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
3096 any other digits zero).
3097*/
3098unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
3099 bool upperCase,
3100 roundingMode rounding_mode) const {
3101 char *p;
3102
3103 p = dst;
3104 if (sign)
3105 *dst++ = '-';
3106
3107 switch (category) {
3108 case fcInfinity:
3109 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
3110 dst += sizeof infinityL - 1;
3111 break;
3112
3113 case fcNaN:
3114 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
3115 dst += sizeof NaNU - 1;
3116 break;
3117
3118 case fcZero:
3119 *dst++ = '0';
3120 *dst++ = upperCase ? 'X': 'x';
3121 *dst++ = '0';
3122 if (hexDigits > 1) {
3123 *dst++ = '.';
3124 memset (dst, '0', hexDigits - 1);
3125 dst += hexDigits - 1;
3126 }
3127 *dst++ = upperCase ? 'P': 'p';
3128 *dst++ = '0';
3129 break;
3130
3131 case fcNormal:
3132 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
3133 break;
3134 }
3135
3136 *dst = 0;
3137
3138 return static_cast<unsigned int>(dst - p);
3139}
3140
3141/* Does the hard work of outputting the correctly rounded hexadecimal
3142 form of a normal floating point number with the specified number of
3143 hexadecimal digits. If HEXDIGITS is zero the minimum number of
3144 digits necessary to print the value precisely is output. */
3145char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
3146 bool upperCase,
3147 roundingMode rounding_mode) const {
3148 unsigned int count, valueBits, shift, partsCount, outputDigits;
3149 const char *hexDigitChars;
3150 const integerPart *significand;
3151 char *p;
3152 bool roundUp;
3153
3154 *dst++ = '0';
3155 *dst++ = upperCase ? 'X': 'x';
3156
3157 roundUp = false;
3158 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
3159
3160 significand = significandParts();
3161 partsCount = partCount();
3162
3163 /* +3 because the first digit only uses the single integer bit, so
3164 we have 3 virtual zero most-significant-bits. */
3165 valueBits = semantics->precision + 3;
3166 shift = integerPartWidth - valueBits % integerPartWidth;
3167
3168 /* The natural number of digits required ignoring trailing
3169 insignificant zeroes. */
3170 outputDigits = (valueBits - significandLSB () + 3) / 4;
3171
3172 /* hexDigits of zero means use the required number for the
3173 precision. Otherwise, see if we are truncating. If we are,
3174 find out if we need to round away from zero. */
3175 if (hexDigits) {
3176 if (hexDigits < outputDigits) {
3177 /* We are dropping non-zero bits, so need to check how to round.
3178 "bits" is the number of dropped bits. */
3179 unsigned int bits;
3180 lostFraction fraction;
3181
3182 bits = valueBits - hexDigits * 4;
3183 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
3184 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
3185 }
3186 outputDigits = hexDigits;
3187 }
3188
3189 /* Write the digits consecutively, and start writing in the location
3190 of the hexadecimal point. We move the most significant digit
3191 left and add the hexadecimal point later. */
3192 p = ++dst;
3193
3194 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
3195
3196 while (outputDigits && count) {
3197 integerPart part;
3198
3199 /* Put the most significant integerPartWidth bits in "part". */
3200 if (--count == partsCount)
3201 part = 0; /* An imaginary higher zero part. */
3202 else
3203 part = significand[count] << shift;
3204
3205 if (count && shift)
3206 part |= significand[count - 1] >> (integerPartWidth - shift);
3207
3208 /* Convert as much of "part" to hexdigits as we can. */
3209 unsigned int curDigits = integerPartWidth / 4;
3210
3211 if (curDigits > outputDigits)
3212 curDigits = outputDigits;
3213 dst += partAsHex (dst, part, curDigits, hexDigitChars);
3214 outputDigits -= curDigits;
3215 }
3216
3217 if (roundUp) {
3218 char *q = dst;
3219
3220 /* Note that hexDigitChars has a trailing '0'. */
3221 do {
3222 q--;
3223 *q = hexDigitChars[hexDigitValue (*q) + 1];
3224 } while (*q == '0');
3225 assert(q >= p);
3226 } else {
3227 /* Add trailing zeroes. */
3228 memset (dst, '0', outputDigits);
3229 dst += outputDigits;
3230 }
3231
3232 /* Move the most significant digit to before the point, and if there
3233 is something after the decimal point add it. This must come
3234 after rounding above. */
3235 p[-1] = p[0];
3236 if (dst -1 == p)
3237 dst--;
3238 else
3239 p[0] = '.';
3240
3241 /* Finally output the exponent. */
3242 *dst++ = upperCase ? 'P': 'p';
3243
3244 return writeSignedDecimal (dst, exponent);
3245}
3246
3248 if (!Arg.isFiniteNonZero())
3249 return hash_combine((uint8_t)Arg.category,
3250 // NaN has no sign, fix it at zero.
3251 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
3252 Arg.semantics->precision);
3253
3254 // Normal floats need their exponent and significand hashed.
3255 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
3256 Arg.semantics->precision, Arg.exponent,
3258 Arg.significandParts(),
3259 Arg.significandParts() + Arg.partCount()));
3260}
3261
3262// Conversion from APFloat to/from host float/double. It may eventually be
3263// possible to eliminate these and have everybody deal with APFloats, but that
3264// will take a while. This approach will not easily extend to long double.
3265// Current implementation requires integerPartWidth==64, which is correct at
3266// the moment but could be made more general.
3267
3268// Denormals have exponent minExponent in APFloat, but minExponent-1 in
3269// the actual IEEE representations. We compensate for that here.
3270
3271APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
3272 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended);
3273 assert(partCount()==2);
3274
3275 uint64_t myexponent, mysignificand;
3276
3277 if (isFiniteNonZero()) {
3278 myexponent = exponent+16383; //bias
3279 mysignificand = significandParts()[0];
3280 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
3281 myexponent = 0; // denormal
3282 } else if (category==fcZero) {
3283 myexponent = 0;
3284 mysignificand = 0;
3285 } else if (category==fcInfinity) {
3286 myexponent = 0x7fff;
3287 mysignificand = 0x8000000000000000ULL;
3288 } else {
3289 assert(category == fcNaN && "Unknown category");
3290 myexponent = 0x7fff;
3291 mysignificand = significandParts()[0];
3292 }
3293
3294 uint64_t words[2];
3295 words[0] = mysignificand;
3296 words[1] = ((uint64_t)(sign & 1) << 15) |
3297 (myexponent & 0x7fffLL);
3298 return APInt(80, words);
3299}
3300
3301APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {
3302 assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy);
3303 assert(partCount()==2);
3304
3305 uint64_t words[2];
3306 opStatus fs;
3307 bool losesInfo;
3308
3309 // Convert number to double. To avoid spurious underflows, we re-
3310 // normalize against the "double" minExponent first, and only *then*
3311 // truncate the mantissa. The result of that second conversion
3312 // may be inexact, but should never underflow.
3313 // Declare fltSemantics before APFloat that uses it (and
3314 // saves pointer to it) to ensure correct destruction order.
3315 fltSemantics extendedSemantics = *semantics;
3316 extendedSemantics.minExponent = semIEEEdouble.minExponent;
3317 IEEEFloat extended(*this);
3318 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3319 assert(fs == opOK && !losesInfo);
3320 (void)fs;
3321
3322 IEEEFloat u(extended);
3323 fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3324 assert(fs == opOK || fs == opInexact);
3325 (void)fs;
3326 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
3327
3328 // If conversion was exact or resulted in a special case, we're done;
3329 // just set the second double to zero. Otherwise, re-convert back to
3330 // the extended format and compute the difference. This now should
3331 // convert exactly to double.
3332 if (u.isFiniteNonZero() && losesInfo) {
3333 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3334 assert(fs == opOK && !losesInfo);
3335 (void)fs;
3336
3337 IEEEFloat v(extended);
3338 v.subtract(u, rmNearestTiesToEven);
3339 fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3340 assert(fs == opOK && !losesInfo);
3341 (void)fs;
3342 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
3343 } else {
3344 words[1] = 0;
3345 }
3346
3347 return APInt(128, words);
3348}
3349
3350APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
3351 assert(semantics == (const llvm::fltSemantics*)&semIEEEquad);
3352 assert(partCount()==2);
3353
3354 uint64_t myexponent, mysignificand, mysignificand2;
3355
3356 if (isFiniteNonZero()) {
3357 myexponent = exponent+16383; //bias
3358 mysignificand = significandParts()[0];
3359 mysignificand2 = significandParts()[1];
3360 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
3361 myexponent = 0; // denormal
3362 } else if (category==fcZero) {
3363 myexponent = 0;
3364 mysignificand = mysignificand2 = 0;
3365 } else if (category==fcInfinity) {
3366 myexponent = 0x7fff;
3367 mysignificand = mysignificand2 = 0;
3368 } else {
3369 assert(category == fcNaN && "Unknown category!");
3370 myexponent = 0x7fff;
3371 mysignificand = significandParts()[0];
3372 mysignificand2 = significandParts()[1];
3373 }
3374
3375 uint64_t words[2];
3376 words[0] = mysignificand;
3377 words[1] = ((uint64_t)(sign & 1) << 63) |
3378 ((myexponent & 0x7fff) << 48) |
3379 (mysignificand2 & 0xffffffffffffLL);
3380
3381 return APInt(128, words);
3382}
3383
3384APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
3385 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble);
3386 assert(partCount()==1);
3387
3388 uint64_t myexponent, mysignificand;
3389
3390 if (isFiniteNonZero()) {
3391 myexponent = exponent+1023; //bias
3392 mysignificand = *significandParts();
3393 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
3394 myexponent = 0; // denormal
3395 } else if (category==fcZero) {
3396 myexponent = 0;
3397 mysignificand = 0;
3398 } else if (category==fcInfinity) {
3399 myexponent = 0x7ff;
3400 mysignificand = 0;
3401 } else {
3402 assert(category == fcNaN && "Unknown category!");
3403 myexponent = 0x7ff;
3404 mysignificand = *significandParts();
3405 }
3406
3407 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
3408 ((myexponent & 0x7ff) << 52) |
3409 (mysignificand & 0xfffffffffffffLL))));
3410}
3411
3412APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
3413 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle);
3414 assert(partCount()==1);
3415
3416 uint32_t myexponent, mysignificand;
3417
3418 if (isFiniteNonZero()) {
3419 myexponent = exponent+127; //bias
3420 mysignificand = (uint32_t)*significandParts();
3421 if (myexponent == 1 && !(mysignificand & 0x800000))
3422 myexponent = 0; // denormal
3423 } else if (category==fcZero) {
3424 myexponent = 0;
3425 mysignificand = 0;
3426 } else if (category==fcInfinity) {
3427 myexponent = 0xff;
3428 mysignificand = 0;
3429 } else {
3430 assert(category == fcNaN && "Unknown category!");
3431 myexponent = 0xff;
3432 mysignificand = (uint32_t)*significandParts();
3433 }
3434
3435 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
3436 (mysignificand & 0x7fffff)));
3437}
3438
3439APInt IEEEFloat::convertBFloatAPFloatToAPInt() const {
3440 assert(semantics == (const llvm::fltSemantics *)&semBFloat);
3441 assert(partCount() == 1);
3442
3443 uint32_t myexponent, mysignificand;
3444
3445 if (isFiniteNonZero()) {
3446 myexponent = exponent + 127; // bias
3447 mysignificand = (uint32_t)*significandParts();
3448 if (myexponent == 1 && !(mysignificand & 0x80))
3449 myexponent = 0; // denormal
3450 } else if (category == fcZero) {
3451 myexponent = 0;
3452 mysignificand = 0;
3453 } else if (category == fcInfinity) {
3454 myexponent = 0xff;
3455 mysignificand = 0;
3456 } else {
3457 assert(category == fcNaN && "Unknown category!");
3458 myexponent = 0xff;
3459 mysignificand = (uint32_t)*significandParts();
3460 }
3461
3462 return APInt(16, (((sign & 1) << 15) | ((myexponent & 0xff) << 7) |
3463 (mysignificand & 0x7f)));
3464}
3465
3466APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
3467 assert(semantics == (const llvm::fltSemantics*)&semIEEEhalf);
3468 assert(partCount()==1);
3469
3470 uint32_t myexponent, mysignificand;
3471
3472 if (isFiniteNonZero()) {
3473 myexponent = exponent+15; //bias
3474 mysignificand = (uint32_t)*significandParts();
3475 if (myexponent == 1 && !(mysignificand & 0x400))
3476 myexponent = 0; // denormal
3477 } else if (category==fcZero) {
3478 myexponent = 0;
3479 mysignificand = 0;
3480 } else if (category==fcInfinity) {
3481 myexponent = 0x1f;
3482 mysignificand = 0;
3483 } else {
3484 assert(category == fcNaN && "Unknown category!");
3485 myexponent = 0x1f;
3486 mysignificand = (uint32_t)*significandParts();
3487 }
3488
3489 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
3490 (mysignificand & 0x3ff)));
3491}
3492
3493APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const {
3494 assert(semantics == (const llvm::fltSemantics *)&semFloat8E5M2);
3495 assert(partCount() == 1);
3496
3497 uint32_t myexponent, mysignificand;
3498
3499 if (isFiniteNonZero()) {
3500 myexponent = exponent + 15; // bias
3501 mysignificand = (uint32_t)*significandParts();
3502 if (myexponent == 1 && !(mysignificand & 0x4))
3503 myexponent = 0; // denormal
3504 } else if (category == fcZero) {
3505 myexponent = 0;
3506 mysignificand = 0;
3507 } else if (category == fcInfinity) {
3508 myexponent = 0x1f;
3509 mysignificand = 0;
3510 } else {
3511 assert(category == fcNaN && "Unknown category!");
3512 myexponent = 0x1f;
3513 mysignificand = (uint32_t)*significandParts();
3514 }
3515
3516 return APInt(8, (((sign & 1) << 7) | ((myexponent & 0x1f) << 2) |
3517 (mysignificand & 0x3)));
3518}
3519
3520APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {
3521 assert(semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN);
3522 assert(partCount() == 1);
3523
3524 uint32_t myexponent, mysignificand;
3525
3526 if (isFiniteNonZero()) {
3527 myexponent = exponent + 7; // bias
3528 mysignificand = (uint32_t)*significandParts();
3529 if (myexponent == 1 && !(mysignificand & 0x8))
3530 myexponent = 0; // denormal
3531 } else if (category == fcZero) {
3532 myexponent = 0;
3533 mysignificand = 0;
3534 } else if (category == fcInfinity) {
3535 myexponent = 0xf;
3536 mysignificand = 0;
3537 } else {
3538 assert(category == fcNaN && "Unknown category!");
3539 myexponent = 0xf;
3540 mysignificand = (uint32_t)*significandParts();
3541 }
3542
3543 return APInt(8, (((sign & 1) << 7) | ((myexponent & 0xf) << 3) |
3544 (mysignificand & 0x7)));
3545}
3546
3547// This function creates an APInt that is just a bit map of the floating
3548// point constant as it would appear in memory. It is not a conversion,
3549// and treating the result as a normal integer is unlikely to be useful.
3550
3552 if (semantics == (const llvm::fltSemantics*)&semIEEEhalf)
3553 return convertHalfAPFloatToAPInt();
3554
3555 if (semantics == (const llvm::fltSemantics *)&semBFloat)
3556 return convertBFloatAPFloatToAPInt();
3557
3558 if (semantics == (const llvm::fltSemantics*)&semIEEEsingle)
3559 return convertFloatAPFloatToAPInt();
3560
3561 if (semantics == (const llvm::fltSemantics*)&semIEEEdouble)
3562 return convertDoubleAPFloatToAPInt();
3563
3564 if (semantics == (const llvm::fltSemantics*)&semIEEEquad)
3565 return convertQuadrupleAPFloatToAPInt();
3566
3567 if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy)
3568 return convertPPCDoubleDoubleAPFloatToAPInt();
3569
3570 if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2)
3571 return convertFloat8E5M2APFloatToAPInt();
3572
3573 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN)
3574 return convertFloat8E4M3FNAPFloatToAPInt();
3575
3576 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&
3577 "unknown format!");
3578 return convertF80LongDoubleAPFloatToAPInt();
3579}
3580
3582 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle &&
3583 "Float semantics are not IEEEsingle");
3584 APInt api = bitcastToAPInt();
3585 return api.bitsToFloat();
3586}
3587
3589 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble &&
3590 "Float semantics are not IEEEdouble");
3591 APInt api = bitcastToAPInt();
3592 return api.bitsToDouble();
3593}
3594
3595/// Integer bit is explicit in this format. Intel hardware (387 and later)
3596/// does not support these bit patterns:
3597/// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3598/// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3599/// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3600/// exponent = 0, integer bit 1 ("pseudodenormal")
3601/// At the moment, the first three are treated as NaNs, the last one as Normal.
3602void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
3603 uint64_t i1 = api.getRawData()[0];
3604 uint64_t i2 = api.getRawData()[1];
3605 uint64_t myexponent = (i2 & 0x7fff);
3606 uint64_t mysignificand = i1;
3607 uint8_t myintegerbit = mysignificand >> 63;
3608
3609 initialize(&semX87DoubleExtended);
3610 assert(partCount()==2);
3611
3612 sign = static_cast<unsigned int>(i2>>15);
3613 if (myexponent == 0 && mysignificand == 0) {
3614 makeZero(sign);
3615 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3616 makeInf(sign);
3617 } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) ||
3618 (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) {
3619 category = fcNaN;
3620 exponent = exponentNaN();
3621 significandParts()[0] = mysignificand;
3622 significandParts()[1] = 0;
3623 } else {
3624 category = fcNormal;
3625 exponent = myexponent - 16383;
3626 significandParts()[0] = mysignificand;
3627 significandParts()[1] = 0;
3628 if (myexponent==0) // denormal
3629 exponent = -16382;
3630 }
3631}
3632
3633void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) {
3634 uint64_t i1 = api.getRawData()[0];
3635 uint64_t i2 = api.getRawData()[1];
3636 opStatus fs;
3637 bool losesInfo;
3638
3639 // Get the first double and convert to our format.
3640 initFromDoubleAPInt(APInt(64, i1));
3642 assert(fs == opOK && !losesInfo);
3643 (void)fs;
3644
3645 // Unless we have a special case, add in second double.
3646 if (isFiniteNonZero()) {
3647 IEEEFloat v(semIEEEdouble, APInt(64, i2));
3648 fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3649 assert(fs == opOK && !losesInfo);
3650 (void)fs;
3651
3653 }
3654}
3655
3656void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
3657 uint64_t i1 = api.getRawData()[0];
3658 uint64_t i2 = api.getRawData()[1];
3659 uint64_t myexponent = (i2 >> 48) & 0x7fff;
3660 uint64_t mysignificand = i1;
3661 uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
3662
3663 initialize(&semIEEEquad);
3664 assert(partCount()==2);
3665
3666 sign = static_cast<unsigned int>(i2>>63);
3667 if (myexponent==0 &&
3668 (mysignificand==0 && mysignificand2==0)) {
3669 makeZero(sign);
3670 } else if (myexponent==0x7fff &&
3671 (mysignificand==0 && mysignificand2==0)) {
3672 makeInf(sign);
3673 } else if (myexponent==0x7fff &&
3674 (mysignificand!=0 || mysignificand2 !=0)) {
3675 category = fcNaN;
3676 exponent = exponentNaN();
3677 significandParts()[0] = mysignificand;
3678 significandParts()[1] = mysignificand2;
3679 } else {
3680 category = fcNormal;
3681 exponent = myexponent - 16383;
3682 significandParts()[0] = mysignificand;
3683 significandParts()[1] = mysignificand2;
3684 if (myexponent==0) // denormal
3685 exponent = -16382;
3686 else
3687 significandParts()[1] |= 0x1000000000000LL; // integer bit
3688 }
3689}
3690
3691void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
3692 uint64_t i = *api.getRawData();
3693 uint64_t myexponent = (i >> 52) & 0x7ff;
3694 uint64_t mysignificand = i & 0xfffffffffffffLL;
3695
3696 initialize(&semIEEEdouble);
3697 assert(partCount()==1);
3698
3699 sign = static_cast<unsigned int>(i>>63);
3700 if (myexponent==0 && mysignificand==0) {
3701 makeZero(sign);
3702 } else if (myexponent==0x7ff && mysignificand==0) {
3703 makeInf(sign);
3704 } else if (myexponent==0x7ff && mysignificand!=0) {
3705 category = fcNaN;
3706 exponent = exponentNaN();
3707 *significandParts() = mysignificand;
3708 } else {
3709 category = fcNormal;
3710 exponent = myexponent - 1023;
3711 *significandParts() = mysignificand;
3712 if (myexponent==0) // denormal
3713 exponent = -1022;
3714 else
3715 *significandParts() |= 0x10000000000000LL; // integer bit
3716 }
3717}
3718
3719void IEEEFloat::initFromFloatAPInt(const APInt &api) {
3720 uint32_t i = (uint32_t)*api.getRawData();
3721 uint32_t myexponent = (i >> 23) & 0xff;
3722 uint32_t mysignificand = i & 0x7fffff;
3723
3724 initialize(&semIEEEsingle);
3725 assert(partCount()==1);
3726
3727 sign = i >> 31;
3728 if (myexponent==0 && mysignificand==0) {
3729 makeZero(sign);
3730 } else if (myexponent==0xff && mysignificand==0) {
3731 makeInf(sign);
3732 } else if (myexponent==0xff && mysignificand!=0) {
3733 category = fcNaN;
3734 exponent = exponentNaN();
3735 *significandParts() = mysignificand;
3736 } else {
3737 category = fcNormal;
3738 exponent = myexponent - 127; //bias
3739 *significandParts() = mysignificand;
3740 if (myexponent==0) // denormal
3741 exponent = -126;
3742 else
3743 *significandParts() |= 0x800000; // integer bit
3744 }
3745}
3746
3747void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
3748 uint32_t i = (uint32_t)*api.getRawData();
3749 uint32_t myexponent = (i >> 7) & 0xff;
3750 uint32_t mysignificand = i & 0x7f;
3751
3752 initialize(&semBFloat);
3753 assert(partCount() == 1);
3754
3755 sign = i >> 15;
3756 if (myexponent == 0 && mysignificand == 0) {
3757 makeZero(sign);
3758 } else if (myexponent == 0xff && mysignificand == 0) {
3759 makeInf(sign);
3760 } else if (myexponent == 0xff && mysignificand != 0) {
3761 category = fcNaN;
3762 exponent = exponentNaN();
3763 *significandParts() = mysignificand;
3764 } else {
3765 category = fcNormal;
3766 exponent = myexponent - 127; // bias
3767 *significandParts() = mysignificand;
3768 if (myexponent == 0) // denormal
3769 exponent = -126;
3770 else
3771 *significandParts() |= 0x80; // integer bit
3772 }
3773}
3774
3775void IEEEFloat::initFromHalfAPInt(const APInt &api) {
3776 uint32_t i = (uint32_t)*api.getRawData();
3777 uint32_t myexponent = (i >> 10) & 0x1f;
3778 uint32_t mysignificand = i & 0x3ff;
3779
3780 initialize(&semIEEEhalf);
3781 assert(partCount()==1);
3782
3783 sign = i >> 15;
3784 if (myexponent==0 && mysignificand==0) {
3785 makeZero(sign);
3786 } else if (myexponent==0x1f && mysignificand==0) {
3787 makeInf(sign);
3788 } else if (myexponent==0x1f && mysignificand!=0) {
3789 category = fcNaN;
3790 exponent = exponentNaN();
3791 *significandParts() = mysignificand;
3792 } else {
3793 category = fcNormal;
3794 exponent = myexponent - 15; //bias
3795 *significandParts() = mysignificand;
3796 if (myexponent==0) // denormal
3797 exponent = -14;
3798 else
3799 *significandParts() |= 0x400; // integer bit
3800 }
3801}
3802
3803void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) {
3804 uint32_t i = (uint32_t)*api.getRawData();
3805 uint32_t myexponent = (i >> 2) & 0x1f;
3806 uint32_t mysignificand = i & 0x3;
3807
3808 initialize(&semFloat8E5M2);
3809 assert(partCount() == 1);
3810
3811 sign = i >> 7;
3812 if (myexponent == 0 && mysignificand == 0) {
3813 makeZero(sign);
3814 } else if (myexponent == 0x1f && mysignificand == 0) {
3815 makeInf(sign);
3816 } else if (myexponent == 0x1f && mysignificand != 0) {
3817 category = fcNaN;
3818 exponent = exponentNaN();
3819 *significandParts() = mysignificand;
3820 } else {
3821 category = fcNormal;
3822 exponent = myexponent - 15; // bias
3823 *significandParts() = mysignificand;
3824 if (myexponent == 0) // denormal
3825 exponent = -14;
3826 else
3827 *significandParts() |= 0x4; // integer bit
3828 }
3829}
3830
3831void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {
3832 uint32_t i = (uint32_t)*api.getRawData();
3833 uint32_t myexponent = (i >> 3) & 0xf;
3834 uint32_t mysignificand = i & 0x7;
3835
3836 initialize(&semFloat8E4M3FN);
3837 assert(partCount() == 1);
3838
3839 sign = i >> 7;
3840 if (myexponent == 0 && mysignificand == 0) {
3841 makeZero(sign);
3842 } else if (myexponent == 0xf && mysignificand == 7) {
3843 category = fcNaN;
3844 exponent = exponentNaN();
3845 *significandParts() = mysignificand;
3846 } else {
3847 category = fcNormal;
3848 exponent = myexponent - 7; // bias
3849 *significandParts() = mysignificand;
3850 if (myexponent == 0) // denormal
3851 exponent = -6;
3852 else
3853 *significandParts() |= 0x8; // integer bit
3854 }
3855}
3856
3857/// Treat api as containing the bits of a floating point number.
3858void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
3859 assert(api.getBitWidth() == Sem->sizeInBits);
3860 if (Sem == &semIEEEhalf)
3861 return initFromHalfAPInt(api);
3862 if (Sem == &semBFloat)
3863 return initFromBFloatAPInt(api);
3864 if (Sem == &semIEEEsingle)
3865 return initFromFloatAPInt(api);
3866 if (Sem == &semIEEEdouble)
3867 return initFromDoubleAPInt(api);
3868 if (Sem == &semX87DoubleExtended)
3869 return initFromF80LongDoubleAPInt(api);
3870 if (Sem == &semIEEEquad)
3871 return initFromQuadrupleAPInt(api);
3872 if (Sem == &semPPCDoubleDoubleLegacy)
3873 return initFromPPCDoubleDoubleAPInt(api);
3874 if (Sem == &semFloat8E5M2)
3875 return initFromFloat8E5M2APInt(api);
3876 if (Sem == &semFloat8E4M3FN)
3877 return initFromFloat8E4M3FNAPInt(api);
3878
3879 llvm_unreachable(nullptr);
3880}
3881
3882/// Make this number the largest magnitude normal number in the given
3883/// semantics.
3884void IEEEFloat::makeLargest(bool Negative) {
3885 // We want (in interchange format):
3886 // sign = {Negative}
3887 // exponent = 1..10
3888 // significand = 1..1
3889 category = fcNormal;
3890 sign = Negative;
3891 exponent = semantics->maxExponent;
3892
3893 // Use memset to set all but the highest integerPart to all ones.
3894 integerPart *significand = significandParts();
3895 unsigned PartCount = partCount();
3896 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
3897
3898 // Set the high integerPart especially setting all unused top bits for
3899 // internal consistency.
3900 const unsigned NumUnusedHighBits =
3901 PartCount*integerPartWidth - semantics->precision;
3902 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
3903 ? (~integerPart(0) >> NumUnusedHighBits)
3904 : 0;
3905
3907 significand[0] &= ~integerPart(1);
3908}
3909
3910/// Make this number the smallest magnitude denormal number in the given
3911/// semantics.
3912void IEEEFloat::makeSmallest(bool Negative) {
3913 // We want (in interchange format):
3914 // sign = {Negative}
3915 // exponent = 0..0
3916 // significand = 0..01
3917 category = fcNormal;
3918 sign = Negative;
3919 exponent = semantics->minExponent;
3920 APInt::tcSet(significandParts(), 1, partCount());
3921}
3922
3924 // We want (in interchange format):
3925 // sign = {Negative}
3926 // exponent = 0..0
3927 // significand = 10..0
3928
3929 category = fcNormal;
3930 zeroSignificand();
3931 sign = Negative;
3932 exponent = semantics->minExponent;
3933 APInt::tcSetBit(significandParts(), semantics->precision - 1);
3934}
3935
3936IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
3937 initFromAPInt(&Sem, API);
3938}
3939
3941 initFromAPInt(&semIEEEsingle, APInt::floatToBits(f));
3942}
3943
3945 initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d));
3946}
3947
3948namespace {
3949 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
3950 Buffer.append(Str.begin(), Str.end());
3951 }
3952
3953 /// Removes data from the given significand until it is no more
3954 /// precise than is required for the desired precision.
3955 void AdjustToPrecision(APInt &significand,
3956 int &exp, unsigned FormatPrecision) {
3957 unsigned bits = significand.getActiveBits();
3958
3959 // 196/59 is a very slight overestimate of lg_2(10).
3960 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3961
3962 if (bits <= bitsRequired) return;
3963
3964 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3965 if (!tensRemovable) return;
3966
3967 exp += tensRemovable;
3968
3969 APInt divisor(significand.getBitWidth(), 1);
3970 APInt powten(significand.getBitWidth(), 10);
3971 while (true) {
3972 if (tensRemovable & 1)
3973 divisor *= powten;
3974 tensRemovable >>= 1;
3975 if (!tensRemovable) break;
3976 powten *= powten;
3977 }
3978
3979 significand = significand.udiv(divisor);
3980
3981 // Truncate the significand down to its active bit count.
3982 significand = significand.trunc(significand.getActiveBits());
3983 }
3984
3985
3986 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3987 int &exp, unsigned FormatPrecision) {
3988 unsigned N = buffer.size();
3989 if (N <= FormatPrecision) return;
3990
3991 // The most significant figures are the last ones in the buffer.
3992 unsigned FirstSignificant = N - FormatPrecision;
3993
3994 // Round.
3995 // FIXME: this probably shouldn't use 'round half up'.
3996
3997 // Rounding down is just a truncation, except we also want to drop
3998 // trailing zeros from the new result.
3999 if (buffer[FirstSignificant - 1] < '5') {
4000 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
4001 FirstSignificant++;
4002
4003 exp += FirstSignificant;
4004 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4005 return;
4006 }
4007
4008 // Rounding up requires a decimal add-with-carry. If we continue
4009 // the carry, the newly-introduced zeros will just be truncated.
4010 for (unsigned I = FirstSignificant; I != N; ++I) {
4011 if (buffer[I] == '9') {
4012 FirstSignificant++;
4013 } else {
4014 buffer[I]++;
4015 break;
4016 }
4017 }
4018
4019 // If we carried through, we have exactly one digit of precision.
4020 if (FirstSignificant == N) {
4021 exp += FirstSignificant;
4022 buffer.clear();
4023 buffer.push_back('1');
4024 return;
4025 }
4026
4027 exp += FirstSignificant;
4028 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4029 }
4030} // namespace
4031
4032void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
4033 unsigned FormatMaxPadding, bool TruncateZero) const {
4034 switch (category) {
4035 case fcInfinity:
4036 if (isNegative())
4037 return append(Str, "-Inf");
4038 else
4039 return append(Str, "+Inf");
4040
4041 case fcNaN: return append(Str, "NaN");
4042
4043 case fcZero:
4044 if (isNegative())
4045 Str.push_back('-');
4046
4047 if (!FormatMaxPadding) {
4048 if (TruncateZero)
4049 append(Str, "0.0E+0");
4050 else {
4051 append(Str, "0.0");
4052 if (FormatPrecision > 1)
4053 Str.append(FormatPrecision - 1, '0');
4054 append(Str, "e+00");
4055 }
4056 } else
4057 Str.push_back('0');
4058 return;
4059
4060 case fcNormal:
4061 break;
4062 }
4063
4064 if (isNegative())
4065 Str.push_back('-');
4066
4067 // Decompose the number into an APInt and an exponent.
4068 int exp = exponent - ((int) semantics->precision - 1);
4069 APInt significand(
4070 semantics->precision,
4071 ArrayRef(significandParts(), partCountForBits(semantics->precision)));
4072
4073 // Set FormatPrecision if zero. We want to do this before we
4074 // truncate trailing zeros, as those are part of the precision.
4075 if (!FormatPrecision) {
4076 // We use enough digits so the number can be round-tripped back to an
4077 // APFloat. The formula comes from "How to Print Floating-Point Numbers
4078 // Accurately" by Steele and White.
4079 // FIXME: Using a formula based purely on the precision is conservative;
4080 // we can print fewer digits depending on the actual value being printed.
4081
4082 // FormatPrecision = 2 + floor(significandBits / lg_2(10))
4083 FormatPrecision = 2 + semantics->precision * 59 / 196;
4084 }
4085
4086 // Ignore trailing binary zeros.
4087 int trailingZeros = significand.countTrailingZeros();
4088 exp += trailingZeros;
4089 significand.lshrInPlace(trailingZeros);
4090
4091 // Change the exponent from 2^e to 10^e.
4092 if (exp == 0) {
4093 // Nothing to do.
4094 } else if (exp > 0) {
4095 // Just shift left.
4096 significand = significand.zext(semantics->precision + exp);
4097 significand <<= exp;
4098 exp = 0;
4099 } else { /* exp < 0 */
4100 int texp = -exp;
4101
4102 // We transform this using the identity:
4103 // (N)(2^-e) == (N)(5^e)(10^-e)
4104 // This means we have to multiply N (the significand) by 5^e.
4105 // To avoid overflow, we have to operate on numbers large
4106 // enough to store N * 5^e:
4107 // log2(N * 5^e) == log2(N) + e * log2(5)
4108 // <= semantics->precision + e * 137 / 59
4109 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
4110
4111 unsigned precision = semantics->precision + (137 * texp + 136) / 59;
4112
4113 // Multiply significand by 5^e.
4114 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
4115 significand = significand.zext(precision);
4116 APInt five_to_the_i(precision, 5);
4117 while (true) {
4118 if (texp & 1) significand *= five_to_the_i;
4119
4120 texp >>= 1;
4121 if (!texp) break;
4122 five_to_the_i *= five_to_the_i;
4123 }
4124 }
4125
4126 AdjustToPrecision(significand, exp, FormatPrecision);
4127
4129
4130 // Fill the buffer.
4131 unsigned precision = significand.getBitWidth();
4132 if (precision < 4) {
4133 // We need enough precision to store the value 10.
4134 precision = 4;
4135 significand = significand.zext(precision);
4136 }
4137 APInt ten(precision, 10);
4138 APInt digit(precision, 0);
4139
4140 bool inTrail = true;
4141 while (significand != 0) {
4142 // digit <- significand % 10
4143 // significand <- significand / 10
4144 APInt::udivrem(significand, ten, significand, digit);
4145
4146 unsigned d = digit.getZExtValue();
4147
4148 // Drop trailing zeros.
4149 if (inTrail && !d) exp++;
4150 else {
4151 buffer.push_back((char) ('0' + d));
4152 inTrail = false;
4153 }
4154 }
4155
4156 assert(!buffer.empty() && "no characters in buffer!");
4157
4158 // Drop down to FormatPrecision.
4159 // TODO: don't do more precise calculations above than are required.
4160 AdjustToPrecision(buffer, exp, FormatPrecision);
4161
4162 unsigned NDigits = buffer.size();
4163
4164 // Check whether we should use scientific notation.
4165 bool FormatScientific;
4166 if (!FormatMaxPadding)
4167 FormatScientific = true;
4168 else {
4169 if (exp >= 0) {
4170 // 765e3 --> 765000
4171 // ^^^
4172 // But we shouldn't make the number look more precise than it is.
4173 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
4174 NDigits + (unsigned) exp > FormatPrecision);
4175 } else {
4176 // Power of the most significant digit.
4177 int MSD = exp + (int) (NDigits - 1);
4178 if (MSD >= 0) {
4179 // 765e-2 == 7.65
4180 FormatScientific = false;
4181 } else {
4182 // 765e-5 == 0.00765
4183 // ^ ^^
4184 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
4185 }
4186 }
4187 }
4188
4189 // Scientific formatting is pretty straightforward.
4190 if (FormatScientific) {
4191 exp += (NDigits - 1);
4192
4193 Str.push_back(buffer[NDigits-1]);
4194 Str.push_back('.');
4195 if (NDigits == 1 && TruncateZero)
4196 Str.push_back('0');
4197 else
4198 for (unsigned I = 1; I != NDigits; ++I)
4199 Str.push_back(buffer[NDigits-1-I]);
4200 // Fill with zeros up to FormatPrecision.
4201 if (!TruncateZero && FormatPrecision > NDigits - 1)
4202 Str.append(FormatPrecision - NDigits + 1, '0');
4203 // For !TruncateZero we use lower 'e'.
4204 Str.push_back(TruncateZero ? 'E' : 'e');
4205
4206 Str.push_back(exp >= 0 ? '+' : '-');
4207 if (exp < 0) exp = -exp;
4208 SmallVector<char, 6> expbuf;
4209 do {
4210 expbuf.push_back((char) ('0' + (exp % 10)));
4211 exp /= 10;
4212 } while (exp);
4213 // Exponent always at least two digits if we do not truncate zeros.
4214 if (!TruncateZero && expbuf.size() < 2)
4215 expbuf.push_back('0');
4216 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
4217 Str.push_back(expbuf[E-1-I]);
4218 return;
4219 }
4220
4221 // Non-scientific, positive exponents.
4222 if (exp >= 0) {
4223 for (unsigned I = 0; I != NDigits; ++I)
4224 Str.push_back(buffer[NDigits-1-I]);
4225 for (unsigned I = 0; I != (unsigned) exp; ++I)
4226 Str.push_back('0');
4227 return;
4228 }
4229
4230 // Non-scientific, negative exponents.
4231
4232 // The number of digits to the left of the decimal point.
4233 int NWholeDigits = exp + (int) NDigits;
4234
4235 unsigned I = 0;
4236 if (NWholeDigits > 0) {
4237 for (; I != (unsigned) NWholeDigits; ++I)
4238 Str.push_back(buffer[NDigits-I-1]);
4239 Str.push_back('.');
4240 } else {
4241 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
4242
4243 Str.push_back('0');
4244 Str.push_back('.');
4245 for (unsigned Z = 1; Z != NZeros; ++Z)
4246 Str.push_back('0');
4247 }
4248
4249 for (; I != NDigits; ++I)
4250 Str.push_back(buffer[NDigits-I-1]);
4251}
4252
4254 // Special floats and denormals have no exact inverse.
4255 if (!isFiniteNonZero())
4256 return false;
4257
4258 // Check that the number is a power of two by making sure that only the
4259 // integer bit is set in the significand.
4260 if (significandLSB() != semantics->precision - 1)
4261 return false;
4262
4263 // Get the inverse.
4264 IEEEFloat reciprocal(*semantics, 1ULL);
4265 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
4266 return false;
4267
4268 // Avoid multiplication with a denormal, it is not safe on all platforms and
4269 // may be slower than a normal division.
4270 if (reciprocal.isDenormal())
4271 return false;
4272
4273 assert(reciprocal.isFiniteNonZero() &&
4274 reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
4275
4276 if (inv)
4277 *inv = APFloat(reciprocal, *semantics);
4278
4279 return true;
4280}
4281
4283 if (!isNaN())
4284 return false;
4286 return false;
4287
4288 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
4289 // first bit of the trailing significand being 0.
4290 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
4291}
4292
4293/// IEEE-754R 2008 5.3.1: nextUp/nextDown.
4294///
4295/// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
4296/// appropriate sign switching before/after the computation.
4298 // If we are performing nextDown, swap sign so we have -x.
4299 if (nextDown)
4300 changeSign();
4301
4302 // Compute nextUp(x)
4303 opStatus result = opOK;
4304
4305 // Handle each float category separately.
4306 switch (category) {
4307 case fcInfinity:
4308 // nextUp(+inf) = +inf
4309 if (!isNegative())
4310 break;
4311 // nextUp(-inf) = -getLargest()
4312 makeLargest(true);
4313 break;
4314 case fcNaN:
4315 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
4316 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
4317 // change the payload.
4318 if (isSignaling()) {
4319 result = opInvalidOp;
4320 // For consistency, propagate the sign of the sNaN to the qNaN.
4321 makeNaN(false, isNegative(), nullptr);
4322 }
4323 break;
4324 case fcZero:
4325 // nextUp(pm 0) = +getSmallest()
4326 makeSmallest(false);
4327 break;
4328 case fcNormal:
4329 // nextUp(-getSmallest()) = -0
4330 if (isSmallest() && isNegative()) {
4331 APInt::tcSet(significandParts(), 0, partCount());
4332 category = fcZero;
4333 exponent = 0;
4334 break;
4335 }
4336
4337 if (isLargest() && !isNegative()) {
4339 // nextUp(getLargest()) == NAN
4340 makeNaN();
4341 break;
4342 } else {
4343 // nextUp(getLargest()) == INFINITY
4344 APInt::tcSet(significandParts(), 0, partCount());
4345 category = fcInfinity;
4346 exponent = semantics->maxExponent + 1;
4347 break;
4348 }
4349 }
4350
4351 // nextUp(normal) == normal + inc.
4352 if (isNegative()) {
4353 // If we are negative, we need to decrement the significand.
4354
4355 // We only cross a binade boundary that requires adjusting the exponent
4356 // if:
4357 // 1. exponent != semantics->minExponent. This implies we are not in the
4358 // smallest binade or are dealing with denormals.
4359 // 2. Our significand excluding the integral bit is all zeros.
4360 bool WillCrossBinadeBoundary =
4361 exponent != semantics->minExponent && isSignificandAllZeros();
4362
4363 // Decrement the significand.
4364 //
4365 // We always do this since:
4366 // 1. If we are dealing with a non-binade decrement, by definition we
4367 // just decrement the significand.
4368 // 2. If we are dealing with a normal -> normal binade decrement, since
4369 // we have an explicit integral bit the fact that all bits but the
4370 // integral bit are zero implies that subtracting one will yield a
4371 // significand with 0 integral bit and 1 in all other spots. Thus we
4372 // must just adjust the exponent and set the integral bit to 1.
4373 // 3. If we are dealing with a normal -> denormal binade decrement,
4374 // since we set the integral bit to 0 when we represent denormals, we
4375 // just decrement the significand.
4376 integerPart *Parts = significandParts();
4377 APInt::tcDecrement(Parts, partCount());
4378
4379 if (WillCrossBinadeBoundary) {
4380 // Our result is a normal number. Do the following:
4381 // 1. Set the integral bit to 1.
4382 // 2. Decrement the exponent.
4383 APInt::tcSetBit(Parts, semantics->precision - 1);
4384 exponent--;
4385 }
4386 } else {
4387 // If we are positive, we need to increment the significand.
4388
4389 // We only cross a binade boundary that requires adjusting the exponent if
4390 // the input is not a denormal and all of said input's significand bits
4391 // are set. If all of said conditions are true: clear the significand, set
4392 // the integral bit to 1, and increment the exponent. If we have a
4393 // denormal always increment since moving denormals and the numbers in the
4394 // smallest normal binade have the same exponent in our representation.
4395 bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes();
4396
4397 if (WillCrossBinadeBoundary) {
4398 integerPart *Parts = significandParts();
4399 APInt::tcSet(Parts, 0, partCount());
4400 APInt::tcSetBit(Parts, semantics->precision - 1);
4401 assert(exponent != semantics->maxExponent &&
4402 "We can not increment an exponent beyond the maxExponent allowed"
4403 " by the given floating point semantics.");
4404 exponent++;
4405 } else {
4406 incrementSignificand();
4407 }
4408 }
4409 break;
4410 }
4411
4412 // If we are performing nextDown, swap sign so we have -nextUp(-x)
4413 if (nextDown)
4414 changeSign();
4415
4416 return result;
4417}
4418
/// Returns the exponent value used when this format encodes a NaN.
///
/// Semantics whose non-finite behavior is NanOnly use maxExponent itself;
/// all other semantics use maxExponent + 1, the same value exponentInf()
/// returns.
// NOTE(review): listing line 4420 (the guard below) was absent from this
// extracted listing, which left two back-to-back returns with the second one
// unreachable. The guard is restored here; confirm against the upstream file.
4419APFloatBase::ExponentType IEEEFloat::exponentNaN() const {
4420 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
4421 return semantics->maxExponent;
4422 return semantics->maxExponent + 1;
4423}
4424
/// Returns the exponent value stored for an infinity: one more than the
/// semantics' maxExponent. makeInf() assigns this value to `exponent`.
4425APFloatBase::ExponentType IEEEFloat::exponentInf() const {
4426 return semantics->maxExponent + 1;
4427}
4428
/// Returns the exponent value stored for a zero: one less than the
/// semantics' minExponent. makeZero() assigns this value to `exponent`.
4429APFloatBase::ExponentType IEEEFloat::exponentZero() const {
4430 return semantics->minExponent - 1;
4431}
4432
/// Turns this value into an infinity with the requested sign.
///
/// \param Negative true to produce a negative value.
///
/// Semantics whose non-finite behavior is NanOnly cannot represent an
/// infinity; for those a NaN with the same sign is produced instead.
// NOTE(review): listing line 4434 (the opening guard below) was absent from
// this extracted listing, which left the closing brace on line 4438 unbalanced
// and the NaN path unconditional. The guard is restored here; confirm against
// the upstream file.
4433void IEEEFloat::makeInf(bool Negative) {
4434 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4435 // There is no Inf, so make NaN instead.
4436 makeNaN(false, Negative);
4437 return;
4438 }
4439 category = fcInfinity;
4440 sign = Negative;
4441 exponent = exponentInf();
// Write 0 over partCount() significand parts (presumably clears the whole
// significand; confirm against APInt::tcSet).
4442 APInt::tcSet(significandParts(), 0, partCount());
4443}
4444
/// Turns this value into a zero with the requested sign.
///
/// \param Negative true to produce a negative zero.
4445void IEEEFloat::makeZero(bool Negative) {
4446 category = fcZero;
4447 sign = Negative;
4448 exponent = exponentZero();
// Write 0 over partCount() significand parts (presumably clears the whole
// significand; confirm against APInt::tcSet).
4449 APInt::tcSet(significandParts(), 0, partCount());
4450}
4451
4453 assert(isNaN());
4455 APInt::tcSetBit(significandParts(), semantics->precision - 2);
4456}
4457
/// Returns the exponent of \p Arg as an int.
///
/// NaN, zero and infinity yield IEEEFloat::IEK_NaN, IEEEFloat::IEK_Zero and
/// IEEEFloat::IEK_Inf respectively. Any other non-denormal value yields its
/// stored exponent. A denormal is normalized on a copy first and the exponent
/// of that normalized copy is returned.
4458int ilogb(const IEEEFloat &Arg) {
4459 if (Arg.isNaN())
4460 return IEEEFloat::IEK_NaN;
4461 if (Arg.isZero())
4462 return IEEEFloat::IEK_Zero;
4463 if (Arg.isInfinity())
4464 return IEEEFloat::IEK_Inf;
4465 if (!Arg.isDenormal())
4466 return Arg.exponent;
4467
// Denormal: work on a copy so Arg is left untouched.
4468 IEEEFloat Normalized(Arg);
4469 int SignificandBits = Arg.getSemantics().precision - 1;
4470
// Bias the exponent up by SignificandBits before normalizing and remove the
// same bias from the result (presumably so normalize() has room to shift the
// leading bit into place without running into minExponent; confirm).
4471 Normalized.exponent += SignificandBits;
4472 Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero);
4473 return Normalized.exponent - SignificandBits;
4474}
4475
4477 auto MaxExp = X.getSemantics().maxExponent;
4478 auto MinExp = X.getSemantics().minExponent;
4479
4480 // If Exp is wildly out-of-scale, simply adding it to X.exponent will
4481 // overflow; clamp it to a safe range before adding, but ensure that the range
4482 // is large enough that the clamp does not change the result. The range we
4483 // need to support is the difference between the largest possible exponent and
4484 // the normalized exponent of half the smallest denormal.
4485
4486 int SignificandBits = X.getSemantics().precision - 1;
4487 int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
4488
4489 // Clamp to one past the range ends to let normalize handle overflow.
4490 X.exponent += std::clamp(Exp, -MaxIncrement - 1, MaxIncrement);
4491 X.normalize(RoundingMode, lfExactlyZero);
4492 if (X.isNaN())
4493 X.makeQuiet();
4494 return X;
4495}
4496
4498 Exp = ilogb(Val);
4499
4500 // Quiet signalling nans.
4501 if (Exp == IEEEFloat::IEK_NaN) {
4502 IEEEFloat Quiet(Val);
4503 Quiet.makeQuiet();
4504 return Quiet;
4505 }
4506
4507 if (Exp == IEEEFloat::IEK_Inf)
4508 return Val;
4509
4510 // 1 is added because frexp is defined to return a normalized fraction in
4511 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
4512 Exp = Exp == IEEEFloat::IEK_Zero ? 0 : Exp + 1;
4513 return scalbn(Val, -Exp, RM);
4514}
4515
4517 : Semantics(&S),
4519 assert(Semantics == &semPPCDoubleDouble);
4520}
4521
4523 : Semantics(&S),
4526 assert(Semantics == &semPPCDoubleDouble);
4527}
4528
4530 : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I),
4532 assert(Semantics == &semPPCDoubleDouble);
4533}
4534
4536 : Semantics(&S),
4537 Floats(new APFloat[2]{
4538 APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])),
4539 APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
4540 assert(Semantics == &semPPCDoubleDouble);
4541}
4542
4544 APFloat &&Second)
4545 : Semantics(&S),
4546 Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
4547 assert(Semantics == &semPPCDoubleDouble);
4548 assert(&Floats[0].getSemantics() == &semIEEEdouble);
4549 assert(&Floats[1].getSemantics() == &semIEEEdouble);
4550}
4551
4554 Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
4555 APFloat(RHS.Floats[1])}
4556 : nullptr) {
4557 assert(Semantics == &semPPCDoubleDouble);
4558}
4559
4561 : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) {
4562 RHS.Semantics = &semBogus;
4564}
4565
4567 if (Semantics == RHS.Semantics && RHS.Floats) {
4568 Floats[0] = RHS.Floats[0];
4569 Floats[1] = RHS.Floats[1];
4570 } else if (this != &RHS) {
4571 this->~DoubleAPFloat();
4572 new (this) DoubleAPFloat(RHS);
4573 }
4574 return *this;
4575}
4576
4577// Implement addition, subtraction, multiplication and division based on:
4578// "Software for Doubled-Precision Floating-Point Computations",
4579// by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
/// Adds the two-component operands (a, aa) and (c, cc) and stores the
/// two-component sum in Floats[0] and Floats[1].
///
/// addWithSpecial() passes Floats[0]/Floats[1] of its LHS as a/aa and those of
/// its RHS as c/cc. Every intermediate operation uses rounding mode \p RM.
/// The status flags of the intermediate operations are OR-ed into the returned
/// opStatus, except on the two paths called out in the body.
4580APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
4581 const APFloat &c, const APFloat &cc,
4582 roundingMode RM) {
4583 int Status = opOK;
// First estimate of the leading component: z = a + c.
4584 APFloat z = a;
4585 Status |= z.add(c, RM);
4586 if (!z.isFinite()) {
// z is neither finite nor an infinity (presumably a NaN): store it as the
// leading component with a +0 second component and stop.
4587 if (!z.isInfinity()) {
4588 Floats[0] = std::move(z);
4589 Floats[1].makeZero(/* Neg = */ false);
4590 return (opStatus)Status;
4591 }
// a + c came out as an infinity. Discard the flags gathered so far and redo
// the sum starting from the two second components, adding the leading
// component of larger magnitude last.
4592 Status = opOK;
4593 auto AComparedToC = a.compareAbsoluteValue(c);
4594 z = cc;
4595 Status |= z.add(aa, RM);
4596 if (AComparedToC == APFloat::cmpGreaterThan) {
4597 // z = cc + aa + c + a;
4598 Status |= z.add(c, RM);
4599 Status |= z.add(a, RM);
4600 } else {
4601 // z = cc + aa + a + c;
4602 Status |= z.add(a, RM);
4603 Status |= z.add(c, RM);
4604 }
// Still not finite after reordering: that value is the result.
4605 if (!z.isFinite()) {
4606 Floats[0] = std::move(z);
4607 Floats[1].makeZero(/* Neg = */ false);
4608 return (opStatus)Status;
4609 }
// The reordered sum is finite: keep it as the leading component and derive
// the second component from what is left over.
4610 Floats[0] = z;
4611 APFloat zz = aa;
4612 Status |= zz.add(cc, RM);
4613 if (AComparedToC == APFloat::cmpGreaterThan) {
4614 // Floats[1] = a - z + c + zz;
4615 Floats[1] = a;
4616 Status |= Floats[1].subtract(z, RM);
4617 Status |= Floats[1].add(c, RM);
4618 Status |= Floats[1].add(zz, RM);
4619 } else {
4620 // Floats[1] = c - z + a + zz;
4621 Floats[1] = c;
4622 Status |= Floats[1].subtract(z, RM);
4623 Status |= Floats[1].add(a, RM);
4624 Status |= Floats[1].add(zz, RM);
4625 }
4626 } else {
4627 // q = a - z;
4628 APFloat q = a;
4629 Status |= q.subtract(z, RM);
4630
4631 // zz = q + c + (a - (q + z)) + aa + cc;
4632 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
4633 auto zz = q;
4634 Status |= zz.add(c, RM);
4635 Status |= q.add(z, RM);
4636 Status |= q.subtract(a, RM);
4637 q.changeSign();
4638 Status |= zz.add(q, RM);
4639 Status |= zz.add(aa, RM);
4640 Status |= zz.add(cc, RM);
// Correction term is +0: z alone is the result.
// NOTE(review): this path returns opOK rather than the flags accumulated in
// Status; confirm that dropping them is intentional.
4641 if (zz.isZero() && !zz.isNegative()) {
4642 Floats[0] = std::move(z);
4643 Floats[1].makeZero(/* Neg = */ false);
4644 return opOK;
4645 }
// Leading component = z + zz. If that is not finite, pair it with a +0
// second component.
4646 Floats[0] = z;
4647 Status |= Floats[0].add(zz, RM);
4648 if (!Floats[0].isFinite()) {
4649 Floats[1].makeZero(/* Neg = */ false);
4650 return (opStatus)Status;
4651 }
// Second component = z - Floats[0] + zz. z is not used again, so it is moved.
4652 Floats[1] = std::move(z);
4653 Status |= Floats[1].subtract(Floats[0], RM);
4654 Status |= Floats[1].add(zz, RM);
4655 }
4656 return (opStatus)Status;
4657}
4658
/// Computes LHS + RHS into \p Out, resolving the special categories (NaN,
/// zero, infinity) here and handing the normal + normal case to addImpl().
///
/// \param LHS left operand.
/// \param RHS right operand.
/// \param Out receives the sum. It may be the same object as an operand:
///            the `addWithSpecial(*this, RHS, *this, RM)` call in this file
///            passes one object as both LHS and Out.
/// \param RM  rounding mode forwarded to addImpl().
/// \returns opInvalidOp for the sum of infinities with opposite signs, opOK
///          for every other special case, and addImpl()'s status otherwise.
4659APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
4660 const DoubleAPFloat &RHS,
4661 DoubleAPFloat &Out,
4662 roundingMode RM) {
// A NaN operand is the result; LHS is checked first, so it wins when both
// operands are NaN.
4663 if (LHS.getCategory() == fcNaN) {
4664 Out = LHS;
4665 return opOK;
4666 }
4667 if (RHS.getCategory() == fcNaN) {
4668 Out = RHS;
4669 return opOK;
4670 }
// With a zero operand the result is a copy of the other operand.
4671 if (LHS.getCategory() == fcZero) {
4672 Out = RHS;
4673 return opOK;
4674 }
4675 if (RHS.getCategory() == fcZero) {
4676 Out = LHS;
4677 return opOK;
4678 }
// Infinities of opposite sign: invalid operation, result is a quiet NaN.
// The NaN's sign is taken from Out's value at the time of the call.
4679 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity &&
4680 LHS.isNegative() != RHS.isNegative()) {
4681 Out.makeNaN(false, Out.isNegative(), nullptr);
4682 return opInvalidOp;
4683 }
// Any remaining infinity operand is the result.
4684 if (LHS.getCategory() == fcInfinity) {
4685 Out = LHS;
4686 return opOK;
4687 }
4688 if (RHS.getCategory() == fcInfinity) {
4689 Out = RHS;
4690 return opOK;
4691 }
4692 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
4693
// Copy all four components before calling addImpl(), which writes
// Out.Floats; Out may be the same object as LHS or RHS.
4694 APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
4695 CC(RHS.Floats[1]);
4696 assert(&A.getSemantics() == &semIEEEdouble);
4697 assert(&AA.getSemantics() == &semIEEEdouble);
4698 assert(&C.getSemantics() == &semIEEEdouble);
4699 assert(&CC.getSemantics() == &semIEEEdouble);
4700 assert(&Out.Floats[0].getSemantics() == &semIEEEdouble);
4701 assert(&Out.Floats[1].getSemantics() == &semIEEEdouble);
4702 return Out.addImpl(A, AA, C, CC, RM);
4703}
4704
4706 roundingMode RM) {
4707 return addWithSpecial(*this, RHS, *this, RM);
4708}
4709
4711 roundingMode RM) {
4712 changeSign();
4713 auto Ret = add(RHS, RM);
4714 changeSign();
4715 return Ret;
4716}
4717
4720 const auto &LHS = *this;
4721 auto &Out = *this;
4722 /* Interesting observation: For special categories, finding the lowest
4723 common ancestor of the following layered graph gives the correct
4724 return category:
4725
4726 NaN
4727 / \
4728 Zero Inf
4729 \ /
4730 Normal
4731
4732 e.g. NaN * NaN = NaN
4733 Zero * Inf = NaN
4734 Normal * Zero = Zero
4735 Normal * Inf = Inf
4736 */
4737 if (LHS.getCategory() == fcNaN) {
4738 Out = LHS;
4739 return opOK;
4740 }
4741 if (RHS.getCategory() == fcNaN) {
4742 Out = RHS;
4743 return opOK;
4744 }
4745 if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) ||
4746 (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) {
4747 Out.makeNaN(false, false, nullptr);
4748 return opOK;
4749 }
4750 if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) {
4751 Out = LHS;
4752 return opOK;
4753 }
4754 if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) {
4755 Out = RHS;
4756 return opOK;
4757 }
4758 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal &&
4759 "Special cases not handled exhaustively");
4760
4761 int Status = opOK;
4762 APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
4763 // t = a * c
4764 APFloat T = A;
4765 Status |= T.multiply(C, RM);
4766 if (!T.isFiniteNonZero()) {
4767 Floats[0] = T;
4768 Floats[1].makeZero(/* Neg = */ false);
4769 return (opStatus)Status;
4770 }
4771
4772 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
4773 APFloat Tau = A;
4774 T.changeSign();
4775 Status |= Tau.fusedMultiplyAdd(C, T, RM);
4776 T.changeSign();
4777 {
4778 // v = a * d
4779 APFloat V = A;
4780 Status |= V.multiply(D, RM);
4781 // w = b * c
4782 APFloat W = B;
4783 Status |= W.multiply(C, RM);
4784 Status |= V.add(W, RM);
4785 // tau += v + w
4786 Status |= Tau.add(V, RM);
4787 }
4788 // u = t + tau
4789 APFloat U = T;
4790 Status |= U.add(Tau, RM);
4791
4792 Floats[0] = U;
4793 if (!U.isFinite()) {
4794 Floats[1].makeZero(/* Neg = */ false);
4795 } else {
4796 // Floats[1] = (t - u) + tau
4797 Status |= T.subtract(U, RM);
4798 Status |= T.add(Tau, RM);
4799 Floats[1] = T;
4800 }
4801 return (opStatus)Status;
4802}
4803
4806 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4808 auto Ret =
4809 Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
4811 return Ret;
4812}
4813
4815 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4817 auto Ret =
4818 Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4820 return Ret;
4821}
4822
4824 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4826 auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4828 return Ret;
4829}
4830
4833 const DoubleAPFloat &Addend,
4835 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4837 auto Ret = Tmp.fusedMultiplyAdd(
4841 return Ret;
4842}
4843
4845 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4847 auto Ret = Tmp.roundToIntegral(RM);
4849 return Ret;
4850}
4851
4853 Floats[0].changeSign();
4854 Floats[1].changeSign();
4855}
4856
4859 auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]);
4860 if (Result != cmpEqual)
4861 return Result;
4862 Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]);
4863 if (Result == cmpLessThan || Result == cmpGreaterThan) {
4864 auto Against = Floats[0].isNegative() ^ Floats[1].isNegative();
4865 auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative();
4866 if (Against && !RHSAgainst)
4867 return cmpLessThan;
4868 if (!Against && RHSAgainst)
4869 return cmpGreaterThan;
4870 if (!Against && !RHSAgainst)
4871 return Result;
4872 if (Against && RHSAgainst)
4873 return (cmpResult)(cmpLessThan + cmpGreaterThan - Result);
4874 }
4875 return Result;
4876}
4877
4879 return Floats[0].getCategory();
4880}
4881
/// Reports the sign of the value, which is the sign of Floats[0]; Floats[1]
/// is not consulted.
4882bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
4883
4885 Floats[0].makeInf(Neg);
4886 Floats[1].makeZero(/* Neg = */ false);
4887}
4888
4890 Floats[0].makeZero(Neg);
4891 Floats[1].makeZero(/* Neg = */ false);
4892}
4893
4895 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4896 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
4897 Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
4898 if (Neg)
4899 changeSign();
4900}
4901
4903 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4904 Floats[0].makeSmallest(Neg);
4905 Floats[1].makeZero(/* Neg = */ false);
4906}
4907
4909 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4910 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull));
4911 if (Neg)
4912 Floats[0].changeSign();
4913 Floats[1].makeZero(/* Neg = */ false);
4914}
4915
/// Turns this value into a NaN: Floats[0] becomes the NaN described by the
/// arguments and Floats[1] becomes +0.
///
/// \param SNaN, Neg, fill forwarded unchanged to Floats[0].makeNaN().
4916void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
4917 Floats[0].makeNaN(SNaN, Neg, fill);
4918 Floats[1].makeZero(/* Neg = */ false);
4919}
4920
4922 auto Result = Floats[0].compare(RHS.Floats[0]);
4923 // |Float[0]| > |Float[1]|
4924 if (Result == APFloat::cmpEqual)
4925 return Floats[1].compare(RHS.Floats[1]);
4926 return Result;
4927}
4928
4930 return Floats[0].bitwiseIsEqual(RHS.Floats[0]) &&
4931 Floats[1].bitwiseIsEqual(RHS.Floats[1]);
4932}
4933
4935 if (Arg.Floats)
4936 return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1]));
4937 return hash_combine(Arg.Semantics);
4938}
4939
4941 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4942 uint64_t Data[] = {
4943 Floats[0].bitcastToAPInt().getRawData()[0],
4944 Floats[1].bitcastToAPInt().getRawData()[0],
4945 };
4946 return APInt(128, 2, Data);
4947}
4948
4950 roundingMode RM) {
4951 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4953 auto Ret = Tmp.convertFromString(S, RM);
4955 return Ret;
4956}
4957
4959 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4961 auto Ret = Tmp.next(nextDown);
4963 return Ret;
4964}
4965
4968 unsigned int Width, bool IsSigned,
4969 roundingMode RM, bool *IsExact) const {
4970 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4972 .convertToInteger(Input, Width, IsSigned, RM, IsExact);
4973}
4974
4976 bool IsSigned,
4977 roundingMode RM) {
4978 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4980 auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM);
4982 return Ret;
4983}
4984
4987 unsigned int InputSize,
4988 bool IsSigned, roundingMode RM) {
4989 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4991 auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM);
4993 return Ret;
4994}
4995
4998 unsigned int InputSize,
4999 bool IsSigned, roundingMode RM) {
5000 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5002 auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM);
5004 return Ret;
5005}
5006
5008 unsigned int HexDigits,
5009 bool UpperCase,
5010 roundingMode RM) const {
5011 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5013 .convertToHexString(DST, HexDigits, UpperCase, RM);
5014}
5015
5017 return getCategory() == fcNormal &&
5018 (Floats[0].isDenormal() || Floats[1].isDenormal() ||
5019 // (double)(Hi + Lo) == Hi defines a normal number.
5020 Floats[0] != Floats[0] + Floats[1]);
5021}
5022
5024 if (getCategory() != fcNormal)
5025 return false;
5026 DoubleAPFloat Tmp(*this);
5027 Tmp.makeSmallest(this->isNegative());
5028 return Tmp.compare(*this) == cmpEqual;
5029}
5030
5032 if (getCategory() != fcNormal)
5033 return false;
5034
5035 DoubleAPFloat Tmp(*this);
5037 return Tmp.compare(*this) == cmpEqual;
5038}
5039
5041 if (getCategory() != fcNormal)
5042 return false;
5043 DoubleAPFloat Tmp(*this);
5044 Tmp.makeLargest(this->isNegative());
5045 return Tmp.compare(*this) == cmpEqual;
5046}
5047
5049 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5050 return Floats[0].isInteger() && Floats[1].isInteger();
5051}
5052
5054 unsigned FormatPrecision,
5055 unsigned FormatMaxPadding,
5056 bool TruncateZero) const {
5057 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5059 .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
5060}
5061
5063 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5065 if (!inv)
5066 return Tmp.getExactInverse(nullptr);
5068 auto Ret = Tmp.getExactInverse(&Inv);
5070 return Ret;
5071}
5072
5075 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5076 return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM),
5077 scalbn(Arg.Floats[1], Exp, RM));
5078}
5079
5082 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5083 APFloat First = frexp(Arg.Floats[0], Exp, RM);
5084 APFloat Second = Arg.Floats[1];
5085 if (Arg.getCategory() == APFloat::fcNormal)
5086 Second = scalbn(Second, -Exp, RM);
5087 return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second));
5088}
5089
5090} // namespace detail
5091
5092APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
5093 if (usesLayout<IEEEFloat>(Semantics)) {
5094 new (&IEEE) IEEEFloat(std::move(F));
5095 return;
5096 }
5097 if (usesLayout<DoubleAPFloat>(Semantics)) {
5098 const fltSemantics& S = F.getSemantics();
5099 new (&Double)
5100 DoubleAPFloat(Semantics, APFloat(std::move(F), S),