LLVM 19.0.0git
APFloat.cpp
Go to the documentation of this file.
1//===-- APFloat.cpp - Implement APFloat class -----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a class to represent arbitrary precision floating
10// point values and provide a variety of arithmetic operations on them.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/ADT/APFloat.h"
15#include "llvm/ADT/APSInt.h"
16#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/FoldingSet.h"
19#include "llvm/ADT/Hashing.h"
20#include "llvm/ADT/STLExtras.h"
22#include "llvm/ADT/StringRef.h"
23#include "llvm/Config/llvm-config.h"
24#include "llvm/Support/Debug.h"
25#include "llvm/Support/Error.h"
28#include <cstring>
29#include <limits.h>
30
31#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL) \
32 do { \
33 if (usesLayout<IEEEFloat>(getSemantics())) \
34 return U.IEEE.METHOD_CALL; \
35 if (usesLayout<DoubleAPFloat>(getSemantics())) \
36 return U.Double.METHOD_CALL; \
37 llvm_unreachable("Unexpected semantics"); \
38 } while (false)
39
40using namespace llvm;
41
42/// A macro used to combine two fcCategory enums into one key which can be used
43/// in a switch statement to classify how the interaction of two APFloat's
44/// categories affects an operation.
45///
46/// TODO: If clang source code is ever allowed to use constexpr in its own
47/// codebase, change this into a static inline function.
48#define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
49
50/* Assumed in hexadecimal significand parsing, and conversion to
51 hexadecimal strings. */
52static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
53
54namespace llvm {
55
56// How the nonfinite values Inf and NaN are represented.
58 // Represents standard IEEE 754 behavior. A value is nonfinite if the
59 // exponent field is all 1s. In such cases, a value is Inf if the
60 // significand bits are all zero, and NaN otherwise
61 IEEE754,
62
63 // This behavior is present in the Float8ExMyFN* types (Float8E4M3FN,
64 // Float8E5M2FNUZ, Float8E4M3FNUZ, and Float8E4M3B11FNUZ). There is no
65 // representation for Inf, and operations that would ordinarily produce Inf
66 // produce NaN instead.
67 // The details of the NaN representation(s) in this form are determined by the
68 // `fltNanEncoding` enum. We treat all NaNs as quiet, as the available
69 // encodings do not distinguish between signalling and quiet NaN.
70 NanOnly,
71};
72
73// How NaN values are represented. This is currently only used in combination
74// with fltNonfiniteBehavior::NanOnly, and using a variant other than IEEE
75// while having IEEE non-finite behavior is liable to lead to unexpected
76// results.
77enum class fltNanEncoding {
78 // Represents the standard IEEE behavior where a value is NaN if its
79 // exponent is all 1s and the significand is non-zero.
80 IEEE,
81
82 // Represents the behavior in the Float8E4M3 floating point type where NaN is
83 // represented by having the exponent and mantissa set to all 1s.
84 // This behavior matches the FP8 E4M3 type described in
85 // https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs
86 // as non-signalling, although the paper does not state whether the NaN
87 // values are signalling or not.
88 AllOnes,
89
90 // Represents the behavior in Float8E{5,4}M{2,3}FNUZ floating point types
91 // where NaN is represented by a sign bit of 1 and all 0s in the exponent
92 // and mantissa (i.e. the negative zero encoding in an IEEE float). Since
93 // there is only one NaN value, it is treated as quiet NaN. This matches the
94 // behavior described in https://arxiv.org/abs/2206.02915 .
96};
97
98/* Represents floating point arithmetic semantics. */
100 /* The largest E such that 2^E is representable; this matches the
101 definition of IEEE 754. */
103
104 /* The smallest E such that 2^E is a normalized number; this
105 matches the definition of IEEE 754. */
107
108 /* Number of bits in the significand. This includes the integer
109 bit. */
110 unsigned int precision;
111
112 /* Number of bits actually used in the semantics. */
113 unsigned int sizeInBits;
114
116
118 // Returns true if any number described by this semantics can be precisely
119 // represented by the specified semantics. Does not take into account
120 // the value of fltNonfiniteBehavior.
121 bool isRepresentableBy(const fltSemantics &S) const {
122 return maxExponent <= S.maxExponent && minExponent >= S.minExponent &&
123 precision <= S.precision;
124 }
125};
126
127static constexpr fltSemantics semIEEEhalf = {15, -14, 11, 16};
128static constexpr fltSemantics semBFloat = {127, -126, 8, 16};
129static constexpr fltSemantics semIEEEsingle = {127, -126, 24, 32};
130static constexpr fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
131static constexpr fltSemantics semIEEEquad = {16383, -16382, 113, 128};
132static constexpr fltSemantics semFloat8E5M2 = {15, -14, 3, 8};
133static constexpr fltSemantics semFloat8E5M2FNUZ = {
135static constexpr fltSemantics semFloat8E4M3FN = {
137static constexpr fltSemantics semFloat8E4M3FNUZ = {
141static constexpr fltSemantics semFloatTF32 = {127, -126, 11, 19};
142static constexpr fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
143static constexpr fltSemantics semBogus = {0, 0, 0, 0};
144
145/* The IBM double-double semantics. Such a number consists of a pair of IEEE
146 64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
147 (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
148 Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
149 to each other, and two 11-bit exponents.
150
151 Note: we need to make the value different from semBogus as otherwise
152 an unsafe optimization may collapse both values to a single address,
153 and we heavily rely on them having distinct addresses. */
154static constexpr fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128};
155
156/* These are legacy semantics for the fallback, inaccurate implementation of
157 IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
158 operation. It's equivalent to having an IEEE number with consecutive 106
159 bits of mantissa and 11 bits of exponent.
160
161 It's not equivalent to IBM double-double. For example, a legit IBM
162 double-double, 1 + epsilon:
163
164 1 + epsilon = 1 + (1 >> 1076)
165
166 is not representable by a consecutive 106 bits of mantissa.
167
168 Currently, these semantics are used in the following way:
169
170 semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
171 (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
172 semPPCDoubleDoubleLegacy -> IEEE operations
173
174 We use bitcastToAPInt() to get the bit representation (in APInt) of the
175 underlying IEEEdouble, then use the APInt constructor to construct the
176 legacy IEEE float.
177
178 TODO: Implement all operations in semPPCDoubleDouble, and delete these
179 semantics. */
180static constexpr fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53,
181 53 + 53, 128};
182
184 switch (S) {
185 case S_IEEEhalf:
186 return IEEEhalf();
187 case S_BFloat:
188 return BFloat();
189 case S_IEEEsingle:
190 return IEEEsingle();
191 case S_IEEEdouble:
192 return IEEEdouble();
193 case S_IEEEquad:
194 return IEEEquad();
196 return PPCDoubleDouble();
197 case S_Float8E5M2:
198 return Float8E5M2();
199 case S_Float8E5M2FNUZ:
200 return Float8E5M2FNUZ();
201 case S_Float8E4M3FN:
202 return Float8E4M3FN();
203 case S_Float8E4M3FNUZ:
204 return Float8E4M3FNUZ();
206 return Float8E4M3B11FNUZ();
207 case S_FloatTF32:
208 return FloatTF32();
210 return x87DoubleExtended();
211 }
212 llvm_unreachable("Unrecognised floating semantics");
213}
214
217 if (&Sem == &llvm::APFloat::IEEEhalf())
218 return S_IEEEhalf;
219 else if (&Sem == &llvm::APFloat::BFloat())
220 return S_BFloat;
221 else if (&Sem == &llvm::APFloat::IEEEsingle())
222 return S_IEEEsingle;
223 else if (&Sem == &llvm::APFloat::IEEEdouble())
224 return S_IEEEdouble;
225 else if (&Sem == &llvm::APFloat::IEEEquad())
226 return S_IEEEquad;
227 else if (&Sem == &llvm::APFloat::PPCDoubleDouble())
228 return S_PPCDoubleDouble;
229 else if (&Sem == &llvm::APFloat::Float8E5M2())
230 return S_Float8E5M2;
231 else if (&Sem == &llvm::APFloat::Float8E5M2FNUZ())
232 return S_Float8E5M2FNUZ;
233 else if (&Sem == &llvm::APFloat::Float8E4M3FN())
234 return S_Float8E4M3FN;
235 else if (&Sem == &llvm::APFloat::Float8E4M3FNUZ())
236 return S_Float8E4M3FNUZ;
237 else if (&Sem == &llvm::APFloat::Float8E4M3B11FNUZ())
238 return S_Float8E4M3B11FNUZ;
239 else if (&Sem == &llvm::APFloat::FloatTF32())
240 return S_FloatTF32;
241 else if (&Sem == &llvm::APFloat::x87DoubleExtended())
242 return S_x87DoubleExtended;
243 else
244 llvm_unreachable("Unknown floating semantics");
245}
246
253 return semPPCDoubleDouble;
254}
261}
265}
267
273
274/* A tight upper bound on number of parts required to hold the value
275 pow(5, power) is
276
277 power * 815 / (351 * integerPartWidth) + 1
278
279 However, whilst the result may require only this many parts,
280 because we are multiplying two values to get it, the
281 multiplication may require an extra part with the excess part
282 being zero (consider the trivial case of 1 * 1, tcFullMultiply
283 requires two parts to hold the single-part result). So we add an
284 extra one to guarantee enough space whilst multiplying. */
285const unsigned int maxExponent = 16383;
286const unsigned int maxPrecision = 113;
288const unsigned int maxPowerOfFiveParts =
289 2 +
291
292unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
293 return semantics.precision;
294}
297 return semantics.maxExponent;
298}
301 return semantics.minExponent;
302}
303unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
304 return semantics.sizeInBits;
305}
307 bool isSigned) {
308 // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need
309 // at least one more bit than the MaxExponent to hold the max FP value.
310 unsigned int MinBitWidth = semanticsMaxExponent(semantics) + 1;
311 // Extra sign bit needed.
312 if (isSigned)
313 ++MinBitWidth;
314 return MinBitWidth;
315}
316
318 const fltSemantics &Dst) {
319 // Exponent range must be larger.
320 if (Src.maxExponent >= Dst.maxExponent || Src.minExponent <= Dst.minExponent)
321 return false;
322
323 // If the mantissa is long enough, the result value could still be denormal
324 // with a larger exponent range.
325 //
326 // FIXME: This condition is probably not accurate but also shouldn't be a
327 // practical concern with existing types.
328 return Dst.precision >= Src.precision;
329}
330
332 return Sem.sizeInBits;
333}
334
335static constexpr APFloatBase::ExponentType
336exponentZero(const fltSemantics &semantics) {
337 return semantics.minExponent - 1;
338}
339
340static constexpr APFloatBase::ExponentType
341exponentInf(const fltSemantics &semantics) {
342 return semantics.maxExponent + 1;
343}
344
345static constexpr APFloatBase::ExponentType
346exponentNaN(const fltSemantics &semantics) {
349 return exponentZero(semantics);
350 return semantics.maxExponent;
351 }
352 return semantics.maxExponent + 1;
353}
354
355/* A bunch of private, handy routines. */
356
357static inline Error createError(const Twine &Err) {
358 return make_error<StringError>(Err, inconvertibleErrorCode());
359}
360
361static constexpr inline unsigned int partCountForBits(unsigned int bits) {
363}
364
/* Map the character code C to its decimal digit value.  '0'..'9' yield
   0U-9U; because the subtraction is unsigned, every other character
   yields a value >= 10U, which callers treat as "not a digit". */
static inline unsigned int
decDigitValue(unsigned int c)
{
  const unsigned int zeroCode = '0';
  return c - zeroCode;
}
371
372/* Return the value of a decimal exponent of the form
373 [+-]ddddddd.
374
375 If the exponent overflows, returns a large exponent with the
376 appropriate sign. */
379 bool isNegative;
380 unsigned int absExponent;
381 const unsigned int overlargeExponent = 24000; /* FIXME. */
382 StringRef::iterator p = begin;
383
384 // Treat no exponent as 0 to match binutils
385 if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) {
386 return 0;
387 }
388
389 isNegative = (*p == '-');
390 if (*p == '-' || *p == '+') {
391 p++;
392 if (p == end)
393 return createError("Exponent has no digits");
394 }
395
396 absExponent = decDigitValue(*p++);
397 if (absExponent >= 10U)
398 return createError("Invalid character in exponent");
399
400 for (; p != end; ++p) {
401 unsigned int value;
402
403 value = decDigitValue(*p);
404 if (value >= 10U)
405 return createError("Invalid character in exponent");
406
407 absExponent = absExponent * 10U + value;
408 if (absExponent >= overlargeExponent) {
409 absExponent = overlargeExponent;
410 break;
411 }
412 }
413
414 if (isNegative)
415 return -(int) absExponent;
416 else
417 return (int) absExponent;
418}
419
420/* This is ugly and needs cleaning up, but I don't immediately see
421 how whilst remaining safe. */
424 int exponentAdjustment) {
425 int unsignedExponent;
426 bool negative, overflow;
427 int exponent = 0;
428
429 if (p == end)
430 return createError("Exponent has no digits");
431
432 negative = *p == '-';
433 if (*p == '-' || *p == '+') {
434 p++;
435 if (p == end)
436 return createError("Exponent has no digits");
437 }
438
439 unsignedExponent = 0;
440 overflow = false;
441 for (; p != end; ++p) {
442 unsigned int value;
443
444 value = decDigitValue(*p);
445 if (value >= 10U)
446 return createError("Invalid character in exponent");
447
448 unsignedExponent = unsignedExponent * 10 + value;
449 if (unsignedExponent > 32767) {
450 overflow = true;
451 break;
452 }
453 }
454
455 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
456 overflow = true;
457
458 if (!overflow) {
459 exponent = unsignedExponent;
460 if (negative)
461 exponent = -exponent;
462 exponent += exponentAdjustment;
463 if (exponent > 32767 || exponent < -32768)
464 overflow = true;
465 }
466
467 if (overflow)
468 exponent = negative ? -32768: 32767;
469
470 return exponent;
471}
472
475 StringRef::iterator *dot) {
476 StringRef::iterator p = begin;
477 *dot = end;
478 while (p != end && *p == '0')
479 p++;
480
481 if (p != end && *p == '.') {
482 *dot = p++;
483
484 if (end - begin == 1)
485 return createError("Significand has no digits");
486
487 while (p != end && *p == '0')
488 p++;
489 }
490
491 return p;
492}
493
494/* Given a normal decimal floating point number of the form
495
496 dddd.dddd[eE][+-]ddd
497
498 where the decimal point and exponent are optional, fill out the
499 structure D. Exponent is appropriate if the significand is
500 treated as an integer, and normalizedExponent if the significand
501 is taken to have the decimal point after a single leading
502 non-zero digit.
503
504 If the value is zero, V->firstSigDigit points to a non-digit, and
505 the return exponent is zero.
506*/
508 const char *firstSigDigit;
509 const char *lastSigDigit;
512};
513
516 StringRef::iterator dot = end;
517
518 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
519 if (!PtrOrErr)
520 return PtrOrErr.takeError();
521 StringRef::iterator p = *PtrOrErr;
522
523 D->firstSigDigit = p;
524 D->exponent = 0;
525 D->normalizedExponent = 0;
526
527 for (; p != end; ++p) {
528 if (*p == '.') {
529 if (dot != end)
530 return createError("String contains multiple dots");
531 dot = p++;
532 if (p == end)
533 break;
534 }
535 if (decDigitValue(*p) >= 10U)
536 break;
537 }
538
539 if (p != end) {
540 if (*p != 'e' && *p != 'E')
541 return createError("Invalid character in significand");
542 if (p == begin)
543 return createError("Significand has no digits");
544 if (dot != end && p - begin == 1)
545 return createError("Significand has no digits");
546
547 /* p points to the first non-digit in the string */
548 auto ExpOrErr = readExponent(p + 1, end);
549 if (!ExpOrErr)
550 return ExpOrErr.takeError();
551 D->exponent = *ExpOrErr;
552
553 /* Implied decimal point? */
554 if (dot == end)
555 dot = p;
556 }
557
558 /* If number is all zeroes accept any exponent. */
559 if (p != D->firstSigDigit) {
560 /* Drop insignificant trailing zeroes. */
561 if (p != begin) {
562 do
563 do
564 p--;
565 while (p != begin && *p == '0');
566 while (p != begin && *p == '.');
567 }
568
569 /* Adjust the exponents for any decimal point. */
570 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
571 D->normalizedExponent = (D->exponent +
572 static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
573 - (dot > D->firstSigDigit && dot < p)));
574 }
575
576 D->lastSigDigit = p;
577 return Error::success();
578}
579
580/* Return the trailing fraction of a hexadecimal number.
581 DIGITVALUE is the first hex digit of the fraction, P points to
582 the next digit. */
585 unsigned int digitValue) {
586 unsigned int hexDigit;
587
588 /* If the first trailing digit isn't 0 or 8 we can work out the
589 fraction immediately. */
590 if (digitValue > 8)
591 return lfMoreThanHalf;
592 else if (digitValue < 8 && digitValue > 0)
593 return lfLessThanHalf;
594
595 // Otherwise we need to find the first non-zero digit.
596 while (p != end && (*p == '0' || *p == '.'))
597 p++;
598
599 if (p == end)
600 return createError("Invalid trailing hexadecimal fraction!");
601
602 hexDigit = hexDigitValue(*p);
603
604 /* If we ran off the end it is exactly zero or one-half, otherwise
605 a little more. */
606 if (hexDigit == UINT_MAX)
607 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
608 else
609 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
610}
611
612/* Return the fraction lost were a bignum truncated losing the least
613 significant BITS bits. */
614static lostFraction
616 unsigned int partCount,
617 unsigned int bits)
618{
619 unsigned int lsb;
620
621 lsb = APInt::tcLSB(parts, partCount);
622
623 /* Note this is guaranteed true if bits == 0, or LSB == UINT_MAX. */
624 if (bits <= lsb)
625 return lfExactlyZero;
626 if (bits == lsb + 1)
627 return lfExactlyHalf;
628 if (bits <= partCount * APFloatBase::integerPartWidth &&
629 APInt::tcExtractBit(parts, bits - 1))
630 return lfMoreThanHalf;
631
632 return lfLessThanHalf;
633}
634
635/* Shift DST right BITS bits noting lost fraction. */
636static lostFraction
637shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
638{
639 lostFraction lost_fraction;
640
641 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
642
643 APInt::tcShiftRight(dst, parts, bits);
644
645 return lost_fraction;
646}
647
648/* Combine the effect of two lost fractions. */
649static lostFraction
651 lostFraction lessSignificant)
652{
653 if (lessSignificant != lfExactlyZero) {
654 if (moreSignificant == lfExactlyZero)
655 moreSignificant = lfLessThanHalf;
656 else if (moreSignificant == lfExactlyHalf)
657 moreSignificant = lfMoreThanHalf;
658 }
659
660 return moreSignificant;
661}
662
663/* The error from the true value, in half-ulps, on multiplying two
664 floating point numbers, which differ from the value they
665 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
666 than the returned value.
667
668 See "How to Read Floating Point Numbers Accurately" by William D
669 Clinger. */
static unsigned int
HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
{
  assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));

  const unsigned int inputError = HUerr1 + HUerr2;
  const unsigned int multiplyError = inexactMultiply ? 1U : 0U;

  /* With exact operands only the multiplication itself can contribute. */
  if (inputError == 0)
    return multiplyError * 2; /* <= inexactMultiply half-ulps. */

  return multiplyError + 2 * inputError;
}
680
681/* The number of ulps from the boundary (zero, or half if ISNEAREST)
682 when the least significant BITS are truncated. BITS cannot be
683 zero. */
685ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
686 bool isNearest) {
687 unsigned int count, partBits;
688 APFloatBase::integerPart part, boundary;
689
690 assert(bits != 0);
691
692 bits--;
694 partBits = bits % APFloatBase::integerPartWidth + 1;
695
696 part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits));
697
698 if (isNearest)
699 boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
700 else
701 boundary = 0;
702
703 if (count == 0) {
704 if (part - boundary <= boundary - part)
705 return part - boundary;
706 else
707 return boundary - part;
708 }
709
710 if (part == boundary) {
711 while (--count)
712 if (parts[count])
713 return ~(APFloatBase::integerPart) 0; /* A lot. */
714
715 return parts[0];
716 } else if (part == boundary - 1) {
717 while (--count)
718 if (~parts[count])
719 return ~(APFloatBase::integerPart) 0; /* A lot. */
720
721 return -parts[0];
722 }
723
724 return ~(APFloatBase::integerPart) 0; /* A lot. */
725}
726
727/* Place pow(5, power) in DST, and return the number of parts used.
728 DST must be at least one part larger than size of the answer. */
729static unsigned int
730powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
731 static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
733 pow5s[0] = 78125 * 5;
734
735 unsigned int partsCount[16] = { 1 };
736 APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
737 unsigned int result;
738 assert(power <= maxExponent);
739
740 p1 = dst;
741 p2 = scratch;
742
743 *p1 = firstEightPowers[power & 7];
744 power >>= 3;
745
746 result = 1;
747 pow5 = pow5s;
748
749 for (unsigned int n = 0; power; power >>= 1, n++) {
750 unsigned int pc;
751
752 pc = partsCount[n];
753
754 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
755 if (pc == 0) {
756 pc = partsCount[n - 1];
757 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
758 pc *= 2;
759 if (pow5[pc - 1] == 0)
760 pc--;
761 partsCount[n] = pc;
762 }
763
764 if (power & 1) {
766
767 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
768 result += pc;
769 if (p2[result - 1] == 0)
770 result--;
771
772 /* Now result is in p1 with partsCount parts and p2 is scratch
773 space. */
774 tmp = p1;
775 p1 = p2;
776 p2 = tmp;
777 }
778
779 pow5 += pc;
780 }
781
782 if (p1 != dst)
783 APInt::tcAssign(dst, p1, result);
784
785 return result;
786}
787
788/* Zero at the end to avoid modular arithmetic when adding one; used
789 when rounding up during hexadecimal output. */
790static const char hexDigitsLower[] = "0123456789abcdef0";
791static const char hexDigitsUpper[] = "0123456789ABCDEF0";
792static const char infinityL[] = "infinity";
793static const char infinityU[] = "INFINITY";
794static const char NaNL[] = "nan";
795static const char NaNU[] = "NAN";
796
797/* Write out an integerPart in hexadecimal, starting with the most
798 significant nibble. Write out exactly COUNT hexdigits, return
799 COUNT. */
800static unsigned int
801partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
802 const char *hexDigitChars)
803{
804 unsigned int result = count;
805
807
808 part >>= (APFloatBase::integerPartWidth - 4 * count);
809 while (count--) {
810 dst[count] = hexDigitChars[part & 0xf];
811 part >>= 4;
812 }
813
814 return result;
815}
816
/* Write N in decimal at DST (no terminator is appended) and return a
   pointer just past the last character written. */
static char *
writeUnsignedDecimal (char *dst, unsigned int n)
{
  char digits[40];
  unsigned int length = 0;

  /* Peel digits off least-significant first; always emits at least
     one digit so that zero prints as "0". */
  for (;;) {
    digits[length++] = '0' + n % 10;
    n /= 10;
    if (n == 0)
      break;
  }

  /* Replay them in the opposite order to get most-significant first. */
  while (length != 0)
    *dst++ = digits[--length];

  return dst;
}
834
835/* Write out a signed decimal integer. */
836static char *
837writeSignedDecimal (char *dst, int value)
838{
839 if (value < 0) {
840 *dst++ = '-';
841 dst = writeUnsignedDecimal(dst, -(unsigned) value);
842 } else
843 dst = writeUnsignedDecimal(dst, value);
844
845 return dst;
846}
847
848namespace detail {
849/* Constructors. */
850void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
851 unsigned int count;
852
853 semantics = ourSemantics;
854 count = partCount();
855 if (count > 1)
856 significand.parts = new integerPart[count];
857}
858
859void IEEEFloat::freeSignificand() {
860 if (needsCleanup())
861 delete [] significand.parts;
862}
863
864void IEEEFloat::assign(const IEEEFloat &rhs) {
865 assert(semantics == rhs.semantics);
866
867 sign = rhs.sign;
868 category = rhs.category;
869 exponent = rhs.exponent;
870 if (isFiniteNonZero() || category == fcNaN)
871 copySignificand(rhs);
872}
873
874void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
875 assert(isFiniteNonZero() || category == fcNaN);
876 assert(rhs.partCount() >= partCount());
877
878 APInt::tcAssign(significandParts(), rhs.significandParts(),
879 partCount());
880}
881
882/* Make this number a NaN, with an arbitrary but deterministic value
883 for the significand. If double or longer, this is a signalling NaN,
884 which may not be ideal. If float, this is QNaN(0). */
885void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
886 category = fcNaN;
887 sign = Negative;
888 exponent = exponentNaN();
889
890 integerPart *significand = significandParts();
891 unsigned numParts = partCount();
892
893 APInt fill_storage;
895 // Finite-only types do not distinguish signalling and quiet NaN, so
896 // make them all quiet.
897 SNaN = false;
898 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
899 sign = true;
900 fill_storage = APInt::getZero(semantics->precision - 1);
901 } else {
902 fill_storage = APInt::getAllOnes(semantics->precision - 1);
903 }
904 fill = &fill_storage;
905 }
906
907 // Set the significand bits to the fill.
908 if (!fill || fill->getNumWords() < numParts)
909 APInt::tcSet(significand, 0, numParts);
910 if (fill) {
911 APInt::tcAssign(significand, fill->getRawData(),
912 std::min(fill->getNumWords(), numParts));
913
914 // Zero out the excess bits of the significand.
915 unsigned bitsToPreserve = semantics->precision - 1;
916 unsigned part = bitsToPreserve / 64;
917 bitsToPreserve %= 64;
918 significand[part] &= ((1ULL << bitsToPreserve) - 1);
919 for (part++; part != numParts; ++part)
920 significand[part] = 0;
921 }
922
923 unsigned QNaNBit = semantics->precision - 2;
924
925 if (SNaN) {
926 // We always have to clear the QNaN bit to make it an SNaN.
927 APInt::tcClearBit(significand, QNaNBit);
928
929 // If there are no bits set in the payload, we have to set
930 // *something* to make it a NaN instead of an infinity;
931 // conventionally, this is the next bit down from the QNaN bit.
932 if (APInt::tcIsZero(significand, numParts))
933 APInt::tcSetBit(significand, QNaNBit - 1);
934 } else if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
935 // The only NaN is a quiet NaN, and it has no bits set in the significand.
936 // Do nothing.
937 } else {
938 // We always have to set the QNaN bit to make it a QNaN.
939 APInt::tcSetBit(significand, QNaNBit);
940 }
941
942 // For x87 extended precision, we want to make a NaN, not a
943 // pseudo-NaN. Maybe we should expose the ability to make
944 // pseudo-NaNs?
945 if (semantics == &semX87DoubleExtended)
946 APInt::tcSetBit(significand, QNaNBit + 1);
947}
948
950 if (this != &rhs) {
951 if (semantics != rhs.semantics) {
952 freeSignificand();
953 initialize(rhs.semantics);
954 }
955 assign(rhs);
956 }
957
958 return *this;
959}
960
962 freeSignificand();
963
964 semantics = rhs.semantics;
965 significand = rhs.significand;
966 exponent = rhs.exponent;
967 category = rhs.category;
968 sign = rhs.sign;
969
970 rhs.semantics = &semBogus;
971 return *this;
972}
973
975 return isFiniteNonZero() && (exponent == semantics->minExponent) &&
976 (APInt::tcExtractBit(significandParts(),
977 semantics->precision - 1) == 0);
978}
979
981 // The smallest number by magnitude in our format will be the smallest
982 // denormal, i.e. the floating point number with exponent being minimum
983 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
984 return isFiniteNonZero() && exponent == semantics->minExponent &&
985 significandMSB() == 0;
986}
987
989 return getCategory() == fcNormal && exponent == semantics->minExponent &&
990 isSignificandAllZerosExceptMSB();
991}
992
993bool IEEEFloat::isSignificandAllOnes() const {
994 // Test if the significand excluding the integral bit is all ones. This allows
995 // us to test for binade boundaries.
996 const integerPart *Parts = significandParts();
997 const unsigned PartCount = partCountForBits(semantics->precision);
998 for (unsigned i = 0; i < PartCount - 1; i++)
999 if (~Parts[i])
1000 return false;
1001
1002 // Set the unused high bits to all ones when we compare.
1003 const unsigned NumHighBits =
1004 PartCount*integerPartWidth - semantics->precision + 1;
1005 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
1006 "Can not have more high bits to fill than integerPartWidth");
1007 const integerPart HighBitFill =
1008 ~integerPart(0) << (integerPartWidth - NumHighBits);
1009 if (~(Parts[PartCount - 1] | HighBitFill))
1010 return false;
1011
1012 return true;
1013}
1014
1015bool IEEEFloat::isSignificandAllOnesExceptLSB() const {
1016 // Test if the significand excluding the integral bit is all ones except for
1017 // the least significant bit.
1018 const integerPart *Parts = significandParts();
1019
1020 if (Parts[0] & 1)
1021 return false;
1022
1023 const unsigned PartCount = partCountForBits(semantics->precision);
1024 for (unsigned i = 0; i < PartCount - 1; i++) {
1025 if (~Parts[i] & ~unsigned{!i})
1026 return false;
1027 }
1028
1029 // Set the unused high bits to all ones when we compare.
1030 const unsigned NumHighBits =
1031 PartCount * integerPartWidth - semantics->precision + 1;
1032 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
1033 "Can not have more high bits to fill than integerPartWidth");
1034 const integerPart HighBitFill = ~integerPart(0)
1035 << (integerPartWidth - NumHighBits);
1036 if (~(Parts[PartCount - 1] | HighBitFill | 0x1))
1037 return false;
1038
1039 return true;
1040}
1041
1042bool IEEEFloat::isSignificandAllZeros() const {
1043 // Test if the significand excluding the integral bit is all zeros. This
1044 // allows us to test for binade boundaries.
1045 const integerPart *Parts = significandParts();
1046 const unsigned PartCount = partCountForBits(semantics->precision);
1047
1048 for (unsigned i = 0; i < PartCount - 1; i++)
1049 if (Parts[i])
1050 return false;
1051
1052 // Compute how many bits are used in the final word.
1053 const unsigned NumHighBits =
1054 PartCount*integerPartWidth - semantics->precision + 1;
1055 assert(NumHighBits < integerPartWidth && "Can not have more high bits to "
1056 "clear than integerPartWidth");
1057 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
1058
1059 if (Parts[PartCount - 1] & HighBitMask)
1060 return false;
1061
1062 return true;
1063}
1064
1065bool IEEEFloat::isSignificandAllZerosExceptMSB() const {
1066 const integerPart *Parts = significandParts();
1067 const unsigned PartCount = partCountForBits(semantics->precision);
1068
1069 for (unsigned i = 0; i < PartCount - 1; i++) {
1070 if (Parts[i])
1071 return false;
1072 }
1073
1074 const unsigned NumHighBits =
1075 PartCount * integerPartWidth - semantics->precision + 1;
1076 return Parts[PartCount - 1] == integerPart(1)
1077 << (integerPartWidth - NumHighBits);
1078}
1079
// NOTE(review): the function header and the first half of this `if` condition
// are absent from this excerpt (the embedded numbering skips 1080-1081).
// Judging by the comments below, this predicate reports whether the value is
// the largest finite magnitude of its format -- confirm against the full file.
1082 semantics->nanEncoding == fltNanEncoding::AllOnes) {
1083 // The largest number by magnitude in our format will be the floating point
1084 // number with maximum exponent and with significand that is all ones except
1085 // the LSB.
1086 return isFiniteNonZero() && exponent == semantics->maxExponent &&
1087 isSignificandAllOnesExceptLSB();
1088 } else {
1089 // The largest number by magnitude in our format will be the floating point
1090 // number with maximum exponent and with significand that is all ones.
1091 return isFiniteNonZero() && exponent == semantics->maxExponent &&
1092 isSignificandAllOnes();
1093 }
1094}
1095
// NOTE(review): the function header is absent from this excerpt (embedded
// line 1096 is skipped). The body answers "is this value already integral?":
// non-finite values are rejected, otherwise a copy is rounded toward zero to
// an integral value and compared with the original.
1097 // This could be made more efficient; I'm going for obviously correct.
1098 if (!isFinite()) return false;
1099 IEEEFloat truncated = *this;
1100 truncated.roundToIntegral(rmTowardZero);
// Equal to its own truncation means nothing was dropped by the rounding.
1101 return compare(truncated) == cmpEqual;
1102}
1103
1104bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {
1105 if (this == &rhs)
1106 return true;
1107 if (semantics != rhs.semantics ||
1108 category != rhs.category ||
1109 sign != rhs.sign)
1110 return false;
1111 if (category==fcZero || category==fcInfinity)
1112 return true;
1113
1114 if (isFiniteNonZero() && exponent != rhs.exponent)
1115 return false;
1116
1117 return std::equal(significandParts(), significandParts() + partCount(),
1118 rhs.significandParts());
1119}
1120
// NOTE(review): the constructor header (embedded line 1121) and one statement
// before the closing brace (embedded line 1128) are absent from this excerpt.
// The visible statements build a positive fcNormal value whose lowest
// significand word holds `value`.
1122 initialize(&ourSemantics);
1123 sign = 0;
1124 category = fcNormal;
// Clear all words first so that only the lowest one carries `value`.
1125 zeroSignificand();
1126 exponent = ourSemantics.precision - 1;
1127 significandParts()[0] = value;
1129}
1130
// NOTE(review): the constructor header (embedded line 1131) is absent from
// this excerpt. The body binds the object to `ourSemantics` and makes it a
// zero, passing `false` to makeZero().
1132 initialize(&ourSemantics);
1133 makeZero(false);
1134}
1135
1136// Delegate to the previous constructor, because a later copy constructor may
1137// actually inspect the category, which must not be garbage.
// NOTE(review): the constructor header (embedded line 1138) is absent from
// this excerpt; only the delegating initializer is visible.
1139 : IEEEFloat(ourSemantics) {}
1140
// NOTE(review): the constructor header (embedded line 1141) is absent from
// this excerpt. The body initializes this object with rhs's semantics and
// then assigns rhs's value to it.
1142 initialize(rhs.semantics);
1143 assign(rhs);
1144}
1145
// NOTE(review): the constructor header (embedded line 1146) is absent from
// this excerpt. The body forwards to move assignment.
1147 *this = std::move(rhs);
1148}
1149
1150IEEEFloat::~IEEEFloat() { freeSignificand(); }
1151
1152unsigned int IEEEFloat::partCount() const {
1153 return partCountForBits(semantics->precision + 1);
1154}
1155
1156const IEEEFloat::integerPart *IEEEFloat::significandParts() const {
1157 return const_cast<IEEEFloat *>(this)->significandParts();
1158}
1159
1160IEEEFloat::integerPart *IEEEFloat::significandParts() {
1161 if (partCount() > 1)
1162 return significand.parts;
1163 else
1164 return &significand.part;
1165}
1166
1167void IEEEFloat::zeroSignificand() {
1168 APInt::tcSet(significandParts(), 0, partCount());
1169}
1170
1171/* Increment an fcNormal floating point number's significand. */
1172void IEEEFloat::incrementSignificand() {
1173 integerPart carry;
1174
1175 carry = APInt::tcIncrement(significandParts(), partCount());
1176
1177 /* Our callers should never cause us to overflow. */
1178 assert(carry == 0);
1179 (void)carry;
1180}
1181
1182/* Add the significand of the RHS. Returns the carry flag. */
1183IEEEFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
1184 integerPart *parts;
1185
1186 parts = significandParts();
1187
1188 assert(semantics == rhs.semantics);
1189 assert(exponent == rhs.exponent);
1190
1191 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
1192}
1193
1194/* Subtract the significand of the RHS with a borrow flag. Returns
1195 the borrow flag. */
1196IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
1197 integerPart borrow) {
1198 integerPart *parts;
1199
1200 parts = significandParts();
1201
1202 assert(semantics == rhs.semantics);
1203 assert(exponent == rhs.exponent);
1204
1205 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
1206 partCount());
1207}
1208
1209/* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
1210 on to the full-precision result of the multiplication. Returns the
1211 lost fraction. */
1212lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
1213 IEEEFloat addend) {
1214 unsigned int omsb; // One, not zero, based MSB.
1215 unsigned int partsCount, newPartsCount, precision;
1216 integerPart *lhsSignificand;
1217 integerPart scratch[4];
1218 integerPart *fullSignificand;
1219 lostFraction lost_fraction;
1220 bool ignored;
1221
1222 assert(semantics == rhs.semantics);
1223
1224 precision = semantics->precision;
1225
1226 // Allocate space for twice as many bits as the original significand, plus one
1227 // extra bit for the addition to overflow into.
1228 newPartsCount = partCountForBits(precision * 2 + 1);
1229
1230 if (newPartsCount > 4)
1231 fullSignificand = new integerPart[newPartsCount];
1232 else
1233 fullSignificand = scratch;
1234
1235 lhsSignificand = significandParts();
1236 partsCount = partCount();
1237
1238 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
1239 rhs.significandParts(), partsCount, partsCount);
1240
1241 lost_fraction = lfExactlyZero;
1242 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1243 exponent += rhs.exponent;
1244
1245 // Assume the operands involved in the multiplication are single-precision
1246 // FP, and the two multiplicants are:
1247 // *this = a23 . a22 ... a0 * 2^e1
1248 // rhs = b23 . b22 ... b0 * 2^e2
1249 // the result of multiplication is:
1250 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
1251 // Note that there are three significant bits at the left-hand side of the
1252 // radix point: two for the multiplication, and an overflow bit for the
1253 // addition (that will always be zero at this point). Move the radix point
1254 // toward left by two bits, and adjust exponent accordingly.
1255 exponent += 2;
1256
1257 if (addend.isNonZero()) {
1258 // The intermediate result of the multiplication has "2 * precision"
1259 // signicant bit; adjust the addend to be consistent with mul result.
1260 //
1261 Significand savedSignificand = significand;
1262 const fltSemantics *savedSemantics = semantics;
1263 fltSemantics extendedSemantics;
1265 unsigned int extendedPrecision;
1266
1267 // Normalize our MSB to one below the top bit to allow for overflow.
1268 extendedPrecision = 2 * precision + 1;
1269 if (omsb != extendedPrecision - 1) {
1270 assert(extendedPrecision > omsb);
1271 APInt::tcShiftLeft(fullSignificand, newPartsCount,
1272 (extendedPrecision - 1) - omsb);
1273 exponent -= (extendedPrecision - 1) - omsb;
1274 }
1275
1276 /* Create new semantics. */
1277 extendedSemantics = *semantics;
1278 extendedSemantics.precision = extendedPrecision;
1279
1280 if (newPartsCount == 1)
1281 significand.part = fullSignificand[0];
1282 else
1283 significand.parts = fullSignificand;
1284 semantics = &extendedSemantics;
1285
1286 // Make a copy so we can convert it to the extended semantics.
1287 // Note that we cannot convert the addend directly, as the extendedSemantics
1288 // is a local variable (which we take a reference to).
1289 IEEEFloat extendedAddend(addend);
1290 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
1291 assert(status == opOK);
1292 (void)status;
1293
1294 // Shift the significand of the addend right by one bit. This guarantees
1295 // that the high bit of the significand is zero (same as fullSignificand),
1296 // so the addition will overflow (if it does overflow at all) into the top bit.
1297 lost_fraction = extendedAddend.shiftSignificandRight(1);
1298 assert(lost_fraction == lfExactlyZero &&
1299 "Lost precision while shifting addend for fused-multiply-add.");
1300
1301 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
1302
1303 /* Restore our state. */
1304 if (newPartsCount == 1)
1305 fullSignificand[0] = significand.part;
1306 significand = savedSignificand;
1307 semantics = savedSemantics;
1308
1309 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1310 }
1311
1312 // Convert the result having "2 * precision" significant-bits back to the one
1313 // having "precision" significant-bits. First, move the radix point from
1314 // poision "2*precision - 1" to "precision - 1". The exponent need to be
1315 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
1316 exponent -= precision + 1;
1317
1318 // In case MSB resides at the left-hand side of radix point, shift the
1319 // mantissa right by some amount to make sure the MSB reside right before
1320 // the radix point (i.e. "MSB . rest-significant-bits").
1321 //
1322 // Note that the result is not normalized when "omsb < precision". So, the
1323 // caller needs to call IEEEFloat::normalize() if normalized value is
1324 // expected.
1325 if (omsb > precision) {
1326 unsigned int bits, significantParts;
1327 lostFraction lf;
1328
1329 bits = omsb - precision;
1330 significantParts = partCountForBits(omsb);
1331 lf = shiftRight(fullSignificand, significantParts, bits);
1332 lost_fraction = combineLostFractions(lf, lost_fraction);
1333 exponent += bits;
1334 }
1335
1336 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
1337
1338 if (newPartsCount > 4)
1339 delete [] fullSignificand;
1340
1341 return lost_fraction;
1342}
1343
1344lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) {
1345 return multiplySignificand(rhs, IEEEFloat(*semantics));
1346}
1347
1348/* Multiply the significands of LHS and RHS to DST. */
1349lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
1350 unsigned int bit, i, partsCount;
1351 const integerPart *rhsSignificand;
1352 integerPart *lhsSignificand, *dividend, *divisor;
1353 integerPart scratch[4];
1354 lostFraction lost_fraction;
1355
1356 assert(semantics == rhs.semantics);
1357
1358 lhsSignificand = significandParts();
1359 rhsSignificand = rhs.significandParts();
1360 partsCount = partCount();
1361
1362 if (partsCount > 2)
1363 dividend = new integerPart[partsCount * 2];
1364 else
1365 dividend = scratch;
1366
1367 divisor = dividend + partsCount;
1368
1369 /* Copy the dividend and divisor as they will be modified in-place. */
1370 for (i = 0; i < partsCount; i++) {
1371 dividend[i] = lhsSignificand[i];
1372 divisor[i] = rhsSignificand[i];
1373 lhsSignificand[i] = 0;
1374 }
1375
1376 exponent -= rhs.exponent;
1377
1378 unsigned int precision = semantics->precision;
1379
1380 /* Normalize the divisor. */
1381 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1382 if (bit) {
1383 exponent += bit;
1384 APInt::tcShiftLeft(divisor, partsCount, bit);
1385 }
1386
1387 /* Normalize the dividend. */
1388 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1389 if (bit) {
1390 exponent -= bit;
1391 APInt::tcShiftLeft(dividend, partsCount, bit);
1392 }
1393
1394 /* Ensure the dividend >= divisor initially for the loop below.
1395 Incidentally, this means that the division loop below is
1396 guaranteed to set the integer bit to one. */
1397 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1398 exponent--;
1399 APInt::tcShiftLeft(dividend, partsCount, 1);
1400 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1401 }
1402
1403 /* Long division. */
1404 for (bit = precision; bit; bit -= 1) {
1405 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1406 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1407 APInt::tcSetBit(lhsSignificand, bit - 1);
1408 }
1409
1410 APInt::tcShiftLeft(dividend, partsCount, 1);
1411 }
1412
1413 /* Figure out the lost fraction. */
1414 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1415
1416 if (cmp > 0)
1417 lost_fraction = lfMoreThanHalf;
1418 else if (cmp == 0)
1419 lost_fraction = lfExactlyHalf;
1420 else if (APInt::tcIsZero(dividend, partsCount))
1421 lost_fraction = lfExactlyZero;
1422 else
1423 lost_fraction = lfLessThanHalf;
1424
1425 if (partsCount > 2)
1426 delete [] dividend;
1427
1428 return lost_fraction;
1429}
1430
1431unsigned int IEEEFloat::significandMSB() const {
1432 return APInt::tcMSB(significandParts(), partCount());
1433}
1434
1435unsigned int IEEEFloat::significandLSB() const {
1436 return APInt::tcLSB(significandParts(), partCount());
1437}
1438
1439/* Note that a zero result is NOT normalized to fcZero. */
1440lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
1441 /* Our exponent should not overflow. */
1442 assert((ExponentType) (exponent + bits) >= exponent);
1443
1444 exponent += bits;
1445
1446 return shiftRight(significandParts(), partCount(), bits);
1447}
1448
1449/* Shift the significand left BITS bits, subtract BITS from its exponent. */
1450void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
1451 assert(bits < semantics->precision);
1452
1453 if (bits) {
1454 unsigned int partsCount = partCount();
1455
1456 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1457 exponent -= bits;
1458
1459 assert(!APInt::tcIsZero(significandParts(), partsCount));
1460 }
1461}
1462
// NOTE(review): the function header (embedded lines 1463-1464) and one line
// between the two visible asserts (embedded line 1468) are absent from this
// excerpt. The visible body orders two values by magnitude: first by
// exponent, then, on a tie, by an unsigned comparison of the significands.
1465 int compare;
1466
1467 assert(semantics == rhs.semantics);
1469 assert(rhs.isFiniteNonZero());
1470
// A positive difference means our exponent is the larger one.
1471 compare = exponent - rhs.exponent;
1472
1473 /* If exponents are equal, do an unsigned bignum comparison of the
1474 significands. */
1475 if (compare == 0)
1476 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1477 partCount());
1478
// Translate the sign of `compare` into the cmpResult enumerators.
1479 if (compare > 0)
1480 return cmpGreaterThan;
1481 else if (compare < 0)
1482 return cmpLessThan;
1483 else
1484 return cmpEqual;
1485}
1486
1487/* Set the least significant BITS bits of a bignum, clear the
1488 rest. */
1489static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts,
1490 unsigned bits) {
1491 unsigned i = 0;
1492 while (bits > APInt::APINT_BITS_PER_WORD) {
1493 dst[i++] = ~(APInt::WordType)0;
1495 }
1496
1497 if (bits)
1498 dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits);
1499
1500 while (i < parts)
1501 dst[i++] = 0;
1502}
1503
1504/* Handle overflow. Sign is preserved. We either become infinity or
1505 the largest finite number. */
1506IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
1507 /* Infinity? */
1508 if (rounding_mode == rmNearestTiesToEven ||
1509 rounding_mode == rmNearestTiesToAway ||
1510 (rounding_mode == rmTowardPositive && !sign) ||
1511 (rounding_mode == rmTowardNegative && sign)) {
1513 makeNaN(false, sign);
1514 else
1515 category = fcInfinity;
1516 return (opStatus) (opOverflow | opInexact);
1517 }
1518
1519 /* Otherwise we become the largest finite number. */
1520 category = fcNormal;
1521 exponent = semantics->maxExponent;
1522 tcSetLeastSignificantBits(significandParts(), partCount(),
1523 semantics->precision);
1526 APInt::tcClearBit(significandParts(), 0);
1527
1528 return opInexact;
1529}
1530
1531/* Returns TRUE if, when truncating the current number, with BIT the
1532 new LSB, with the given lost fraction and rounding mode, the result
1533 would need to be rounded away from zero (i.e., by increasing the
1534 signficand). This routine must work for fcZero of both signs, and
1535 fcNormal numbers. */
1536bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
1537 lostFraction lost_fraction,
1538 unsigned int bit) const {
1539 /* NaNs and infinities should not have lost fractions. */
1540 assert(isFiniteNonZero() || category == fcZero);
1541
1542 /* Current callers never pass this so we don't handle it. */
1543 assert(lost_fraction != lfExactlyZero);
1544
1545 switch (rounding_mode) {
1547 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1548
1550 if (lost_fraction == lfMoreThanHalf)
1551 return true;
1552
1553 /* Our zeroes don't have a significand to test. */
1554 if (lost_fraction == lfExactlyHalf && category != fcZero)
1555 return APInt::tcExtractBit(significandParts(), bit);
1556
1557 return false;
1558
1559 case rmTowardZero:
1560 return false;
1561
1562 case rmTowardPositive:
1563 return !sign;
1564
1565 case rmTowardNegative:
1566 return sign;
1567
1568 default:
1569 break;
1570 }
1571 llvm_unreachable("Invalid rounding mode found");
1572}
1573
1574IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
1575 lostFraction lost_fraction) {
1576 unsigned int omsb; /* One, not zero, based MSB. */
1577 int exponentChange;
1578
1579 if (!isFiniteNonZero())
1580 return opOK;
1581
1582 /* Before rounding normalize the exponent of fcNormal numbers. */
1583 omsb = significandMSB() + 1;
1584
1585 if (omsb) {
1586 /* OMSB is numbered from 1. We want to place it in the integer
1587 bit numbered PRECISION if possible, with a compensating change in
1588 the exponent. */
1589 exponentChange = omsb - semantics->precision;
1590
1591 /* If the resulting exponent is too high, overflow according to
1592 the rounding mode. */
1593 if (exponent + exponentChange > semantics->maxExponent)
1594 return handleOverflow(rounding_mode);
1595
1596 /* Subnormal numbers have exponent minExponent, and their MSB
1597 is forced based on that. */
1598 if (exponent + exponentChange < semantics->minExponent)
1599 exponentChange = semantics->minExponent - exponent;
1600
1601 /* Shifting left is easy as we don't lose precision. */
1602 if (exponentChange < 0) {
1603 assert(lost_fraction == lfExactlyZero);
1604
1605 shiftSignificandLeft(-exponentChange);
1606
1607 return opOK;
1608 }
1609
1610 if (exponentChange > 0) {
1611 lostFraction lf;
1612
1613 /* Shift right and capture any new lost fraction. */
1614 lf = shiftSignificandRight(exponentChange);
1615
1616 lost_fraction = combineLostFractions(lf, lost_fraction);
1617
1618 /* Keep OMSB up-to-date. */
1619 if (omsb > (unsigned) exponentChange)
1620 omsb -= exponentChange;
1621 else
1622 omsb = 0;
1623 }
1624 }
1625
1626 // The all-ones values is an overflow if NaN is all ones. If NaN is
1627 // represented by negative zero, then it is a valid finite value.
1629 semantics->nanEncoding == fltNanEncoding::AllOnes &&
1630 exponent == semantics->maxExponent && isSignificandAllOnes())
1631 return handleOverflow(rounding_mode);
1632
1633 /* Now round the number according to rounding_mode given the lost
1634 fraction. */
1635
1636 /* As specified in IEEE 754, since we do not trap we do not report
1637 underflow for exact results. */
1638 if (lost_fraction == lfExactlyZero) {
1639 /* Canonicalize zeroes. */
1640 if (omsb == 0) {
1641 category = fcZero;
1642 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
1643 sign = false;
1644 }
1645
1646 return opOK;
1647 }
1648
1649 /* Increment the significand if we're rounding away from zero. */
1650 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1651 if (omsb == 0)
1652 exponent = semantics->minExponent;
1653
1654 incrementSignificand();
1655 omsb = significandMSB() + 1;
1656
1657 /* Did the significand increment overflow? */
1658 if (omsb == (unsigned) semantics->precision + 1) {
1659 /* Renormalize by incrementing the exponent and shifting our
1660 significand right one. However if we already have the
1661 maximum exponent we overflow to infinity. */
1662 if (exponent == semantics->maxExponent)
1663 // Invoke overflow handling with a rounding mode that will guarantee
1664 // that the result gets turned into the correct infinity representation.
1665 // This is needed instead of just setting the category to infinity to
1666 // account for 8-bit floating point types that have no inf, only NaN.
1667 return handleOverflow(sign ? rmTowardNegative : rmTowardPositive);
1668
1669 shiftSignificandRight(1);
1670
1671 return opInexact;
1672 }
1673
1674 // The all-ones values is an overflow if NaN is all ones. If NaN is
1675 // represented by negative zero, then it is a valid finite value.
1677 semantics->nanEncoding == fltNanEncoding::AllOnes &&
1678 exponent == semantics->maxExponent && isSignificandAllOnes())
1679 return handleOverflow(rounding_mode);
1680 }
1681
1682 /* The normal case - we were and are not denormal, and any
1683 significand increment above didn't overflow. */
1684 if (omsb == semantics->precision)
1685 return opInexact;
1686
1687 /* We have a non-zero denormal. */
1688 assert(omsb < semantics->precision);
1689
1690 /* Canonicalize zeroes. */
1691 if (omsb == 0) {
1692 category = fcZero;
1693 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
1694 sign = false;
1695 }
1696
1697 /* The fcZero case is a denormal that underflowed to zero. */
1698 return (opStatus) (opUnderflow | opInexact);
1699}
1700
// Resolve an addition/subtraction that can be decided from the operands'
// categories alone. `subtract` selects subtraction; it is folded into the
// result sign where rhs determines the outcome. The final arm returns
// opDivByZero, which the caller treats as "not a simple case".
// NOTE(review): the `case` labels introducing each arm are absent from this
// excerpt (gaps in the embedded numbering), so which category pairs reach
// which arm cannot be read off here -- confirm against the full file.
1701IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
1702 bool subtract) {
1703 switch (PackCategoriesIntoKey(category, rhs.category)) {
1704 default:
1705 llvm_unreachable(nullptr);
1706
// Arm: take rhs's value, then fall into the signaling check below.
1710 assign(rhs);
1711 [[fallthrough]];
// Arm: a signaling value is quieted and reported as an invalid operation.
1716 if (isSignaling()) {
1717 makeQuiet();
1718 return opInvalidOp;
1719 }
1720 return rhs.isSignaling() ? opInvalidOp : opOK;
1721
// Arm: *this is already the result.
1725 return opOK;
1726
// Arm: the result is an infinity carrying rhs's (possibly flipped) sign.
1729 category = fcInfinity;
1730 sign = rhs.sign ^ subtract;
1731 return opOK;
1732
// Arm: the result is rhs with its sign flipped when subtracting.
1734 assign(rhs);
1735 sign = rhs.sign ^ subtract;
1736 return opOK;
1737
1739 /* Sign depends on rounding mode; handled by caller. */
1740 return opOK;
1741
1743 /* Differently signed infinities can only be validly
1744 subtracted. */
1745 if (((sign ^ rhs.sign)!=0) != subtract) {
1746 makeNaN();
1747 return opInvalidOp;
1748 }
1749
1750 return opOK;
1751
// Arm: sentinel telling the caller that significand arithmetic is required.
1753 return opDivByZero;
1754 }
1755}
1756
1757/* Add or subtract two normal numbers. */
1758lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
1759 bool subtract) {
1760 integerPart carry;
1761 lostFraction lost_fraction;
1762 int bits;
1763
1764 /* Determine if the operation on the absolute values is effectively
1765 an addition or subtraction. */
1766 subtract ^= static_cast<bool>(sign ^ rhs.sign);
1767
1768 /* Are we bigger exponent-wise than the RHS? */
1769 bits = exponent - rhs.exponent;
1770
1771 /* Subtraction is more subtle than one might naively expect. */
1772 if (subtract) {
1773 IEEEFloat temp_rhs(rhs);
1774
1775 if (bits == 0)
1776 lost_fraction = lfExactlyZero;
1777 else if (bits > 0) {
1778 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1779 shiftSignificandLeft(1);
1780 } else {
1781 lost_fraction = shiftSignificandRight(-bits - 1);
1782 temp_rhs.shiftSignificandLeft(1);
1783 }
1784
1785 // Should we reverse the subtraction.
1786 if (compareAbsoluteValue(temp_rhs) == cmpLessThan) {
1787 carry = temp_rhs.subtractSignificand
1788 (*this, lost_fraction != lfExactlyZero);
1789 copySignificand(temp_rhs);
1790 sign = !sign;
1791 } else {
1792 carry = subtractSignificand
1793 (temp_rhs, lost_fraction != lfExactlyZero);
1794 }
1795
1796 /* Invert the lost fraction - it was on the RHS and
1797 subtracted. */
1798 if (lost_fraction == lfLessThanHalf)
1799 lost_fraction = lfMoreThanHalf;
1800 else if (lost_fraction == lfMoreThanHalf)
1801 lost_fraction = lfLessThanHalf;
1802
1803 /* The code above is intended to ensure that no borrow is
1804 necessary. */
1805 assert(!carry);
1806 (void)carry;
1807 } else {
1808 if (bits > 0) {
1809 IEEEFloat temp_rhs(rhs);
1810
1811 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1812 carry = addSignificand(temp_rhs);
1813 } else {
1814 lost_fraction = shiftSignificandRight(-bits);
1815 carry = addSignificand(rhs);
1816 }
1817
1818 /* We have a guard bit; generating a carry cannot happen. */
1819 assert(!carry);
1820 (void)carry;
1821 }
1822
1823 return lost_fraction;
1824}
1825
// Resolve a multiplication from the operands' categories, for the cases that
// need no significand arithmetic.
// NOTE(review): the `case` labels introducing each arm are absent from this
// excerpt (gaps in the embedded numbering) -- confirm the category pairs
// against the full file.
1826IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
1827 switch (PackCategoriesIntoKey(category, rhs.category)) {
1828 default:
1829 llvm_unreachable(nullptr);
1830
// Arm: take rhs's value; clearing `sign` here makes the XOR below yield
// rhs's sign.
1834 assign(rhs);
1835 sign = false;
1836 [[fallthrough]];
1841 sign ^= rhs.sign; // restore the original sign
// A signaling value is quieted and reported as an invalid operation.
1842 if (isSignaling()) {
1843 makeQuiet();
1844 return opInvalidOp;
1845 }
1846 return rhs.isSignaling() ? opInvalidOp : opOK;
1847
// Arm: the product is an infinity.
1851 category = fcInfinity;
1852 return opOK;
1853
// Arm: the product is a zero.
1857 category = fcZero;
1858 return opOK;
1859
// Arm: the product is undefined; produce a NaN and flag invalid operation.
1862 makeNaN();
1863 return opInvalidOp;
1864
// Arm: nothing to change here.
1866 return opOK;
1867 }
1868}
1869
// Resolve a division from the operands' categories, for the cases that need
// no significand arithmetic.
// NOTE(review): the `case` labels introducing each arm are absent from this
// excerpt (gaps in the embedded numbering) -- confirm the category pairs
// against the full file.
1870IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
1871 switch (PackCategoriesIntoKey(category, rhs.category)) {
1872 default:
1873 llvm_unreachable(nullptr);
1874
// Arm: take rhs's value; clearing `sign` here makes the XOR below yield
// rhs's sign.
1878 assign(rhs);
1879 sign = false;
1880 [[fallthrough]];
1885 sign ^= rhs.sign; // restore the original sign
// A signaling value is quieted and reported as an invalid operation.
1886 if (isSignaling()) {
1887 makeQuiet();
1888 return opInvalidOp;
1889 }
1890 return rhs.isSignaling() ? opInvalidOp : opOK;
1891
// Arm: *this is already the result.
1896 return opOK;
1897
// Arm: the quotient is a zero.
1899 category = fcZero;
1900 return opOK;
1901
// Arm: reported as opDivByZero. Formats whose only non-finite value is NaN
// produce a NaN; all others produce an infinity.
1903 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
1904 makeNaN(false, sign);
1905 else
1906 category = fcInfinity;
1907 return opDivByZero;
1908
// Arm: the quotient is undefined; produce a NaN and flag invalid operation.
1911 makeNaN();
1912 return opInvalidOp;
1913
// Arm: nothing to change here.
1915 return opOK;
1916 }
1917}
1918
// Resolve the special cases of mod from the operands' categories.
// NOTE(review): the `case` labels introducing each arm are absent from this
// excerpt (gaps in the embedded numbering) -- confirm the category pairs
// against the full file.
1919IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
1920 switch (PackCategoriesIntoKey(category, rhs.category)) {
1921 default:
1922 llvm_unreachable(nullptr);
1923
// Arm: take rhs's value, then fall into the signaling check below.
1927 assign(rhs);
1928 [[fallthrough]];
// A signaling value is quieted and reported as an invalid operation.
1933 if (isSignaling()) {
1934 makeQuiet();
1935 return opInvalidOp;
1936 }
1937 return rhs.isSignaling() ? opInvalidOp : opOK;
1938
// Arm: *this is already the result.
1942 return opOK;
1943
// Arm: the result is undefined; produce a NaN and flag invalid operation.
1949 makeNaN();
1950 return opInvalidOp;
1951
// Arm: nothing to change here.
1953 return opOK;
1954 }
1955}
1956
// Resolve the special cases of remainder from the operands' categories. The
// final arm returns opDivByZero purely as a marker that no special case
// applied.
// NOTE(review): the `case` labels introducing each arm are absent from this
// excerpt (gaps in the embedded numbering) -- confirm the category pairs
// against the full file.
1957IEEEFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) {
1958 switch (PackCategoriesIntoKey(category, rhs.category)) {
1959 default:
1960 llvm_unreachable(nullptr);
1961
// Arm: take rhs's value, then fall into the signaling check below.
1965 assign(rhs);
1966 [[fallthrough]];
// A signaling value is quieted and reported as an invalid operation.
1971 if (isSignaling()) {
1972 makeQuiet();
1973 return opInvalidOp;
1974 }
1975 return rhs.isSignaling() ? opInvalidOp : opOK;
1976
// Arm: *this is already the result.
1980 return opOK;
1981
// Arm: the result is undefined; produce a NaN and flag invalid operation.
1987 makeNaN();
1988 return opInvalidOp;
1989
1991 return opDivByZero; // fake status, indicating this is not a special case
1992 }
1993}
1994
1995/* Change sign. */
// NOTE(review): the function header (embedded line 1996) is absent from this
// excerpt.
1997 // With NaN-as-negative-zero, neither NaN nor negative zero can change
1998 // their signs.
1999 if (semantics->nanEncoding == fltNanEncoding::NegativeZero &&
2000 (isZero() || isNaN()))
2001 return;
2002 /* Look mummy, this one's easy. */
2003 sign = !sign;
2004}
2005
2006/* Normalized addition or subtraction. */
2007IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
2008 roundingMode rounding_mode,
2009 bool subtract) {
2010 opStatus fs;
2011
2012 fs = addOrSubtractSpecials(rhs, subtract);
2013
2014 /* This return code means it was not a simple case. */
2015 if (fs == opDivByZero) {
2016 lostFraction lost_fraction;
2017
2018 lost_fraction = addOrSubtractSignificand(rhs, subtract);
2019 fs = normalize(rounding_mode, lost_fraction);
2020
2021 /* Can only be zero if we lost no fraction. */
2022 assert(category != fcZero || lost_fraction == lfExactlyZero);
2023 }
2024
2025 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2026 positive zero unless rounding to minus infinity, except that
2027 adding two like-signed zeroes gives that zero. */
2028 if (category == fcZero) {
2029 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
2030 sign = (rounding_mode == rmTowardNegative);
2031 // NaN-in-negative-zero means zeros need to be normalized to +0.
2032 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2033 sign = false;
2034 }
2035
2036 return fs;
2037}
2038
2039/* Normalized addition. */
// NOTE(review): the first line of the function header (embedded line 2040) is
// absent from this excerpt. The body forwards to addOrSubtract with
// subtract == false.
2041 roundingMode rounding_mode) {
2042 return addOrSubtract(rhs, rounding_mode, false);
2043}
2044
2045/* Normalized subtraction. */
// NOTE(review): the first line of the function header (embedded line 2046) is
// absent from this excerpt. The body forwards to addOrSubtract with
// subtract == true.
2047 roundingMode rounding_mode) {
2048 return addOrSubtract(rhs, rounding_mode, true);
2049}
2050
2051/* Normalized multiply. */
// NOTE(review): the first line of the function header (embedded line 2052) is
// absent from this excerpt.
2053 roundingMode rounding_mode) {
2054 opStatus fs;
2055
// The product's sign is the XOR of the operand signs; set it before the
// category dispatch, which relies on it having been applied already.
2056 sign ^= rhs.sign;
2057 fs = multiplySpecials(rhs);
2058
// Formats that encode NaN as negative zero have no negative zero value.
2059 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2060 sign = false;
// Only finite non-zero results still need significand arithmetic.
2061 if (isFiniteNonZero()) {
2062 lostFraction lost_fraction = multiplySignificand(rhs);
2063 fs = normalize(rounding_mode, lost_fraction);
// A non-zero lost fraction means the result is inexact.
2064 if (lost_fraction != lfExactlyZero)
2065 fs = (opStatus) (fs | opInexact);
2066 }
2067
2068 return fs;
2069}
2070
2071/* Normalized divide. */
// NOTE(review): the first line of the function header (embedded line 2072) is
// absent from this excerpt.
2073 roundingMode rounding_mode) {
2074 opStatus fs;
2075
// The quotient's sign is the XOR of the operand signs; set it before the
// category dispatch, which relies on it having been applied already.
2076 sign ^= rhs.sign;
2077 fs = divideSpecials(rhs);
2078
// Formats that encode NaN as negative zero have no negative zero value.
2079 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2080 sign = false;
// Only finite non-zero results still need significand arithmetic.
2081 if (isFiniteNonZero()) {
2082 lostFraction lost_fraction = divideSignificand(rhs);
2083 fs = normalize(rounding_mode, lost_fraction);
// A non-zero lost fraction means the result is inexact.
2084 if (lost_fraction != lfExactlyZero)
2085 fs = (opStatus) (fs | opInexact);
2086 }
2087
2088 return fs;
2089}
2090
2091/* Normalized remainder. */
// NOTE(review): the function header (embedded line 2092) and the statement
// that opens each of the two "subtract p" steps (embedded lines 2171 and
// 2184) are absent from this excerpt; only their trailing asserts are visible.
2093 opStatus fs;
2094 unsigned int origSign = sign;
2095
2096 // First handle the special cases.
2097 fs = remainderSpecials(rhs);
2098 if (fs != opDivByZero)
2099 return fs;
2100
2101 fs = opOK;
2102
2103 // Make sure the current value is less than twice the denom. If the addition
2104 // did not succeed (an overflow has happened), this means that the finite
2105 // value we currently possess must be less than twice the denom (as we are
2106 // using the same semantics).
2107 IEEEFloat P2 = rhs;
2108 if (P2.add(rhs, rmNearestTiesToEven) == opOK) {
2109 fs = mod(P2);
2110 assert(fs == opOK);
2111 }
2112
2113 // Let's work with absolute numbers.
2114 IEEEFloat P = rhs;
2115 P.sign = false;
2116 sign = false;
2117
2118 //
2119 // To calculate the remainder we use the following scheme.
2120 //
2121 // The remainder is defined as follows:
2122 //
2123 // remainder = numer - rquot * denom = x - r * p
2124 //
2125 // Where r is the result of: x/p, rounded toward the nearest integral value
2126 // (with halfway cases rounded toward the even number).
2127 //
2128 // Currently, (after x mod 2p):
2129 // r is the number of 2p's present inside x, which is inherently, an even
2130 // number of p's.
2131 //
2132 // We may split the remaining calculation into 4 options:
2133 // - if x < 0.5p then we round to the nearest number which is 0, and are done.
2134 // - if x == 0.5p then we round to the nearest even number which is 0, and we
2135 // are done as well.
2136 // - if 0.5p < x < p then we round to nearest number which is 1, and we have
2137 // to subtract 1p at least once.
2138 // - if x >= p then we must subtract p at least once, as x must be a
2139 // remainder.
2140 //
2141 // By now, we were done, or we added 1 to r, which in turn, is now an odd number.
2142 //
2143 // We can now split the remaining calculation to the following 3 options:
2144 // - if x < 0.5p then we round to the nearest number which is 0, and are done.
2145 // - if x == 0.5p then we round to the nearest even number. As r is odd, we
2146 // must round up to the next even number. so we must subtract p once more.
2147 // - if x > 0.5p (and inherently x < p) then we must round r up to the next
2148 // integral, and subtract p once more.
2149 //
2150
2151 // Extend the semantics to prevent an overflow/underflow or inexact result.
2152 bool losesInfo;
2153 fltSemantics extendedSemantics = *semantics;
2154 extendedSemantics.maxExponent++;
2155 extendedSemantics.minExponent--;
2156 extendedSemantics.precision += 2;
2157
2158 IEEEFloat VEx = *this;
2159 fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2160 assert(fs == opOK && !losesInfo);
2161 IEEEFloat PEx = P;
2162 fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2163 assert(fs == opOK && !losesInfo);
2164
2165 // It is simpler to work with 2x instead of 0.5p, and we do not need to lose
2166 // any fraction.
2167 fs = VEx.add(VEx, rmNearestTiesToEven);
2168 assert(fs == opOK);
2169
2170 if (VEx.compare(PEx) == cmpGreaterThan) {
2172 assert(fs == opOK);
2173
2174 // Make VEx = this.add(this), but because we have different semantics, we do
2175 // not want to `convert` again, so we just subtract PEx twice (which equals
2176 // to the desired value).
2177 fs = VEx.subtract(PEx, rmNearestTiesToEven);
2178 assert(fs == opOK);
2179 fs = VEx.subtract(PEx, rmNearestTiesToEven);
2180 assert(fs == opOK);
2181
2182 cmpResult result = VEx.compare(PEx);
2183 if (result == cmpGreaterThan || result == cmpEqual) {
2185 assert(fs == opOK);
2186 }
2187 }
2188
2189 if (isZero()) {
2190 sign = origSign; // IEEE754 requires this
2191 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2192 // But some 8-bit floats only have positive 0.
2193 sign = false;
2194 }
2195
2196 else
2197 sign ^= origSign;
2198 return fs;
2199}
2200
2201/* Normalized llvm frem (C fmod). */
// NOTE(review): the function header (embedded line 2202), the tail of the
// `while` condition (embedded line 2208) and the statement that produces `fs`
// inside the loop (embedded line 2217) are absent from this excerpt.
2203 opStatus fs;
2204 fs = modSpecials(rhs);
2205 unsigned int origSign = sign;
2206
2207 while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
// Scale rhs by the difference of the two binary exponents to obtain the
// candidate V for this iteration.
2209 int Exp = ilogb(*this) - ilogb(rhs);
2210 IEEEFloat V = scalbn(rhs, Exp, rmNearestTiesToEven);
2211 // V can overflow to NaN with fltNonfiniteBehavior::NanOnly, so explicitly
2212 // check for it.
// If V is unusable or larger in magnitude than *this, back off by one binade.
2213 if (V.isNaN() || compareAbsoluteValue(V) == cmpLessThan)
2214 V = scalbn(rhs, Exp - 1, rmNearestTiesToEven);
2215 V.sign = sign;
2216
2218 assert(fs==opOK);
2219 }
2220 if (isZero()) {
2221 sign = origSign; // fmod requires this
// Formats that encode NaN as negative zero have no negative zero value.
2222 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2223 sign = false;
2224 }
2225 return fs;
2226}
2227
2228/* Normalized fused-multiply-add.
   Updates *this using MULTIPLICAND and ADDEND and returns the resulting
   status flags.
   NOTE(review): listing line 2229 (the start of the signature, which declares
   the multiplicand parameter) is missing from this text. */
2230 const IEEEFloat &addend,
2231 roundingMode rounding_mode) {
2232 opStatus fs;
2233
2234 /* Post-multiplication sign, before addition. */
2235 sign ^= multiplicand.sign;
2236
2237 /* If and only if all arguments are normal do we need to do an
2238 extended-precision calculation. */
// Note the test on the addend is isFinite(), so a zero addend also takes this
// path; only *this and the multiplicand must be finite and non-zero.
2239 if (isFiniteNonZero() &&
2240 multiplicand.isFiniteNonZero() &&
2241 addend.isFinite()) {
2242 lostFraction lost_fraction;
2243
// The addend is handed to multiplySignificand together with the multiplicand,
// and normalize() is then called once on the combined result.
2244 lost_fraction = multiplySignificand(multiplicand, addend);
2245 fs = normalize(rounding_mode, lost_fraction);
// Any discarded fraction makes the result inexact, whatever normalize said.
2246 if (lost_fraction != lfExactlyZero)
2247 fs = (opStatus) (fs | opInexact);
2248
2249 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2250 positive zero unless rounding to minus infinity, except that
2251 adding two like-signed zeroes gives that zero. */
2252 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) {
2253 sign = (rounding_mode == rmTowardNegative);
// Formats whose NaN encoding is "negative zero" cannot hold -0, so force +0.
2254 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2255 sign = false;
2256 }
2257 } else {
2258 fs = multiplySpecials(multiplicand);
2259
2260 /* FS can only be opOK or opInvalidOp. There is no more work
2261 to do in the latter case. The IEEE-754R standard says it is
2262 implementation-defined in this case whether, if ADDEND is a
2263 quiet NaN, we raise invalid op; this implementation does so.
2264
2265 If we need to do the addition we can do so with normal
2266 precision. */
// The final 'false' argument is taken to select addition rather than
// subtraction -- TODO confirm against addOrSubtract's declaration.
2267 if (fs == opOK)
2268 fs = addOrSubtract(addend, rounding_mode, false);
2269 }
2270
2271 return fs;
2272}
2273
2274/* Rounding-mode correct round to integral value.
   Infinities, quiet NaNs, zeroes and values whose exponent already implies an
   integer are returned unchanged with opOK; a signaling NaN is quieted and
   opInvalidOp is returned.
   NOTE(review): listing lines 2275 (the signature) and 2339 (the end of the
   convertFromAPInt call) are missing from this text. */
2276 opStatus fs;
2277
2278 if (isInfinity())
2279 // [IEEE Std 754-2008 6.1]:
2280 // The behavior of infinity in floating-point arithmetic is derived from the
2281 // limiting cases of real arithmetic with operands of arbitrarily
2282 // large magnitude, when such a limit exists.
2283 // ...
2284 // Operations on infinite operands are usually exact and therefore signal no
2285 // exceptions ...
2286 return opOK;
2287
2288 if (isNaN()) {
2289 if (isSignaling()) {
2290 // [IEEE Std 754-2008 6.2]:
2291 // Under default exception handling, any operation signaling an invalid
2292 // operation exception and for which a floating-point result is to be
2293 // delivered shall deliver a quiet NaN.
2294 makeQuiet();
2295 // [IEEE Std 754-2008 6.2]:
2296 // Signaling NaNs shall be reserved operands that, under default exception
2297 // handling, signal the invalid operation exception(see 7.2) for every
2298 // general-computational and signaling-computational operation except for
2299 // the conversions described in 5.12.
2300 return opInvalidOp;
2301 } else {
2302 // [IEEE Std 754-2008 6.2]:
2303 // For an operation with quiet NaN inputs, other than maximum and minimum
2304 // operations, if a floating-point result is to be delivered the result
2305 // shall be a quiet NaN which should be one of the input NaNs.
2306 // ...
2307 // Every general-computational and quiet-computational operation involving
2308 // one or more input NaNs, none of them signaling, shall signal no
2309 // exception, except fusedMultiplyAdd might signal the invalid operation
2310 // exception(see 7.2).
2311 return opOK;
2312 }
2313 }
2314
2315 if (isZero()) {
2316 // [IEEE Std 754-2008 6.3]:
2317 // ... the sign of the result of conversions, the quantize operation, the
2318 // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is
2319 // the sign of the first or only operand.
2320 return opOK;
2321 }
2322
2323 // If the exponent is large enough, we know that this value is already
2324 // integral, and the arithmetic below would potentially cause it to saturate
2325 // to +/-Inf. Bail out early instead.
2326 if (exponent+1 >= (int)semanticsPrecision(*semantics))
2327 return opOK;
2328
2329 // The algorithm here is quite simple: we add 2^(p-1), where p is the
2330 // precision of our format, and then subtract it back off again. The choice
2331 // of rounding modes for the addition/subtraction determines the rounding mode
2332 // for our integral rounding as well.
2333 // NOTE: When the input value is negative, we do subtraction followed by
2334 // addition instead.
// Build the integer 1 << (p-1) in an APInt wide enough to hold it, then
// convert it to this format; the assert below expects that to be exact.
2335 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
2336 IntegerConstant <<= semanticsPrecision(*semantics)-1;
2337 IEEEFloat MagicConstant(*semantics);
2338 fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
2340 assert(fs == opOK);
// Giving the constant the sign of *this is what turns add-then-subtract into
// subtract-then-add for negative inputs.
2341 MagicConstant.sign = sign;
2342
2343 // Preserve the input sign so that we can handle the case of zero result
2344 // correctly.
2345 bool inputSign = isNegative();
2346
// The status of this add is what the function returns.
2347 fs = add(MagicConstant, rounding_mode);
2348
2349 // Current value and 'MagicConstant' are both integers, so the result of the
2350 // subtraction is always exact according to Sterbenz' lemma.
// Its status is therefore deliberately not stored.
2351 subtract(MagicConstant, rounding_mode);
2352
2353 // Restore the input sign.
2354 if (inputSign != isNegative())
2355 changeSign();
2356
2357 return fs;
2358}
2359
2360
2361/* Comparison requires normalized numbers.
   Returns a cmpResult describing how *this relates to rhs; both operands must
   use the same semantics (asserted below).
   NOTE(review): the listing omits line 2362 (the signature) and every `case`
   label of the switch (lines 2371-2377, 2380-2382, 2388-2390, 2396, 2404 and
   2407).  The statement groups inside the switch therefore appear without the
   category pairs that select them; consult upstream for the labels. */
2363 cmpResult result;
2364
2365 assert(semantics == rhs.semantics);
2366
// Dispatch on the pair (category of *this, category of rhs).
2367 switch (PackCategoriesIntoKey(category, rhs.category)) {
2368 default:
2369 llvm_unreachable(nullptr);
2370
2378 return cmpUnordered;
2379
// This group is decided purely by the sign of *this.
2383 if (sign)
2384 return cmpLessThan;
2385 else
2386 return cmpGreaterThan;
2387
// This group is decided purely by the sign of rhs.
2391 if (rhs.sign)
2392 return cmpGreaterThan;
2393 else
2394 return cmpLessThan;
2395
// This group compares the two signs only: equal signs compare equal.
2397 if (sign == rhs.sign)
2398 return cmpEqual;
2399 else if (sign)
2400 return cmpLessThan;
2401 else
2402 return cmpGreaterThan;
2403
2405 return cmpEqual;
2406
// The remaining case leaves the switch and is handled by the code below.
2408 break;
2409 }
2410
2411 /* Two normal numbers. Do they have the same sign? */
2412 if (sign != rhs.sign) {
2413 if (sign)
2414 result = cmpLessThan;
2415 else
2416 result = cmpGreaterThan;
2417 } else {
2418 /* Compare absolute values; invert result if negative. */
2419 result = compareAbsoluteValue(rhs);
2420
// cmpEqual is left untouched; only the two ordered outcomes are swapped.
2421 if (sign) {
2422 if (result == cmpLessThan)
2423 result = cmpGreaterThan;
2424 else if (result == cmpGreaterThan)
2425 result = cmpLessThan;
2426 }
2427 }
2428
2429 return result;
2430}
2431
2432/// IEEEFloat::convert - convert a value of one floating point type to another.
2433/// The return value corresponds to the IEEE754 exceptions. *losesInfo
2434/// records whether the transformation lost information, i.e. whether
2435/// converting the result back to the original type will produce the
2436/// original value (this is almost the same as return value==opOK, but there
2437/// are edge cases where this is not so).
///
/// NOTE(review): this listing omits lines 2439, 2442, 2449, 2491, 2525, 2527,
/// 2535, 2555 and 2560.  Among them are the start of the signature (which
/// declares toSemantics), the declaration of lostFraction, and the second
/// halves of several conditions below; do not treat this text as complete.
2438
2440 roundingMode rounding_mode,
2441 bool *losesInfo) {
2443 unsigned int newPartCount, oldPartCount;
2444 opStatus fs;
2445 int shift;
2446 const fltSemantics &fromSemantics = *semantics;
// Sampled before anything is modified; used at the end to decide whether the
// conversion must report opInvalidOp.
2447 bool is_signaling = isSignaling();
2448
2450 newPartCount = partCountForBits(toSemantics.precision + 1);
2451 oldPartCount = partCount();
// Positive shift: the target has more precision (extension).
// Negative shift: the target has less precision (truncation).
2452 shift = toSemantics.precision - fromSemantics.precision;
2453
2454 bool X86SpecialNan = false;
2455 if (&fromSemantics == &semX87DoubleExtended &&
2456 &toSemantics != &semX87DoubleExtended && category == fcNaN &&
2457 (!(*significandParts() & 0x8000000000000000ULL) ||
2458 !(*significandParts() & 0x4000000000000000ULL))) {
2459 // x86 has some unusual NaNs which cannot be represented in any other
2460 // format; note them here.
2461 X86SpecialNan = true;
2462 }
2463
2464 // If this is a truncation of a denormal number, and the target semantics
2465 // has larger exponent range than the source semantics (this can happen
2466 // when truncating from PowerPC double-double to double format), the
2467 // right shift could lose result mantissa bits. Adjust exponent instead
2468 // of performing excessive shift.
2469 // Also do a similar trick in case shifting denormal would produce zero
2470 // significand as this case isn't handled correctly by normalize.
2471 if (shift < 0 && isFiniteNonZero()) {
2472 int omsb = significandMSB() + 1;
2473 int exponentChange = omsb - fromSemantics.precision;
// Clamp the adjustment so the exponent does not drop below the target's
// minimum, and so it never exceeds the shift that is actually required.
2474 if (exponent + exponentChange < toSemantics.minExponent)
2475 exponentChange = toSemantics.minExponent - exponent;
2476 if (exponentChange < shift)
2477 exponentChange = shift;
2478 if (exponentChange < 0) {
2479 shift -= exponentChange;
2480 exponent += exponentChange;
2481 } else if (omsb <= -shift) {
2482 exponentChange = omsb + shift - 1; // leave at least one bit set
2483 shift -= exponentChange;
2484 exponent += exponentChange;
2485 }
2486 }
2487
2488 // If this is a truncation, perform the shift before we narrow the storage.
2489 if (shift < 0 && (isFiniteNonZero() ||
2490 (category == fcNaN && semantics->nonFiniteBehavior !=
2492 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
2493
2494 // Fix the storage so it can hold to new value.
2495 if (newPartCount > oldPartCount) {
2496 // The new type requires more storage; make it available.
2497 integerPart *newParts;
2498 newParts = new integerPart[newPartCount];
2499 APInt::tcSet(newParts, 0, newPartCount);
// Only finite non-zero values and NaNs have significand bits worth copying.
2500 if (isFiniteNonZero() || category==fcNaN)
2501 APInt::tcAssign(newParts, significandParts(), oldPartCount);
2502 freeSignificand();
2503 significand.parts = newParts;
2504 } else if (newPartCount == 1 && oldPartCount != 1) {
2505 // Switch to built-in storage for a single part.
2506 integerPart newPart = 0;
2507 if (isFiniteNonZero() || category==fcNaN)
2508 newPart = significandParts()[0];
2509 freeSignificand();
2510 significand.part = newPart;
2511 }
2512
2513 // Now that we have the right storage, switch the semantics.
2514 semantics = &toSemantics;
2515
2516 // If this is an extension, perform the shift now that the storage is
2517 // available.
2518 if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
2519 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2520
// From here on the outcome depends on the category of the value.
2521 if (isFiniteNonZero()) {
2522 fs = normalize(rounding_mode, lostFraction);
2523 *losesInfo = (fs != opOK);
2524 } else if (category == fcNaN) {
// NOTE(review): the condition guarding this early return (line 2525) and the
// right-hand side of the assignment (line 2527) are missing from the listing.
2526 *losesInfo =
2528 makeNaN(false, sign);
2529 return is_signaling ? opInvalidOp : opOK;
2530 }
2531
2532 // If NaN is negative zero, we need to create a new NaN to avoid converting
2533 // NaN to -Inf.
2534 if (fromSemantics.nanEncoding == fltNanEncoding::NegativeZero &&
2536 makeNaN(false, false);
2537
2538 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2539
2540 // For x87 extended precision, we want to make a NaN, not a special NaN if
2541 // the input wasn't special either.
2542 if (!X86SpecialNan && semantics == &semX87DoubleExtended)
2543 APInt::tcSetBit(significandParts(), semantics->precision - 1);
2544
2545 // Convert of sNaN creates qNaN and raises an exception (invalid op).
2546 // This also guarantees that a sNaN does not become Inf on a truncation
2547 // that loses all payload bits.
2548 if (is_signaling) {
2549 makeQuiet();
2550 fs = opInvalidOp;
2551 } else {
2552 fs = opOK;
2553 }
2554 } else if (category == fcInfinity &&
// An infinity taking this branch is replaced by a NaN and reported as lossy.
2556 makeNaN(false, sign);
2557 *losesInfo = true;
2558 fs = opInexact;
2559 } else if (category == fcZero &&
2561 // Negative zero loses info, but positive zero doesn't.
2562 *losesInfo =
2563 fromSemantics.nanEncoding != fltNanEncoding::NegativeZero && sign;
2564 fs = *losesInfo ? opInexact : opOK;
2565 // NaN is negative zero means -0 -> +0, which can lose information
2566 sign = false;
2567 } else {
// Everything else converts exactly.
2568 *losesInfo = false;
2569 fs = opOK;
2570 }
2571
2572 return fs;
2573}
2574
2575/* Convert a floating point number to an integer according to the
2576 rounding mode. If the rounded integer value is out of range this
2577 returns an invalid operation exception and the contents of the
2578 destination parts are unspecified. If the rounded value is in
2579 range but the floating point number is not the exact integer, the C
2580 standard doesn't require an inexact exception to be raised. IEEE
2581 854 does require it so we do that.
2582
2583 Note that for conversions to integer type the C standard requires
2584 round-to-zero to always be used. */
2585IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
2586 MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
2587 roundingMode rounding_mode, bool *isExact) const {
2588 lostFraction lost_fraction;
2589 const integerPart *src;
2590 unsigned int dstPartsCount, truncatedBits;
2591
2592 *isExact = false;
2593
2594 /* Handle the three special cases first. */
2595 if (category == fcInfinity || category == fcNaN)
2596 return opInvalidOp;
2597
2598 dstPartsCount = partCountForBits(width);
2599 assert(dstPartsCount <= parts.size() && "Integer too big");
2600
2601 if (category == fcZero) {
2602 APInt::tcSet(parts.data(), 0, dstPartsCount);
2603 // Negative zero can't be represented as an int.
2604 *isExact = !sign;
2605 return opOK;
2606 }
2607
2608 src = significandParts();
2609
2610 /* Step 1: place our absolute value, with any fraction truncated, in
2611 the destination. */
2612 if (exponent < 0) {
2613 /* Our absolute value is less than one; truncate everything. */
2614 APInt::tcSet(parts.data(), 0, dstPartsCount);
2615 /* For exponent -1 the integer bit represents .5, look at that.
2616 For smaller exponents leftmost truncated bit is 0. */
2617 truncatedBits = semantics->precision -1U - exponent;
2618 } else {
2619 /* We want the most significant (exponent + 1) bits; the rest are
2620 truncated. */
2621 unsigned int bits = exponent + 1U;
2622
2623 /* Hopelessly large in magnitude? */
2624 if (bits > width)
2625 return opInvalidOp;
2626
2627 if (bits < semantics->precision) {
2628 /* We truncate (semantics->precision - bits) bits. */
2629 truncatedBits = semantics->precision - bits;
2630 APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
2631 } else {
2632 /* We want at least as many bits as are available. */
2633 APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
2634 0);
2635 APInt::tcShiftLeft(parts.data(), dstPartsCount,
2636 bits - semantics->precision);
2637 truncatedBits = 0;
2638 }
2639 }
2640
2641 /* Step 2: work out any lost fraction, and increment the absolute
2642 value if we would round away from zero. */
2643 if (truncatedBits) {
2644 lost_fraction = lostFractionThroughTruncation(src, partCount(),
2645 truncatedBits);
2646 if (lost_fraction != lfExactlyZero &&
2647 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2648 if (APInt::tcIncrement(parts.data(), dstPartsCount))
2649 return opInvalidOp; /* Overflow. */
2650 }
2651 } else {
2652 lost_fraction = lfExactlyZero;
2653 }
2654
2655 /* Step 3: check if we fit in the destination. */
2656 unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;
2657
2658 if (sign) {
2659 if (!isSigned) {
2660 /* Negative numbers cannot be represented as unsigned. */
2661 if (omsb != 0)
2662 return opInvalidOp;
2663 } else {
2664 /* It takes omsb bits to represent the unsigned integer value.
2665 We lose a bit for the sign, but care is needed as the
2666 maximally negative integer is a special case. */
2667 if (omsb == width &&
2668 APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
2669 return opInvalidOp;
2670
2671 /* This case can happen because of rounding. */
2672 if (omsb > width)
2673 return opInvalidOp;
2674 }
2675
2676 APInt::tcNegate (parts.data(), dstPartsCount);
2677 } else {
2678 if (omsb >= width + !isSigned)
2679 return opInvalidOp;
2680 }
2681
2682 if (lost_fraction == lfExactlyZero) {
2683 *isExact = true;
2684 return opOK;
2685 } else
2686 return opInexact;
2687}
2688
2689/* Same as convertToSignExtendedInteger, except we provide
2690 deterministic values in case of an invalid operation exception,
2691 namely zero for NaNs and the minimal or maximal value respectively
2692 for underflow or overflow.
2693 The *isExact output tells whether the result is exact, in the sense
2694 that converting it back to the original floating point type produces
2695 the original value. This is almost equivalent to result==opOK,
2696 except for negative zeroes.
2697*/
// NOTE(review): listing lines 2698-2699 (the start of the signature, which
// declares the `parts` parameter) are missing from this text.
2700 unsigned int width, bool isSigned,
2701 roundingMode rounding_mode, bool *isExact) const {
2702 opStatus fs;
2703
2704 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2705 isExact);
2706
// Only the invalid-operation case needs the destination to be overwritten.
2707 if (fs == opInvalidOp) {
2708 unsigned int bits, dstPartsCount;
2709
2710 dstPartsCount = partCountForBits(width);
2711 assert(dstPartsCount <= parts.size() && "Integer too big");
2712
// `bits` is the count passed to tcSetLeastSignificantBits below:
//   NaN      -> 0
//   negative -> 1 when signed (moved to bit width-1 afterwards), 0 when not
//   positive -> width, minus one when signed
2713 if (category == fcNaN)
2714 bits = 0;
2715 else if (sign)
2716 bits = isSigned;
2717 else
2718 bits = width - isSigned;
2719
2720 tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
// Note this tests `sign` alone, so a negative NaN with isSigned also reaches
// the shift; with bits == 0 there is presumably nothing to move.
2721 if (sign && isSigned)
2722 APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
2723 }
2724
2725 return fs;
2726}
2727
2728/* Convert an unsigned integer SRC to a floating point number,
2729 rounding according to ROUNDING_MODE. The sign of the floating
2730 point number is not modified. */
2731IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts(
2732 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
2733 unsigned int omsb, precision, dstCount;
2734 integerPart *dst;
2735 lostFraction lost_fraction;
2736
2737 category = fcNormal;
2738 omsb = APInt::tcMSB(src, srcCount) + 1;
2739 dst = significandParts();
2740 dstCount = partCount();
2741 precision = semantics->precision;
2742
2743 /* We want the most significant PRECISION bits of SRC. There may not
2744 be that many; extract what we can. */
2745 if (precision <= omsb) {
2746 exponent = omsb - 1;
2747 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2748 omsb - precision);
2749 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2750 } else {
2751 exponent = precision - 1;
2752 lost_fraction = lfExactlyZero;
2753 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2754 }
2755
2756 return normalize(rounding_mode, lost_fraction);
2757}
2758
// Sets this value from the integer Val, treating it as signed when isSigned is
// true, and returns the status from convertFromUnsignedParts.
// NOTE(review): listing line 2759 (the start of the signature, which declares
// Val and isSigned) is missing from this text.
2760 roundingMode rounding_mode) {
2761 unsigned int partCount = Val.getNumWords();
// Work on a copy so the caller's value is not negated.
2762 APInt api = Val;
2763
2764 sign = false;
// A negative signed input is converted as its magnitude with the sign bit set.
2765 if (isSigned && api.isNegative()) {
2766 sign = true;
2767 api = -api;
2768 }
2769
2770 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2771}
2772
2773/* Convert a two's complement integer SRC to a floating point number,
2774 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2775 integer is signed, in which case it must be sign-extended. */
// NOTE(review): listing lines 2776-2777 (the start of the signature) and 2784
// (presumably the declaration of `copy`) are missing from this text.
2778 unsigned int srcCount, bool isSigned,
2779 roundingMode rounding_mode) {
2780 opStatus status;
2781
// The sign is read from the topmost bit of the last part, which is why the
// input has to be sign-extended to fill all srcCount parts.
2782 if (isSigned &&
2783 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2785
2786 /* If we're signed and negative negate a copy. */
2787 sign = true;
// The copy is heap-allocated here and released right after the conversion.
2788 copy = new integerPart[srcCount];
2789 APInt::tcAssign(copy, src, srcCount);
2790 APInt::tcNegate(copy, srcCount);
2791 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2792 delete [] copy;
2793 } else {
2794 sign = false;
2795 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2796 }
2797
2798 return status;
2799}
2800
2801/* FIXME: should this just take a const APInt reference? */
// Sets this value from a WIDTH-bit integer stored in `parts`, treating bit
// width-1 as the sign bit when isSigned is true.
// NOTE(review): listing lines 2802-2803 (the start of the signature, which
// declares `parts`) are missing from this text.
2804 unsigned int width, bool isSigned,
2805 roundingMode rounding_mode) {
2806 unsigned int partCount = partCountForBits(width);
// Wrap the raw parts in an APInt of exactly `width` bits so it can be negated.
2807 APInt api = APInt(width, ArrayRef(parts, partCount));
2808
2809 sign = false;
2810 if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2811 sign = true;
2812 api = -api;
2813 }
2814
2815 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2816}
2817
// Parses the hexadecimal significand and exponent in `s` into this value and
// normalizes with rounding_mode.  Malformed input is reported through
// createError / takeError.
// NOTE(review): listing lines 2818 (the return type) and 2835 (presumably the
// declaration of `dot`) are missing from this text.
2819IEEEFloat::convertFromHexadecimalString(StringRef s,
2820 roundingMode rounding_mode) {
2821 lostFraction lost_fraction = lfExactlyZero;
2822
2823 category = fcNormal;
2824 zeroSignificand();
2825 exponent = 0;
2826
2827 integerPart *significand = significandParts();
2828 unsigned partsCount = partCount();
// Digits are written from the top of the significand storage downwards, one
// nibble at a time; bitPos is the position just above the next free nibble.
2829 unsigned bitPos = partsCount * integerPartWidth;
2830 bool computedTrailingFraction = false;
2831
2832 // Skip leading zeroes and any (hexa)decimal point.
2833 StringRef::iterator begin = s.begin();
2834 StringRef::iterator end = s.end();
2836 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2837 if (!PtrOrErr)
2838 return PtrOrErr.takeError();
2839 StringRef::iterator p = *PtrOrErr;
2840 StringRef::iterator firstSignificantDigit = p;
2841
2842 while (p != end) {
2843 integerPart hex_value;
2844
// `dot == end` means no point has been seen yet.
2845 if (*p == '.') {
2846 if (dot != end)
2847 return createError("String contains multiple dots");
2848 dot = p++;
2849 continue;
2850 }
2851
// The first character that is not a hex digit ends the significand.
2852 hex_value = hexDigitValue(*p);
2853 if (hex_value == UINT_MAX)
2854 break;
2855
2856 p++;
2857
2858 // Store the number while we have space.
2859 if (bitPos) {
2860 bitPos -= 4;
2861 hex_value <<= bitPos % integerPartWidth;
2862 significand[bitPos / integerPartWidth] |= hex_value;
2863 } else if (!computedTrailingFraction) {
// Storage is full: the remaining digits only contribute to the lost fraction,
// which is computed once from the first digit that did not fit.
2864 auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value);
2865 if (!FractOrErr)
2866 return FractOrErr.takeError();
2867 lost_fraction = *FractOrErr;
2868 computedTrailingFraction = true;
2869 }
2870 }
2871
2872 /* Hex floats require an exponent but not a hexadecimal point. */
2873 if (p == end)
2874 return createError("Hex strings require an exponent");
2875 if (*p != 'p' && *p != 'P')
2876 return createError("Invalid character in significand");
2877 if (p == begin)
2878 return createError("Significand has no digits");
// A lone "." before the exponent marker is not a significand either.
2879 if (dot != end && p - begin == 1)
2880 return createError("Significand has no digits");
2881
2882 /* Ignore the exponent if we are zero. */
2883 if (p != firstSignificantDigit) {
2884 int expAdjustment;
2885
2886 /* Implicit hexadecimal point? */
2887 if (dot == end)
2888 dot = p;
2889
2890 /* Calculate the exponent adjustment implicit in the number of
2891 significant digits. */
2892 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
// When the point precedes the first significant digit the distance includes
// the point character itself, hence the correction by one.
2893 if (expAdjustment < 0)
2894 expAdjustment++;
// Four bits per hex digit.
2895 expAdjustment = expAdjustment * 4 - 1;
2896
2897 /* Adjust for writing the significand starting at the most
2898 significant nibble. */
2899 expAdjustment += semantics->precision;
2900 expAdjustment -= partsCount * integerPartWidth;
2901
2902 /* Adjust for the given exponent. */
2903 auto ExpOrErr = totalExponent(p + 1, end, expAdjustment);
2904 if (!ExpOrErr)
2905 return ExpOrErr.takeError();
2906 exponent = *ExpOrErr;
2907 }
2908
2909 return normalize(rounding_mode, lost_fraction);
2910}
2911
// Sets this value to decSigParts * 10^exp, rounded with rounding_mode.  The
// product or quotient with 5^|exp| is computed in a temporary format whose
// size is doubled each iteration until the error bound guarantees that
// truncating to the target precision rounds correctly.
// NOTE(review): listing lines 2912 (the return type), 2918 (presumably the
// declaration of pow5Parts), 2942 and 2944 (the ends of the two
// convertFromUnsignedParts calls) and 2971 (the start of the assert) are
// missing from this text.
2913IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2914 unsigned sigPartCount, int exp,
2915 roundingMode rounding_mode) {
2916 unsigned int parts, pow5PartCount;
// Scratch semantics for the calculation; its precision field is filled in on
// every loop iteration below.
2917 fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
2919 bool isNearest;
2920
2921 isNearest = (rounding_mode == rmNearestTiesToEven ||
2922 rounding_mode == rmNearestTiesToAway);
2923
// Initial size of the working format, in parts.
2924 parts = partCountForBits(semantics->precision + 11);
2925
2926 /* Calculate pow(5, abs(exp)). */
2927 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2928
// There is no loop condition: the only exit is the return inside the body.
2929 for (;; parts *= 2) {
2930 opStatus sigStatus, powStatus;
2931 unsigned int excessPrecision, truncatedBits;
2932
2933 calcSemantics.precision = parts * integerPartWidth - 1;
2934 excessPrecision = calcSemantics.precision - semantics->precision;
2935 truncatedBits = excessPrecision;
2936
2937 IEEEFloat decSig(calcSemantics, uninitialized);
2938 decSig.makeZero(sign);
2939 IEEEFloat pow5(calcSemantics);
2940
2941 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2943 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2945 /* Add exp, as 10^n = 5^n * 2^n. */
2946 decSig.exponent += exp;
2947
2948 lostFraction calcLostFraction;
2949 integerPart HUerr, HUdistance;
2950 unsigned int powHUerr;
2951
2952 if (exp >= 0) {
2953 /* multiplySignificand leaves the precision-th bit set to 1. */
2954 calcLostFraction = decSig.multiplySignificand(pow5);
2955 powHUerr = powStatus != opOK;
2956 } else {
2957 calcLostFraction = decSig.divideSignificand(pow5);
2958 /* Denormal numbers have less precision. */
2959 if (decSig.exponent < semantics->minExponent) {
2960 excessPrecision += (semantics->minExponent - decSig.exponent);
// truncatedBits keeps the unclamped count; only excessPrecision is clamped.
2961 truncatedBits = excessPrecision;
2962 if (excessPrecision > calcSemantics.precision)
2963 excessPrecision = calcSemantics.precision;
2964 }
2965 /* Extra half-ulp lost in reciprocal of exponent. */
2966 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2967 }
2968
2969 /* Both multiplySignificand and divideSignificand return the
2970 result with the integer bit set. */
2972 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2973
// Compare the accumulated error bound with twice the distance to the nearest
// rounding boundary.
2974 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2975 powHUerr);
2976 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2977 excessPrecision, isNearest);
2978
2979 /* Are we guaranteed to round correctly if we truncate? */
2980 if (HUdistance >= HUerr) {
2981 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2982 calcSemantics.precision - excessPrecision,
2983 excessPrecision);
2984 /* Take the exponent of decSig. If we tcExtract-ed less bits
2985 above we must adjust our exponent to compensate for the
2986 implicit right shift. */
2987 exponent = (decSig.exponent + semantics->precision
2988 - (calcSemantics.precision - excessPrecision));
2989 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2990 decSig.partCount(),
2991 truncatedBits);
2992 return normalize(rounding_mode, calcLostFraction);
2993 }
2994 }
2995}
2996
// Parses the decimal number in `str` into this value, rounding with
// rounding_mode.  Zero and exponents that are obviously out of range are
// handled without bignum arithmetic; everything else is converted to a binary
// integer and passed to roundSignificandWithExponent.
// NOTE(review): listing line 2997 (the return type) is missing from this text.
2998IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
2999 decimalInfo D;
3000 opStatus fs;
3001
3002 /* Scan the text. */
3003 StringRef::iterator p = str.begin();
3004 if (Error Err = interpretDecimal(p, str.end(), &D))
3005 return std::move(Err);
3006
3007 /* Handle the quick cases. First the case of no significant digits,
3008 i.e. zero, and then exponents that are obviously too large or too
3009 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
3010 definitely overflows if
3011
3012 (exp - 1) * L >= maxExponent
3013
3014 and definitely underflows to zero where
3015
3016 (exp + 1) * L <= minExponent - precision
3017
3018 With integer arithmetic the tightest bounds for L are
3019
3020 93/28 < L < 196/59 [ numerator <= 256 ]
3021 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
3022 */
3023
3024 // Test if we have a zero number allowing for strings with no null terminators
3025 // and zero decimals with non-zero exponents.
3026 //
3027 // We computed firstSigDigit by ignoring all zeros and dots. Thus if
3028 // D->firstSigDigit equals str.end(), every digit must be a zero and there can
3029 // be at most one dot. On the other hand, if we have a zero with a non-zero
3030 // exponent, then we know that D.firstSigDigit will be non-numeric.
3031 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
3032 category = fcZero;
3033 fs = opOK;
// Formats whose NaN encoding is "negative zero" cannot hold -0, so force +0.
3034 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
3035 sign = false;
3036
3037 /* Check whether the normalized exponent is high enough to overflow
3038 max during the log-rebasing in the max-exponent check below. */
3039 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
3040 fs = handleOverflow(rounding_mode);
3041
3042 /* If it wasn't, then it also wasn't high enough to overflow max
3043 during the log-rebasing in the min-exponent check. Check that it
3044 won't overflow min in either check, then perform the min-exponent
3045 check. */
3046 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
3047 (D.normalizedExponent + 1) * 28738 <=
3048 8651 * (semantics->minExponent - (int) semantics->precision)) {
3049 /* Underflow to zero and round. */
// A zero significand with lfLessThanHalf lets normalize() apply the rounding
// mode to a value known only to be tiny and non-zero.
3050 category = fcNormal;
3051 zeroSignificand();
3052 fs = normalize(rounding_mode, lfLessThanHalf);
3053
3054 /* We can finally safely perform the max-exponent check. */
3055 } else if ((D.normalizedExponent - 1) * 42039
3056 >= 12655 * semantics->maxExponent) {
3057 /* Overflow and round. */
3058 fs = handleOverflow(rounding_mode);
3059 } else {
3060 integerPart *decSignificand;
3061 unsigned int partCount;
3062
3063 /* A tight upper bound on number of bits required to hold an
3064 N-digit decimal integer is N * 196 / 59. Allocate enough space
3065 to hold the full significand, and an extra part required by
3066 tcMultiplyPart. */
3067 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
3068 partCount = partCountForBits(1 + 196 * partCount / 59);
3069 decSignificand = new integerPart[partCount + 1];
// From here on partCount counts the parts of decSignificand actually in use.
3070 partCount = 0;
3071
3072 /* Convert to binary efficiently - we do almost all multiplication
3073 in an integerPart. When this would overflow do we do a single
3074 bignum multiplication, and then revert again to multiplication
3075 in an integerPart. */
3076 do {
3077 integerPart decValue, val, multiplier;
3078
3079 val = 0;
3080 multiplier = 1;
3081
3082 do {
// Step over a decimal point; stop if it was the last character.
3083 if (*p == '.') {
3084 p++;
3085 if (p == str.end()) {
3086 break;
3087 }
3088 }
3089 decValue = decDigitValue(*p++);
3090 if (decValue >= 10U) {
// Release the buffer before the early return so it is not leaked.
3091 delete[] decSignificand;
3092 return createError("Invalid character in significand");
3093 }
3094 multiplier *= 10;
3095 val = val * 10 + decValue;
3096 /* The maximum number that can be multiplied by ten with any
3097 digit added without overflowing an integerPart. */
3098 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
3099
3100 /* Multiply out the current part. */
3101 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
3102 partCount, partCount + 1, false);
3103
3104 /* If we used another part (likely but not guaranteed), increase
3105 the count. */
3106 if (decSignificand[partCount])
3107 partCount++;
3108 } while (p <= D.lastSigDigit);
3109
3110 category = fcNormal;
3111 fs = roundSignificandWithExponent(decSignificand, partCount,
3112 D.exponent, rounding_mode);
3113
3114 delete [] decSignificand;
3115 }
3116
3117 return fs;
3118}
3119
3120bool IEEEFloat::convertFromStringSpecials(StringRef str) {
3121 const size_t MIN_NAME_SIZE = 3;
3122
3123 if (str.size() < MIN_NAME_SIZE)
3124 return false;
3125
3126 if (str.equals("inf") || str.equals("INFINITY") || str.equals("+Inf")) {
3127 makeInf(false);
3128 return true;
3129 }
3130
3131 bool IsNegative = str.front() == '-';
3132 if (IsNegative) {
3133 str = str.drop_front();
3134 if (str.size() < MIN_NAME_SIZE)
3135 return false;
3136
3137 if (str.equals("inf") || str.equals("INFINITY") || str.equals("Inf")) {
3138 makeInf(true);
3139 return true;
3140 }
3141 }
3142
3143 // If we have a 's' (or 'S') prefix, then this is a Signaling NaN.
3144 bool IsSignaling = str.front() == 's' || str.front() == 'S';
3145 if (IsSignaling) {
3146 str = str.drop_front();
3147 if (str.size() < MIN_NAME_SIZE)
3148 return false;
3149 }
3150
3151 if (str.starts_with("nan") || str.starts_with("NaN")) {
3152 str = str.drop_front(3);
3153
3154 // A NaN without payload.
3155 if (str.empty()) {
3156 makeNaN(IsSignaling, IsNegative);
3157 return true;
3158 }
3159
3160 // Allow the payload to be inside parentheses.
3161 if (str.front() == '(') {
3162 // Parentheses should be balanced (and not empty).
3163 if (str.size() <= 2 || str.back() != ')')
3164 return false;
3165
3166 str = str.slice(1, str.size() - 1);
3167 }
3168
3169 // Determine the payload number's radix.
3170 unsigned Radix = 10;
3171 if (str[0] == '0') {
3172 if (str.size() > 1 && tolower(str[1]) == 'x') {
3173 str = str.drop_front(2);
3174 Radix = 16;
3175 } else
3176 Radix = 8;
3177 }
3178
3179 // Parse the payload and make the NaN.
3180 APInt Payload;
3181 if (!str.getAsInteger(Radix, Payload)) {
3182 makeNaN(IsSignaling, IsNegative, &Payload);
3183 return true;
3184 }
3185 }
3186
3187 return false;
3188}
3189
// Entry point for text conversion: rejects the empty string, tries the
// special spellings, strips an optional sign, and then dispatches to the
// hexadecimal or decimal parser.
// NOTE(review): listing lines 3190-3191 (the signature, which declares `str`
// and `rounding_mode`) are missing from this text.
3192 if (str.empty())
3193 return createError("Invalid string length");
3194
3195 // Handle special cases.
3196 if (convertFromStringSpecials(str))
3197 return opOK;
3198
3199 /* Handle a leading minus sign. */
3200 StringRef::iterator p = str.begin();
3201 size_t slen = str.size();
3202 sign = *p == '-' ? 1 : 0;
// A leading '+' is consumed as well; it leaves the sign at 0.
3203 if (*p == '-' || *p == '+') {
3204 p++;
3205 slen--;
3206 if (!slen)
3207 return createError("String has no digits");
3208 }
3209
// "0x"/"0X" selects the hexadecimal parser, which is given the text after the
// prefix; a bare prefix with nothing after it is rejected.
3210 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
3211 if (slen == 2)
3212 return createError("Invalid string");
3213 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
3214 rounding_mode);
3215 }
3216
3217 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
3218}
3219
3220/* Write out a hexadecimal representation of the floating point value
3221 to DST, which must be of sufficient size, in the C99 form
3222 [-]0xh.hhhhp[+-]d. Return the number of characters written,
3223 excluding the terminating NUL.
3224
3225 If UPPERCASE, the output is in upper case, otherwise in lower case.
3226
3227 HEXDIGITS digits appear altogether, rounding the value if
3228 necessary. If HEXDIGITS is 0, the minimal precision to display the
3229 number precisely is used instead. If nothing would appear after
3230 the decimal point it is suppressed.
3231
3232 The decimal exponent is always printed and has at least one digit.
3233 Zero values display an exponent of zero. Infinities and NaNs
3234 appear as "infinity" or "nan" respectively.
3235
3236 The above rules are as specified by C99. There is ambiguity about
3237 what the leading hexadecimal digit should be. This implementation
3238 uses whatever is necessary so that the exponent is displayed as
3239 stored. This implies the exponent will fall within the IEEE format
3240 range, and the leading hexadecimal digit will be 0 (for denormals),
3241 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
3242 any other digits zero).
3243*/
3244unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
3245 bool upperCase,
3246 roundingMode rounding_mode) const {
3247 char *p;
3248
3249 p = dst;
3250 if (sign)
3251 *dst++ = '-';
3252
3253 switch (category) {
3254 case fcInfinity:
3255 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
3256 dst += sizeof infinityL - 1;
3257 break;
3258
3259 case fcNaN:
3260 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
3261 dst += sizeof NaNU - 1;
3262 break;
3263
3264 case fcZero:
3265 *dst++ = '0';
3266 *dst++ = upperCase ? 'X': 'x';
3267 *dst++ = '0';
3268 if (hexDigits > 1) {
3269 *dst++ = '.';
3270 memset (dst, '0', hexDigits - 1);
3271 dst += hexDigits - 1;
3272 }
3273 *dst++ = upperCase ? 'P': 'p';
3274 *dst++ = '0';
3275 break;
3276
3277 case fcNormal:
3278 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
3279 break;
3280 }
3281
3282 *dst = 0;
3283
3284 return static_cast<unsigned int>(dst - p);
3285}
3286
3287/* Does the hard work of outputting the correctly rounded hexadecimal
3288 form of a normal floating point number with the specified number of
3289 hexadecimal digits. If HEXDIGITS is zero the minimum number of
3290 digits necessary to print the value precisely is output. */
3291char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
3292 bool upperCase,
3293 roundingMode rounding_mode) const {
3294 unsigned int count, valueBits, shift, partsCount, outputDigits;
3295 const char *hexDigitChars;
3296 const integerPart *significand;
3297 char *p;
3298 bool roundUp;
3299
3300 *dst++ = '0';
3301 *dst++ = upperCase ? 'X': 'x';
3302
3303 roundUp = false;
3304 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
3305
3306 significand = significandParts();
3307 partsCount = partCount();
3308
3309 /* +3 because the first digit only uses the single integer bit, so
3310 we have 3 virtual zero most-significant-bits. */
3311 valueBits = semantics->precision + 3;
3312 shift = integerPartWidth - valueBits % integerPartWidth;
3313
3314 /* The natural number of digits required ignoring trailing
3315 insignificant zeroes. */
3316 outputDigits = (valueBits - significandLSB () + 3) / 4;
3317
3318 /* hexDigits of zero means use the required number for the
3319 precision. Otherwise, see if we are truncating. If we are,
3320 find out if we need to round away from zero. */
3321 if (hexDigits) {
3322 if (hexDigits < outputDigits) {
3323 /* We are dropping non-zero bits, so need to check how to round.
3324 "bits" is the number of dropped bits. */
3325 unsigned int bits;
3326 lostFraction fraction;
3327
3328 bits = valueBits - hexDigits * 4;
3329 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
3330 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
3331 }
3332 outputDigits = hexDigits;
3333 }
3334
3335 /* Write the digits consecutively, and start writing in the location
3336 of the hexadecimal point. We move the most significant digit
3337 left and add the hexadecimal point later. */
3338 p = ++dst;
3339
3340 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
3341
3342 while (outputDigits && count) {
3343 integerPart part;
3344
3345 /* Put the most significant integerPartWidth bits in "part". */
3346 if (--count == partsCount)
3347 part = 0; /* An imaginary higher zero part. */
3348 else
3349 part = significand[count] << shift;
3350
3351 if (count && shift)
3352 part |= significand[count - 1] >> (integerPartWidth - shift);
3353
3354 /* Convert as much of "part" to hexdigits as we can. */
3355 unsigned int curDigits = integerPartWidth / 4;
3356
3357 if (curDigits > outputDigits)
3358 curDigits = outputDigits;
3359 dst += partAsHex (dst, part, curDigits, hexDigitChars);
3360 outputDigits -= curDigits;
3361 }
3362
3363 if (roundUp) {
3364 char *q = dst;
3365
3366 /* Note that hexDigitChars has a trailing '0'. */
3367 do {
3368 q--;
3369 *q = hexDigitChars[hexDigitValue (*q) + 1];
3370 } while (*q == '0');
3371 assert(q >= p);
3372 } else {
3373 /* Add trailing zeroes. */
3374 memset (dst, '0', outputDigits);
3375 dst += outputDigits;
3376 }
3377
3378 /* Move the most significant digit to before the point, and if there
3379 is something after the decimal point add it. This must come
3380 after rounding above. */
3381 p[-1] = p[0];
3382 if (dst -1 == p)
3383 dst--;
3384 else
3385 p[0] = '.';
3386
3387 /* Finally output the exponent. */
3388 *dst++ = upperCase ? 'P': 'p';
3389
3390 return writeSignedDecimal (dst, exponent);
3391}
3392
3394 if (!Arg.isFiniteNonZero())
3395 return hash_combine((uint8_t)Arg.category,
3396 // NaN has no sign, fix it at zero.
3397 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
3398 Arg.semantics->precision);
3399
3400 // Normal floats need their exponent and significand hashed.
3401 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
3402 Arg.semantics->precision, Arg.exponent,
3404 Arg.significandParts(),
3405 Arg.significandParts() + Arg.partCount()));
3406}
3407
3408// Conversion from APFloat to/from host float/double. It may eventually be
3409// possible to eliminate these and have everybody deal with APFloats, but that
3410// will take a while. This approach will not easily extend to long double.
3411// Current implementation requires integerPartWidth==64, which is correct at
3412// the moment but could be made more general.
3413
// Denormals have exponent minExponent in APFloat, but minExponent-1 in
// the actual IEEE representations.  We compensate for that here.
3416
3417APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
3418 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended);
3419 assert(partCount()==2);
3420
3421 uint64_t myexponent, mysignificand;
3422
3423 if (isFiniteNonZero()) {
3424 myexponent = exponent+16383; //bias
3425 mysignificand = significandParts()[0];
3426 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
3427 myexponent = 0; // denormal
3428 } else if (category==fcZero) {
3429 myexponent = 0;
3430 mysignificand = 0;
3431 } else if (category==fcInfinity) {
3432 myexponent = 0x7fff;
3433 mysignificand = 0x8000000000000000ULL;
3434 } else {
3435 assert(category == fcNaN && "Unknown category");
3436 myexponent = 0x7fff;
3437 mysignificand = significandParts()[0];
3438 }
3439
3440 uint64_t words[2];
3441 words[0] = mysignificand;
3442 words[1] = ((uint64_t)(sign & 1) << 15) |
3443 (myexponent & 0x7fffLL);
3444 return APInt(80, words);
3445}
3446
3447APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {
3448 assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy);
3449 assert(partCount()==2);
3450
3451 uint64_t words[2];
3452 opStatus fs;
3453 bool losesInfo;
3454
3455 // Convert number to double. To avoid spurious underflows, we re-
3456 // normalize against the "double" minExponent first, and only *then*
3457 // truncate the mantissa. The result of that second conversion
3458 // may be inexact, but should never underflow.
3459 // Declare fltSemantics before APFloat that uses it (and
3460 // saves pointer to it) to ensure correct destruction order.
3461 fltSemantics extendedSemantics = *semantics;
3462 extendedSemantics.minExponent = semIEEEdouble.minExponent;
3463 IEEEFloat extended(*this);
3464 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3465 assert(fs == opOK && !losesInfo);
3466 (void)fs;
3467
3468 IEEEFloat u(extended);
3469 fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3470 assert(fs == opOK || fs == opInexact);
3471 (void)fs;
3472 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
3473
3474 // If conversion was exact or resulted in a special case, we're done;
3475 // just set the second double to zero. Otherwise, re-convert back to
3476 // the extended format and compute the difference. This now should
3477 // convert exactly to double.
3478 if (u.isFiniteNonZero() && losesInfo) {
3479 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3480 assert(fs == opOK && !losesInfo);
3481 (void)fs;
3482
3483 IEEEFloat v(extended);
3484 v.subtract(u, rmNearestTiesToEven);
3485 fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3486 assert(fs == opOK && !losesInfo);
3487 (void)fs;
3488 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
3489 } else {
3490 words[1] = 0;
3491 }
3492
3493 return APInt(128, words);
3494}
3495
3496template <const fltSemantics &S>
3497APInt IEEEFloat::convertIEEEFloatToAPInt() const {
3498 assert(semantics == &S);
3499
3500 constexpr int bias = -(S.minExponent - 1);
3501 constexpr unsigned int trailing_significand_bits = S.precision - 1;
3502 constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth;
3503 constexpr integerPart integer_bit =
3504 integerPart{1} << (trailing_significand_bits % integerPartWidth);
3505 constexpr uint64_t significand_mask = integer_bit - 1;
3506 constexpr unsigned int exponent_bits =
3507 S.sizeInBits - 1 - trailing_significand_bits;
3508 static_assert(exponent_bits < 64);
3509 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3510
3511 uint64_t myexponent;
3512 std::array<integerPart, partCountForBits(trailing_significand_bits)>
3513 mysignificand;
3514
3515 if (isFiniteNonZero()) {
3516 myexponent = exponent + bias;
3517 std::copy_n(significandParts(), mysignificand.size(),
3518 mysignificand.begin());
3519 if (myexponent == 1 &&
3520 !(significandParts()[integer_bit_part] & integer_bit))
3521 myexponent = 0; // denormal
3522 } else if (category == fcZero) {
3523 myexponent = ::exponentZero(S) + bias;
3524 mysignificand.fill(0);
3525 } else if (category == fcInfinity) {
3526 if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
3527 llvm_unreachable("semantics don't support inf!");
3528 }
3529 myexponent = ::exponentInf(S) + bias;
3530 mysignificand.fill(0);
3531 } else {
3532 assert(category == fcNaN && "Unknown category!");
3533 myexponent = ::exponentNaN(S) + bias;
3534 std::copy_n(significandParts(), mysignificand.size(),
3535 mysignificand.begin());
3536 }
3537 std::array<uint64_t, (S.sizeInBits + 63) / 64> words;
3538 auto words_iter =
3539 std::copy_n(mysignificand.begin(), mysignificand.size(), words.begin());
3540 if constexpr (significand_mask != 0) {
3541 // Clear the integer bit.
3542 words[mysignificand.size() - 1] &= significand_mask;
3543 }
3544 std::fill(words_iter, words.end(), uint64_t{0});
3545 constexpr size_t last_word = words.size() - 1;
3546 uint64_t shifted_sign = static_cast<uint64_t>(sign & 1)
3547 << ((S.sizeInBits - 1) % 64);
3548 words[last_word] |= shifted_sign;
3549 uint64_t shifted_exponent = (myexponent & exponent_mask)
3550 << (trailing_significand_bits % 64);
3551 words[last_word] |= shifted_exponent;
3552 if constexpr (last_word == 0) {
3553 return APInt(S.sizeInBits, words[0]);
3554 }
3555 return APInt(S.sizeInBits, words);
3556}
3557
3558APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
3559 assert(partCount() == 2);
3560 return convertIEEEFloatToAPInt<semIEEEquad>();
3561}
3562
3563APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
3564 assert(partCount()==1);
3565 return convertIEEEFloatToAPInt<semIEEEdouble>();
3566}
3567
3568APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
3569 assert(partCount()==1);
3570 return convertIEEEFloatToAPInt<semIEEEsingle>();
3571}
3572
3573APInt IEEEFloat::convertBFloatAPFloatToAPInt() const {
3574 assert(partCount() == 1);
3575 return convertIEEEFloatToAPInt<semBFloat>();
3576}
3577
3578APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
3579 assert(partCount()==1);
3580 return convertIEEEFloatToAPInt<semIEEEhalf>();
3581}
3582
3583APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const {
3584 assert(partCount() == 1);
3585 return convertIEEEFloatToAPInt<semFloat8E5M2>();
3586}
3587
3588APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const {
3589 assert(partCount() == 1);
3590 return convertIEEEFloatToAPInt<semFloat8E5M2FNUZ>();
3591}
3592
3593APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {
3594 assert(partCount() == 1);
3595 return convertIEEEFloatToAPInt<semFloat8E4M3FN>();
3596}
3597
3598APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const {
3599 assert(partCount() == 1);
3600 return convertIEEEFloatToAPInt<semFloat8E4M3FNUZ>();
3601}
3602
3603APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const {
3604 assert(partCount() == 1);
3605 return convertIEEEFloatToAPInt<semFloat8E4M3B11FNUZ>();
3606}
3607
3608APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const {
3609 assert(partCount() == 1);
3610 return convertIEEEFloatToAPInt<semFloatTF32>();
3611}
3612
3613// This function creates an APInt that is just a bit map of the floating
3614// point constant as it would appear in memory. It is not a conversion,
3615// and treating the result as a normal integer is unlikely to be useful.
3616
3618 if (semantics == (const llvm::fltSemantics*)&semIEEEhalf)
3619 return convertHalfAPFloatToAPInt();
3620
3621 if (semantics == (const llvm::fltSemantics *)&semBFloat)
3622 return convertBFloatAPFloatToAPInt();
3623
3624 if (semantics == (const llvm::fltSemantics*)&semIEEEsingle)
3625 return convertFloatAPFloatToAPInt();
3626
3627 if (semantics == (const llvm::fltSemantics*)&semIEEEdouble)
3628 return convertDoubleAPFloatToAPInt();
3629
3630 if (semantics == (const llvm::fltSemantics*)&semIEEEquad)
3631 return convertQuadrupleAPFloatToAPInt();
3632
3633 if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy)
3634 return convertPPCDoubleDoubleAPFloatToAPInt();
3635
3636 if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2)
3637 return convertFloat8E5M2APFloatToAPInt();
3638
3639 if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2FNUZ)
3640 return convertFloat8E5M2FNUZAPFloatToAPInt();
3641
3642 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN)
3643 return convertFloat8E4M3FNAPFloatToAPInt();
3644
3645 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FNUZ)
3646 return convertFloat8E4M3FNUZAPFloatToAPInt();
3647
3648 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3B11FNUZ)
3649 return convertFloat8E4M3B11FNUZAPFloatToAPInt();
3650
3651 if (semantics == (const llvm::fltSemantics *)&semFloatTF32)
3652 return convertFloatTF32APFloatToAPInt();
3653
3654 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&
3655 "unknown format!");
3656 return convertF80LongDoubleAPFloatToAPInt();
3657}
3658
3660 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle &&
3661 "Float semantics are not IEEEsingle");
3662 APInt api = bitcastToAPInt();
3663 return api.bitsToFloat();
3664}
3665
3667 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble &&
3668 "Float semantics are not IEEEdouble");
3669 APInt api = bitcastToAPInt();
3670 return api.bitsToDouble();
3671}
3672
3673/// Integer bit is explicit in this format. Intel hardware (387 and later)
3674/// does not support these bit patterns:
3675/// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3676/// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3677/// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3678/// exponent = 0, integer bit 1 ("pseudodenormal")
3679/// At the moment, the first three are treated as NaNs, the last one as Normal.
3680void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
3681 uint64_t i1 = api.getRawData()[0];
3682 uint64_t i2 = api.getRawData()[1];
3683 uint64_t myexponent = (i2 & 0x7fff);
3684 uint64_t mysignificand = i1;
3685 uint8_t myintegerbit = mysignificand >> 63;
3686
3687 initialize(&semX87DoubleExtended);
3688 assert(partCount()==2);
3689
3690 sign = static_cast<unsigned int>(i2>>15);
3691 if (myexponent == 0 && mysignificand == 0) {
3692 makeZero(sign);
3693 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3694 makeInf(sign);
3695 } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) ||
3696 (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) {
3697 category = fcNaN;
3698 exponent = exponentNaN();
3699 significandParts()[0] = mysignificand;
3700 significandParts()[1] = 0;
3701 } else {
3702 category = fcNormal;
3703 exponent = myexponent - 16383;
3704 significandParts()[0] = mysignificand;
3705 significandParts()[1] = 0;
3706 if (myexponent==0) // denormal
3707 exponent = -16382;
3708 }
3709}
3710
3711void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) {
3712 uint64_t i1 = api.getRawData()[0];
3713 uint64_t i2 = api.getRawData()[1];
3714 opStatus fs;
3715 bool losesInfo;
3716
3717 // Get the first double and convert to our format.
3718 initFromDoubleAPInt(APInt(64, i1));
3720 assert(fs == opOK && !losesInfo);
3721 (void)fs;
3722
3723 // Unless we have a special case, add in second double.
3724 if (isFiniteNonZero()) {
3725 IEEEFloat v(semIEEEdouble, APInt(64, i2));
3726 fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3727 assert(fs == opOK && !losesInfo);
3728 (void)fs;
3729
3731 }
3732}
3733
3734template <const fltSemantics &S>
3735void IEEEFloat::initFromIEEEAPInt(const APInt &api) {
3736 assert(api.getBitWidth() == S.sizeInBits);
3737 constexpr integerPart integer_bit = integerPart{1}
3738 << ((S.precision - 1) % integerPartWidth);
3739 constexpr uint64_t significand_mask = integer_bit - 1;
3740 constexpr unsigned int trailing_significand_bits = S.precision - 1;
3741 constexpr unsigned int stored_significand_parts =
3742 partCountForBits(trailing_significand_bits);
3743 constexpr unsigned int exponent_bits =
3744 S.sizeInBits - 1 - trailing_significand_bits;
3745 static_assert(exponent_bits < 64);
3746 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3747 constexpr int bias = -(S.minExponent - 1);
3748
3749 // Copy the bits of the significand. We need to clear out the exponent and
3750 // sign bit in the last word.
3751 std::array<integerPart, stored_significand_parts> mysignificand;
3752 std::copy_n(api.getRawData(), mysignificand.size(), mysignificand.begin());
3753 if constexpr (significand_mask != 0) {
3754 mysignificand[mysignificand.size() - 1] &= significand_mask;
3755 }
3756
3757 // We assume the last word holds the sign bit, the exponent, and potentially
3758 // some of the trailing significand field.
3759 uint64_t last_word = api.getRawData()[api.getNumWords() - 1];
3760 uint64_t myexponent =
3761 (last_word >> (trailing_significand_bits % 64)) & exponent_mask;
3762
3763 initialize(&S);
3764 assert(partCount() == mysignificand.size());
3765
3766 sign = static_cast<unsigned int>(last_word >> ((S.sizeInBits - 1) % 64));
3767
3768 bool all_zero_significand =
3769 llvm::all_of(mysignificand, [](integerPart bits) { return bits == 0; });
3770
3771 bool is_zero = myexponent == 0 && all_zero_significand;
3772
3773 if constexpr (S.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754) {
3774 if (myexponent - bias == ::exponentInf(S) && all_zero_significand) {
3775 makeInf(sign);
3776 return;
3777 }
3778 }
3779
3780 bool is_nan = false;
3781
3782 if constexpr (S.nanEncoding == fltNanEncoding::IEEE) {
3783 is_nan = myexponent - bias == ::exponentNaN(S) && !all_zero_significand;
3784 } else if constexpr (S.nanEncoding == fltNanEncoding::AllOnes) {
3785 bool all_ones_significand =
3786 std::all_of(mysignificand.begin(), mysignificand.end() - 1,
3787 [](integerPart bits) { return bits == ~integerPart{0}; }) &&
3788 (!significand_mask ||
3789 mysignificand[mysignificand.size() - 1] == significand_mask);
3790 is_nan = myexponent - bias == ::exponentNaN(S) && all_ones_significand;
3791 } else if constexpr (S.nanEncoding == fltNanEncoding::NegativeZero) {
3792 is_nan = is_zero && sign;
3793 }
3794
3795 if (is_nan) {
3796 category = fcNaN;
3797 exponent = ::exponentNaN(S);
3798 std::copy_n(mysignificand.begin(), mysignificand.size(),
3799 significandParts());
3800 return;
3801 }
3802
3803 if (is_zero) {
3804 makeZero(sign);
3805 return;
3806 }
3807
3808 category = fcNormal;
3809 exponent = myexponent - bias;
3810 std::copy_n(mysignificand.begin(), mysignificand.size(), significandParts());
3811 if (myexponent == 0) // denormal
3812 exponent = S.minExponent;
3813 else
3814 significandParts()[mysignificand.size()-1] |= integer_bit; // integer bit
3815}
3816
3817void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
3818 initFromIEEEAPInt<semIEEEquad>(api);
3819}
3820
3821void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
3822 initFromIEEEAPInt<semIEEEdouble>(api);
3823}
3824
3825void IEEEFloat::initFromFloatAPInt(const APInt &api) {
3826 initFromIEEEAPInt<semIEEEsingle>(api);
3827}
3828
3829void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
3830 initFromIEEEAPInt<semBFloat>(api);
3831}
3832
3833void IEEEFloat::initFromHalfAPInt(const APInt &api) {
3834 initFromIEEEAPInt<semIEEEhalf>(api);
3835}
3836
3837void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) {
3838 initFromIEEEAPInt<semFloat8E5M2>(api);
3839}
3840
3841void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) {
3842 initFromIEEEAPInt<semFloat8E5M2FNUZ>(api);
3843}
3844
3845void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {
3846 initFromIEEEAPInt<semFloat8E4M3FN>(api);
3847}
3848
3849void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) {
3850 initFromIEEEAPInt<semFloat8E4M3FNUZ>(api);
3851}
3852
3853void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) {
3854 initFromIEEEAPInt<semFloat8E4M3B11FNUZ>(api);
3855}
3856
3857void IEEEFloat::initFromFloatTF32APInt(const APInt &api) {
3858 initFromIEEEAPInt<semFloatTF32>(api);
3859}
3860
3861/// Treat api as containing the bits of a floating point number.
3862void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
3863 assert(api.getBitWidth() == Sem->sizeInBits);
3864 if (Sem == &semIEEEhalf)
3865 return initFromHalfAPInt(api);
3866 if (Sem == &semBFloat)
3867 return initFromBFloatAPInt(api);
3868 if (Sem == &semIEEEsingle)
3869 return initFromFloatAPInt(api);
3870 if (Sem == &semIEEEdouble)
3871 return initFromDoubleAPInt(api);
3872 if (Sem == &semX87DoubleExtended)
3873 return initFromF80LongDoubleAPInt(api);
3874 if (Sem == &semIEEEquad)
3875 return initFromQuadrupleAPInt(api);
3876 if (Sem == &semPPCDoubleDoubleLegacy)
3877 return initFromPPCDoubleDoubleAPInt(api);
3878 if (Sem == &semFloat8E5M2)
3879 return initFromFloat8E5M2APInt(api);
3880 if (Sem == &semFloat8E5M2FNUZ)
3881 return initFromFloat8E5M2FNUZAPInt(api);
3882 if (Sem == &semFloat8E4M3FN)
3883 return initFromFloat8E4M3FNAPInt(api);
3884 if (Sem == &semFloat8E4M3FNUZ)
3885 return initFromFloat8E4M3FNUZAPInt(api);
3886 if (Sem == &semFloat8E4M3B11FNUZ)
3887 return initFromFloat8E4M3B11FNUZAPInt(api);
3888 if (Sem == &semFloatTF32)
3889 return initFromFloatTF32APInt(api);
3890
3891 llvm_unreachable(nullptr);
3892}
3893
3894/// Make this number the largest magnitude normal number in the given
3895/// semantics.
3896void IEEEFloat::makeLargest(bool Negative) {
3897 // We want (in interchange format):
3898 // sign = {Negative}
3899 // exponent = 1..10
3900 // significand = 1..1
3901 category = fcNormal;
3902 sign = Negative;
3903 exponent = semantics->maxExponent;
3904
3905 // Use memset to set all but the highest integerPart to all ones.
3906 integerPart *significand = significandParts();
3907 unsigned PartCount = partCount();
3908 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
3909
3910 // Set the high integerPart especially setting all unused top bits for
3911 // internal consistency.
3912 const unsigned NumUnusedHighBits =
3913 PartCount*integerPartWidth - semantics->precision;
3914 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
3915 ? (~integerPart(0) >> NumUnusedHighBits)
3916 : 0;
3917
3918 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
3919 semantics->nanEncoding == fltNanEncoding::AllOnes)
3920 significand[0] &= ~integerPart(1);
3921}
3922
3923/// Make this number the smallest magnitude denormal number in the given
3924/// semantics.
3925void IEEEFloat::makeSmallest(bool Negative) {
3926 // We want (in interchange format):
3927 // sign = {Negative}
3928 // exponent = 0..0
3929 // significand = 0..01
3930 category = fcNormal;
3931 sign = Negative;
3932 exponent = semantics->minExponent;
3933 APInt::tcSet(significandParts(), 1, partCount());
3934}
3935
3936void IEEEFloat::makeSmallestNormalized(bool Negative) {
3937 // We want (in interchange format):
3938 // sign = {Negative}
3939 // exponent = 0..0
3940 // significand = 10..0
3941
3942 category = fcNormal;
3943 zeroSignificand();
3944 sign = Negative;
3945 exponent = semantics->minExponent;
3946 APInt::tcSetBit(significandParts(), semantics->precision - 1);
3947}
3948
3949IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
3950 initFromAPInt(&Sem, API);
3951}
3952
3953IEEEFloat::IEEEFloat(float f) {
3954 initFromAPInt(&semIEEEsingle, APInt::floatToBits(f));
3955}
3956
3957IEEEFloat::IEEEFloat(double d) {
3958 initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d));
3959}
3960
3961namespace {
3962 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
3963 Buffer.append(Str.begin(), Str.end());
3964 }
3965
3966 /// Removes data from the given significand until it is no more
3967 /// precise than is required for the desired precision.
3968 void AdjustToPrecision(APInt &significand,
3969 int &exp, unsigned FormatPrecision) {
3970 unsigned bits = significand.getActiveBits();
3971
3972 // 196/59 is a very slight overestimate of lg_2(10).
3973 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3974
3975 if (bits <= bitsRequired) return;
3976
3977 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3978 if (!tensRemovable) return;
3979
3980 exp += tensRemovable;
3981
3982 APInt divisor(significand.getBitWidth(), 1);
3983 APInt powten(significand.getBitWidth(), 10);
3984 while (true) {
3985 if (tensRemovable & 1)
3986 divisor *= powten;
3987 tensRemovable >>= 1;
3988 if (!tensRemovable) break;
3989 powten *= powten;
3990 }
3991
3992 significand = significand.udiv(divisor);
3993
3994 // Truncate the significand down to its active bit count.
3995 significand = significand.trunc(significand.getActiveBits());
3996 }
3997
3998
3999 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
4000 int &exp, unsigned FormatPrecision) {
4001 unsigned N = buffer.size();
4002 if (N <= FormatPrecision) return;
4003
4004 // The most significant figures are the last ones in the buffer.
4005 unsigned FirstSignificant = N - FormatPrecision;
4006
4007 // Round.
4008 // FIXME: this probably shouldn't use 'round half up'.
4009
4010 // Rounding down is just a truncation, except we also want to drop
4011 // trailing zeros from the new result.
4012 if (buffer[FirstSignificant - 1] < '5') {
4013 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
4014 FirstSignificant++;
4015
4016 exp += FirstSignificant;
4017 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4018 return;
4019 }
4020
4021 // Rounding up requires a decimal add-with-carry. If we continue
4022 // the carry, the newly-introduced zeros will just be truncated.
4023 for (unsigned I = FirstSignificant; I != N; ++I) {
4024 if (buffer[I] == '9') {
4025 FirstSignificant++;
4026 } else {
4027 buffer[I]++;
4028 break;
4029 }
4030 }
4031
4032 // If we carried through, we have exactly one digit of precision.
4033 if (FirstSignificant == N) {
4034 exp += FirstSignificant;
4035 buffer.clear();
4036 buffer.push_back('1');
4037 return;
4038 }
4039
4040 exp += FirstSignificant;
4041 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4042 }
4043} // namespace
4044
4045void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
4046 unsigned FormatMaxPadding, bool TruncateZero) const {
4047 switch (category) {
4048 case fcInfinity:
4049 if (isNegative())
4050 return append(Str, "-Inf");
4051 else
4052 return append(Str, "+Inf");
4053
4054 case fcNaN: return append(Str, "NaN");
4055
4056 case fcZero:
4057 if (isNegative())
4058 Str.push_back('-');
4059
4060 if (!FormatMaxPadding) {
4061 if (TruncateZero)
4062 append(Str, "0.0E+0");
4063 else {
4064 append(Str, "0.0");
4065 if (FormatPrecision > 1)
4066 Str.append(FormatPrecision - 1, '0');
4067 append(Str, "e+00");
4068 }
4069 } else
4070 Str.push_back('0');
4071 return;
4072
4073 case fcNormal:
4074 break;
4075 }
4076
4077 if (isNegative())
4078 Str.push_back('-');
4079
4080 // Decompose the number into an APInt and an exponent.
4081 int exp = exponent - ((int) semantics->precision - 1);
4082 APInt significand(
4083 semantics->precision,
4084 ArrayRef(significandParts(), partCountForBits(semantics->precision)));
4085
4086 // Set FormatPrecision if zero. We want to do this before we
4087 // truncate trailing zeros, as those are part of the precision.
4088 if (!FormatPrecision) {
4089 // We use enough digits so the number can be round-tripped back to an
4090 // APFloat. The formula comes from "How to Print Floating-Point Numbers
4091 // Accurately" by Steele and White.
4092 // FIXME: Using a formula based purely on the precision is conservative;
4093 // we can print fewer digits depending on the actual value being printed.
4094
4095 // FormatPrecision = 2 + floor(significandBits / lg_2(10))
4096 FormatPrecision = 2 + semantics->precision * 59 / 196;
4097 }
4098
4099 // Ignore trailing binary zeros.
4100 int trailingZeros = significand.countr_zero();
4101 exp += trailingZeros;
4102 significand.lshrInPlace(trailingZeros);
4103
4104 // Change the exponent from 2^e to 10^e.
4105 if (exp == 0) {
4106 // Nothing to do.
4107 } else if (exp > 0) {
4108 // Just shift left.
4109 significand = significand.zext(semantics->precision + exp);
4110 significand <<= exp;
4111 exp = 0;
4112 } else { /* exp < 0 */
4113 int texp = -exp;
4114
4115 // We transform this using the identity:
4116 // (N)(2^-e) == (N)(5^e)(10^-e)
4117 // This means we have to multiply N (the significand) by 5^e.
4118 // To avoid overflow, we have to operate on numbers large
4119 // enough to store N * 5^e:
4120 // log2(N * 5^e) == log2(N) + e * log2(5)
4121 // <= semantics->precision + e * 137 / 59
4122 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
4123
4124 unsigned precision = semantics->precision + (137 * texp + 136) / 59;
4125
4126 // Multiply significand by 5^e.
4127 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
4128 significand = significand.zext(precision);
4129 APInt five_to_the_i(precision, 5);
4130 while (true) {
4131 if (texp & 1) significand *= five_to_the_i;
4132
4133 texp >>= 1;
4134 if (!texp) break;
4135 five_to_the_i *= five_to_the_i;
4136 }
4137 }
4138
4139 AdjustToPrecision(significand, exp, FormatPrecision);
4140
4142
4143 // Fill the buffer.
4144 unsigned precision = significand.getBitWidth();
4145 if (precision < 4) {
4146 // We need enough precision to store the value 10.
4147 precision = 4;
4148 significand = significand.zext(precision);
4149 }
4150 APInt ten(precision, 10);
4151 APInt digit(precision, 0);
4152
4153 bool inTrail = true;
4154 while (significand != 0) {
4155 // digit <- significand % 10
4156 // significand <- significand / 10
4157 APInt::udivrem(significand, ten, significand, digit);
4158
4159 unsigned d = digit.getZExtValue();
4160
4161 // Drop trailing zeros.
4162 if (inTrail && !d) exp++;
4163 else {
4164 buffer.push_back((char) ('0' + d));
4165 inTrail = false;
4166 }
4167 }
4168
4169 assert(!buffer.empty() && "no characters in buffer!");
4170
4171 // Drop down to FormatPrecision.
4172 // TODO: don't do more precise calculations above than are required.
4173 AdjustToPrecision(buffer, exp, FormatPrecision);
4174
4175 unsigned NDigits = buffer.size();
4176
4177 // Check whether we should use scientific notation.
4178 bool FormatScientific;
4179 if (!FormatMaxPadding)
4180 FormatScientific = true;
4181 else {
4182 if (exp >= 0) {
4183 // 765e3 --> 765000
4184 // ^^^
4185 // But we shouldn't make the number look more precise than it is.
4186 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
4187 NDigits + (unsigned) exp > FormatPrecision);
4188 } else {
4189 // Power of the most significant digit.
4190 int MSD = exp + (int) (NDigits - 1);
4191 if (MSD >= 0) {
4192 // 765e-2 == 7.65
4193 FormatScientific = false;
4194 } else {
4195 // 765e-5 == 0.00765
4196 // ^ ^^
4197 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
4198 }
4199 }
4200 }
4201
4202 // Scientific formatting is pretty straightforward.
4203 if (FormatScientific) {
4204 exp += (NDigits - 1);
4205
4206 Str.push_back(buffer[NDigits-1]);
4207 Str.push_back('.');
4208 if (NDigits == 1 && TruncateZero)
4209 Str.push_back('0');
4210 else
4211 for (unsigned I = 1; I != NDigits; ++I)
4212 Str.push_back(buffer[NDigits-1-I]);
4213 // Fill with zeros up to FormatPrecision.
4214 if (!TruncateZero && FormatPrecision > NDigits - 1)
4215 Str.append(FormatPrecision - NDigits + 1, '0');
4216 // For !TruncateZero we use lower 'e'.
4217 Str.push_back(TruncateZero ? 'E' : 'e');
4218
4219 Str.push_back(exp >= 0 ? '+' : '-');
4220 if (exp < 0) exp = -exp;
4221 SmallVector<char, 6> expbuf;
4222 do {
4223 expbuf.push_back((char) ('0' + (exp % 10)));
4224 exp /= 10;
4225 } while (exp);
4226 // Exponent always at least two digits if we do not truncate zeros.
4227 if (!TruncateZero && expbuf.size() < 2)
4228 expbuf.push_back('0');
4229 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
4230 Str.push_back(expbuf[E-1-I]);
4231 return;
4232 }
4233
4234 // Non-scientific, positive exponents.
4235 if (exp >= 0) {
4236 for (unsigned I = 0; I != NDigits; ++I)
4237 Str.push_back(buffer[NDigits-1-I]);
4238 for (unsigned I = 0; I != (unsigned) exp; ++I)
4239 Str.push_back('0');
4240 return;
4241 }
4242
4243 // Non-scientific, negative exponents.
4244
4245 // The number of digits to the left of the decimal point.
4246 int NWholeDigits = exp + (int) NDigits;
4247
4248 unsigned I = 0;
4249 if (NWholeDigits > 0) {
4250 for (; I != (unsigned) NWholeDigits; ++I)
4251 Str.push_back(buffer[NDigits-I-1]);
4252 Str.push_back('.');
4253 } else {
4254 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
4255
4256 Str.push_back('0');
4257 Str.push_back('.');
4258 for (unsigned Z = 1; Z != NZeros; ++Z)
4259 Str.push_back('0');
4260 }
4261
4262 for (; I != NDigits; ++I)
4263 Str.push_back(buffer[NDigits-I-1]);
4264}
4265
4266bool IEEEFloat::getExactInverse(APFloat *inv) const {
4267 // Special floats and denormals have no exact inverse.
4268 if (!isFiniteNonZero())
4269 return false;
4270
4271 // Check that the number is a power of two by making sure that only the
4272 // integer bit is set in the significand.
4273 if (significandLSB() != semantics->precision - 1)
4274 return false;
4275
4276 // Get the inverse.
4277 IEEEFloat reciprocal(*semantics, 1ULL);
4278 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
4279 return false;
4280
4281 // Avoid multiplication with a denormal, it is not safe on all platforms and
4282 // may be slower than a normal division.
4283 if (reciprocal.isDenormal())
4284 return false;
4285
4286 assert(reciprocal.isFiniteNonZero() &&
4287 reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
4288
4289 if (inv)
4290 *inv = APFloat(reciprocal, *semantics);
4291
4292 return true;
4293}
4294
4295int IEEEFloat::getExactLog2Abs() const {
4296 if (!isFinite() || isZero())
4297 return INT_MIN;
4298
4299 const integerPart *Parts = significandParts();
4300 const int PartCount = partCountForBits(semantics->precision);
4301
4302 int PopCount = 0;
4303 for (int i = 0; i < PartCount; ++i) {
4304 PopCount += llvm::popcount(Parts[i]);
4305 if (PopCount > 1)
4306 return INT_MIN;
4307 }
4308
4309 if (exponent != semantics->minExponent)
4310 return exponent;
4311
4312 int CountrParts = 0;
4313 for (int i = 0; i < PartCount;
4314 ++i, CountrParts += APInt::APINT_BITS_PER_WORD) {
4315 if (Parts[i] != 0) {
4316 return exponent - semantics->precision + CountrParts +
4317 llvm::countr_zero(Parts[i]) + 1;
4318 }
4319 }
4320
4321 llvm_unreachable("didn't find the set bit");
4322}
4323
4324bool IEEEFloat::isSignaling() const {
4325 if (!isNaN())
4326 return false;
4327 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
4328 return false;
4329
4330 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
4331 // first bit of the trailing significand being 0.
4332 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
4333}
4334
4335/// IEEE-754R 2008 5.3.1: nextUp/nextDown.
4336///
4337/// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
4338/// appropriate sign switching before/after the computation.
4339IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
4340 // If we are performing nextDown, swap sign so we have -x.
4341 if (nextDown)
4342 changeSign();
4343
4344 // Compute nextUp(x)
4345 opStatus result = opOK;
4346
4347 // Handle each float category separately.
4348 switch (category) {
4349 case fcInfinity:
4350 // nextUp(+inf) = +inf
4351 if (!isNegative())
4352 break;
4353 // nextUp(-inf) = -getLargest()
4354 makeLargest(true);
4355 break;
4356 case fcNaN:
4357 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
4358 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
4359 // change the payload.
4360 if (isSignaling()) {
4361 result = opInvalidOp;
4362 // For consistency, propagate the sign of the sNaN to the qNaN.
4363 makeNaN(false, isNegative(), nullptr);
4364 }
4365 break;
4366 case fcZero:
4367 // nextUp(pm 0) = +getSmallest()
4368 makeSmallest(false);
4369 break;
4370 case fcNormal:
4371 // nextUp(-getSmallest()) = -0
4372 if (isSmallest() && isNegative()) {
4373 APInt::tcSet(significandParts(), 0, partCount());
4374 category = fcZero;
4375 exponent = 0;
4376 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
4377 sign = false;
4378 break;
4379 }
4380
4381 if (isLargest() && !isNegative()) {
4382 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4383 // nextUp(getLargest()) == NAN
4384 makeNaN();
4385 break;
4386 } else {
4387 // nextUp(getLargest()) == INFINITY
4388 APInt::tcSet(significandParts(), 0, partCount());
4389 category = fcInfinity;
4390 exponent = semantics->maxExponent + 1;
4391 break;
4392 }
4393 }
4394
4395 // nextUp(normal) == normal + inc.
4396 if (isNegative()) {
4397 // If we are negative, we need to decrement the significand.
4398
4399 // We only cross a binade boundary that requires adjusting the exponent
4400 // if:
4401 // 1. exponent != semantics->minExponent. This implies we are not in the
4402 // smallest binade or are dealing with denormals.
4403 // 2. Our significand excluding the integral bit is all zeros.
4404 bool WillCrossBinadeBoundary =
4405 exponent != semantics->minExponent && isSignificandAllZeros();
4406
4407 // Decrement the significand.
4408 //
4409 // We always do this since:
4410 // 1. If we are dealing with a non-binade decrement, by definition we
4411 // just decrement the significand.
4412 // 2. If we are dealing with a normal -> normal binade decrement, since
4413 // we have an explicit integral bit the fact that all bits but the
4414 // integral bit are zero implies that subtracting one will yield a
4415 // significand with 0 integral bit and 1 in all other spots. Thus we
4416 // must just adjust the exponent and set the integral bit to 1.
4417 // 3. If we are dealing with a normal -> denormal binade decrement,
4418 // since we set the integral bit to 0 when we represent denormals, we
4419 // just decrement the significand.
4420 integerPart *Parts = significandParts();
4421 APInt::tcDecrement(Parts, partCount());
4422
4423 if (WillCrossBinadeBoundary) {
4424 // Our result is a normal number. Do the following:
4425 // 1. Set the integral bit to 1.
4426 // 2. Decrement the exponent.
4427 APInt::tcSetBit(Parts, semantics->precision - 1);
4428 exponent--;
4429 }
4430 } else {
4431 // If we are positive, we need to increment the significand.
4432
4433 // We only cross a binade boundary that requires adjusting the exponent if
4434 // the input is not a denormal and all of said input's significand bits
4435 // are set. If all of said conditions are true: clear the significand, set
4436 // the integral bit to 1, and increment the exponent. If we have a
4437 // denormal always increment since moving denormals and the numbers in the
4438 // smallest normal binade have the same exponent in our representation.
4439 bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes();
4440
4441 if (WillCrossBinadeBoundary) {
4442 integerPart *Parts = significandParts();
4443 APInt::tcSet(Parts, 0, partCount());
4444 APInt::tcSetBit(Parts, semantics->precision - 1);
4445 assert(exponent != semantics->maxExponent &&
4446 "We can not increment an exponent beyond the maxExponent allowed"
4447 " by the given floating point semantics.");
4448 exponent++;
4449 } else {
4450 incrementSignificand();
4451 }
4452 }
4453 break;
4454 }
4455
4456 // If we are performing nextDown, swap sign so we have -nextUp(-x)
4457 if (nextDown)
4458 changeSign();
4459
4460 return result;
4461}
4462
4463APFloatBase::ExponentType IEEEFloat::exponentNaN() const {
4464 return ::exponentNaN(*semantics);
4465}
4466
4467APFloatBase::ExponentType IEEEFloat::exponentInf() const {
4468 return ::exponentInf(*semantics);
4469}
4470
4471APFloatBase::ExponentType IEEEFloat::exponentZero() const {
4472 return ::exponentZero(*semantics);
4473}
4474
4475void IEEEFloat::makeInf(bool Negative) {
4476 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4477 // There is no Inf, so make NaN instead.
4478 makeNaN(false, Negative);
4479 return;
4480 }
4481 category = fcInfinity;
4482 sign = Negative;
4483 exponent = exponentInf();
4484 APInt::tcSet(significandParts(), 0, partCount());
4485}
4486
4487void IEEEFloat::makeZero(bool Negative) {
4488 category = fcZero;
4489 sign = Negative;
4490 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
4491 // Merge negative zero to positive because 0b10000...000 is used for NaN
4492 sign = false;
4493 }
4494 exponent = exponentZero();
4495 APInt::tcSet(significandParts(), 0, partCount());
4496}
4497
4498void IEEEFloat::makeQuiet() {
4499 assert(isNaN());
4500 if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::NanOnly)
4501 APInt::tcSetBit(significandParts(), semantics->precision - 2);
4502}
4503
4504int ilogb(const IEEEFloat &Arg) {
4505 if (Arg.isNaN())
4506 return IEEEFloat::IEK_NaN;
4507 if (Arg.isZero())
4508 return IEEEFloat::IEK_Zero;
4509 if (Arg.isInfinity())
4510 return IEEEFloat::IEK_Inf;
4511 if (!Arg.isDenormal())
4512 return Arg.exponent;
4513
4514 IEEEFloat Normalized(Arg);
4515 int SignificandBits = Arg.getSemantics().precision - 1;
4516
4517 Normalized.exponent += SignificandBits;
4518 Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero);
4519 return Normalized.exponent - SignificandBits;
4520}
4521
4523 auto MaxExp = X.getSemantics().maxExponent;
4524 auto MinExp = X.getSemantics().minExponent;
4525
4526 // If Exp is wildly out-of-scale, simply adding it to X.exponent will
4527 // overflow; clamp it to a safe range before adding, but ensure that the range
4528 // is large enough that the clamp does not change the result. The range we
4529 // need to support is the difference between the largest possible exponent and
4530 // the normalized exponent of half the smallest denormal.
4531
4532 int SignificandBits = X.getSemantics().precision - 1;
4533 int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
4534
4535 // Clamp to one past the range ends to let normalize handle overflow.
4536 X.exponent += std::clamp(Exp, -MaxIncrement - 1, MaxIncrement);
4537 X.normalize(RoundingMode, lfExactlyZero);
4538 if (X.isNaN())
4539 X.makeQuiet();
4540 return X;
4541}
4542
4544 Exp = ilogb(Val);
4545
4546 // Quiet signalling nans.
4547 if (Exp == IEEEFloat::IEK_NaN) {
4548 IEEEFloat Quiet(Val);
4549 Quiet.makeQuiet();
4550 return Quiet;
4551 }
4552
4553 if (Exp == IEEEFloat::IEK_Inf)
4554 return Val;
4555
4556 // 1 is added because frexp is defined to return a normalized fraction in
4557 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
4558 Exp = Exp == IEEEFloat::IEK_Zero ? 0 : Exp + 1;
4559 return scalbn(Val, -Exp, RM);
4560}
4561
4562DoubleAPFloat::DoubleAPFloat(const fltSemantics &S)
4563 : Semantics(&S),
4565 assert(Semantics == &semPPCDoubleDouble);
4566}
4567
4569 : Semantics(&S),
4572 assert(Semantics == &semPPCDoubleDouble);
4573}
4574
4576 : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I),
4578 assert(Semantics == &semPPCDoubleDouble);
4579}
4580
4582 : Semantics(&S),
4583 Floats(new APFloat[2]{
4584 APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])),
4585 APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
4586 assert(Semantics == &semPPCDoubleDouble);
4587}
4588
4590 APFloat &&Second)
4591 : Semantics(&S),
4592 Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
4593 assert(Semantics == &semPPCDoubleDouble);
4594 assert(&Floats[0].getSemantics() == &semIEEEdouble);
4595 assert(&Floats[1].getSemantics() == &semIEEEdouble);
4596}
4597
4600 Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
4601 APFloat(RHS.Floats[1])}
4602 : nullptr) {
4603 assert(Semantics == &semPPCDoubleDouble);
4604}
4605
4607 : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) {
4608 RHS.Semantics = &semBogus;
4610}
4611
4613 if (Semantics == RHS.Semantics && RHS.Floats) {
4614 Floats[0] = RHS.Floats[0];
4615 Floats[1] = RHS.Floats[1];
4616 } else if (this != &RHS) {
4617 this->~DoubleAPFloat();
4618 new (this) DoubleAPFloat(RHS);
4619 }
4620 return *this;
4621}
4622
4623// Implement addition, subtraction, multiplication and division based on:
4624// "Software for Doubled-Precision Floating-Point Computations",
4625// by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
4626APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
4627 const APFloat &c, const APFloat &cc,
4628 roundingMode RM) {
4629 int Status = opOK;
4630 APFloat z = a;
4631 Status |= z.add(c, RM);
4632 if (!z.isFinite()) {
4633 if (!z.isInfinity()) {
4634 Floats[0] = std::move(z);
4635 Floats[1].makeZero(/* Neg = */ false);
4636 return (opStatus)Status;
4637 }
4638 Status = opOK;
4639 auto AComparedToC = a.compareAbsoluteValue(c);
4640 z = cc;
4641 Status |= z.add(aa, RM);
4642 if (AComparedToC == APFloat::cmpGreaterThan) {
4643 // z = cc + aa + c + a;
4644 Status |= z.add(c, RM);
4645 Status |= z.add(a, RM);
4646 } else {
4647 // z = cc + aa + a + c;
4648 Status |= z.add(a, RM);
4649 Status |= z.add(c, RM);
4650 }
4651 if (!z.isFinite()) {
4652 Floats[0] = std::move(z);
4653 Floats[1].makeZero(/* Neg = */ false);
4654 return (opStatus)Status;
4655 }
4656 Floats[0] = z;
4657 APFloat zz = aa;
4658 Status |= zz.add(cc, RM);
4659 if (AComparedToC == APFloat::cmpGreaterThan) {
4660 // Floats[1] = a - z + c + zz;
4661 Floats[1] = a;
4662 Status |= Floats[1].subtract(z, RM);
4663 Status |= Floats[1].add(c, RM);
4664 Status |= Floats[1].add(zz, RM);
4665 } else {
4666 // Floats[1] = c - z + a + zz;
4667 Floats[1] = c;
4668 Status |= Floats[1].subtract(z, RM);
4669 Status |= Floats[1].add(a, RM);
4670 Status |= Floats[1].add(zz, RM);
4671 }
4672 } else {
4673 // q = a - z;
4674 APFloat q = a;
4675 Status |= q.subtract(z, RM);
4676
4677 // zz = q + c + (a - (q + z)) + aa + cc;
4678 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
4679 auto zz = q;
4680 Status |= zz.add(c, RM);
4681 Status |= q.add(z, RM);
4682 Status |= q.subtract(a, RM);
4683 q.changeSign();
4684 Status |= zz.add(q, RM);
4685 Status |= zz.add(aa, RM);
4686 Status |= zz.add(cc, RM);
4687 if (zz.isZero() && !zz.isNegative()) {
4688 Floats[0] = std::move(z);
4689 Floats[1].makeZero(/* Neg = */ false);
4690 return opOK;
4691 }
4692 Floats[0] = z;
4693 Status |= Floats[0].add(zz, RM);
4694 if (!Floats[0].isFinite()) {
4695 Floats[1].makeZero(/* Neg = */ false);
4696 return (opStatus)Status;
4697 }
4698 Floats[1] = std::move(z);
4699 Status |= Floats[1].subtract(Floats[0], RM);
4700 Status |= Floats[1].add(zz, RM);
4701 }
4702 return (opStatus)Status;
4703}
4704
4705APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
4706 const DoubleAPFloat &RHS,
4707 DoubleAPFloat &Out,
4708 roundingMode RM) {
4709 if (LHS.getCategory() == fcNaN) {
4710 Out = LHS;
4711 return opOK;
4712 }
4713 if (RHS.getCategory() == fcNaN) {
4714 Out = RHS;
4715 return opOK;
4716 }
4717 if (LHS.getCategory() == fcZero) {
4718 Out = RHS;
4719 return opOK;
4720 }
4721 if (RHS.getCategory() == fcZero) {
4722 Out = LHS;
4723 return opOK;
4724 }
4725 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity &&
4726 LHS.isNegative() != RHS.isNegative()) {
4727 Out.makeNaN(false, Out.isNegative(), nullptr);
4728 return opInvalidOp;
4729 }
4730 if (LHS.getCategory() == fcInfinity) {
4731 Out = LHS;
4732 return opOK;
4733 }
4734 if (RHS.getCategory() == fcInfinity) {
4735 Out = RHS;
4736 return opOK;
4737 }
4738 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
4739
4740 APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
4741 CC(RHS.Floats[1]);
4742 assert(&A.getSemantics() == &semIEEEdouble);
4743 assert(&AA.getSemantics() == &semIEEEdouble);
4744 assert(&C.getSemantics() == &semIEEEdouble);
4745 assert(&CC.getSemantics() == &semIEEEdouble);
4746 assert(&Out.Floats[0].getSemantics() == &semIEEEdouble);
4747 assert(&Out.Floats[1].getSemantics() == &semIEEEdouble);
4748 return Out.addImpl(A, AA, C, CC, RM);
4749}
4750
4752 roundingMode RM) {
4753 return addWithSpecial(*this, RHS, *this, RM);
4754}
4755
4757 roundingMode RM) {
4758 changeSign();
4759 auto Ret = add(RHS, RM);
4760 changeSign();
4761 return Ret;
4762}
4763
4766 const auto &LHS = *this;
4767 auto &Out = *this;
4768 /* Interesting observation: For special categories, finding the lowest
4769 common ancestor of the following layered graph gives the correct
4770 return category:
4771
4772 NaN
4773 / \
4774 Zero Inf
4775 \ /
4776 Normal
4777
4778 e.g. NaN * NaN = NaN
4779 Zero * Inf = NaN
4780 Normal * Zero = Zero
4781 Normal * Inf = Inf
4782 */
4783 if (LHS.getCategory() == fcNaN) {
4784 Out = LHS;
4785 return opOK;
4786 }
4787 if (RHS.getCategory() == fcNaN) {
4788 Out = RHS;
4789 return opOK;
4790 }
4791 if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) ||
4792 (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) {
4793 Out.makeNaN(false, false, nullptr);
4794 return opOK;
4795 }
4796 if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) {
4797 Out = LHS;
4798 return opOK;
4799 }
4800 if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) {
4801 Out = RHS;
4802 return opOK;
4803 }
4804 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal &&
4805 "Special cases not handled exhaustively");
4806
4807 int Status = opOK;
4808 APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
4809 // t = a * c
4810 APFloat T = A;
4811 Status |= T.multiply(C, RM);
4812 if (!T.isFiniteNonZero()) {
4813 Floats[0] = T;
4814 Floats[1].makeZero(/* Neg = */ false);
4815 return (opStatus)Status;
4816 }
4817
4818 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
4819 APFloat Tau = A;
4820 T.changeSign();
4821 Status |= Tau.fusedMultiplyAdd(C, T, RM);
4822 T.changeSign();
4823 {
4824 // v = a * d
4825 APFloat V = A;
4826 Status |= V.multiply(D, RM);
4827 // w = b * c
4828 APFloat W = B;
4829 Status |= W.multiply(C, RM);
4830 Status |= V.add(W, RM);
4831 // tau += v + w
4832 Status |= Tau.add(V, RM);
4833 }
4834 // u = t + tau
4835 APFloat U = T;
4836 Status |= U.add(Tau, RM);
4837
4838 Floats[0] = U;
4839 if (!U.isFinite()) {
4840 Floats[1].makeZero(/* Neg = */ false);
4841 } else {
4842 // Floats[1] = (t - u) + tau
4843 Status |= T.subtract(U, RM);
4844 Status |= T.add(Tau, RM);
4845 Floats[1] = T;
4846 }
4847 return (opStatus)Status;
4848}
4849
4852 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4854 auto Ret =
4855 Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
4857 return Ret;
4858}
4859
4861 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4863 auto Ret =
4864 Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4866 return Ret;
4867}
4868
4870 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4872 auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4874 return Ret;
4875}
4876
4879 const DoubleAPFloat &Addend,
4881 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4883 auto Ret = Tmp.fusedMultiplyAdd(
4887 return Ret;
4888}
4889
4891 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4893 auto Ret = Tmp.roundToIntegral(RM);
4895 return Ret;
4896}
4897
4899 Floats[0].changeSign();
4900 Floats[1].changeSign();
4901}
4902
4905 auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]);
4906 if (Result != cmpEqual)
4907 return Result;
4908 Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]);
4909 if (Result == cmpLessThan || Result == cmpGreaterThan) {
4910 auto Against = Floats[0].isNegative() ^ Floats[1].isNegative();
4911 auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative();
4912 if (Against && !RHSAgainst)
4913 return cmpLessThan;
4914 if (!Against && RHSAgainst)
4915 return cmpGreaterThan;
4916 if (!Against && !RHSAgainst)
4917 return Result;
4918 if (Against && RHSAgainst)
4919 return (cmpResult)(cmpLessThan + cmpGreaterThan - Result);
4920 }
4921 return Result;
4922}
4923
4925 return Floats[0].getCategory();
4926}
4927
4928bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
4929
4931 Floats[0].makeInf(Neg);
4932 Floats[1].makeZero(/* Neg = */ false);
4933}
4934
4936 Floats[0].makeZero(Neg);
4937 Floats[1].makeZero(/* Neg = */ false);
4938}
4939
4941 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4942 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
4943 Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
4944 if (Neg)
4945 changeSign();
4946}
4947
4949 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4950 Floats[0].makeSmallest(Neg);
4951 Floats[1].makeZero(/* Neg = */ false);
4952}
4953
4955 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4956 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull));
4957 if (Neg)
4958 Floats[0].changeSign();
4959 Floats[1].makeZero(/* Neg = */ false);
4960}
4961
4962void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
4963 Floats[0].makeNaN(SNaN, Neg, fill);
4964 Floats[1].makeZero(/* Neg = */ false);
4965}
4966
4968 auto Result = Floats[0].compare(RHS.Floats[0]);
4969 // |Float[0]| > |Float[1]|
4970 if (Result == APFloat::cmpEqual)
4971 return Floats[1].compare(RHS.Floats[1]);
4972 return Result;
4973}
4974
4976 return Floats[0].bitwiseIsEqual(RHS.Floats[0]) &&
4977 Floats[1].bitwiseIsEqual(RHS.Floats[1]);
4978}
4979
4981 if (Arg.Floats)
4982 return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1]));
4983 return hash_combine(Arg.Semantics);
4984}
4985
4987 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4988 uint64_t Data[] = {
4989 Floats[0].bitcastToAPInt().getRawData()[0],
4990 Floats[1].bitcastToAPInt().getRawData()[0],
4991 };
4992 return APInt(128, 2, Data);
4993}
4994
4996 roundingMode RM) {
4997 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4999 auto Ret = Tmp.convertFromString(S, RM);
5001 return Ret;
5002}
5003
5005 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5007 auto Ret = Tmp.next(nextDown);
5009 return Ret;
5010}
5011
5014 unsigned int Width, bool IsSigned,
5015 roundingMode RM, bool *IsExact) const {
5016 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5018 .convertToInteger(Input, Width, IsSigned, RM, IsExact);
5019}
5020
5022 bool IsSigned,
5023 roundingMode RM) {
5024 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5026 auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM);
5028 return Ret;
5029}
5030
5033 unsigned int InputSize,
5034 bool IsSigned, roundingMode RM) {
5035 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5037 auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM);
5039 return Ret;
5040}
5041
5044 unsigned int InputSize,
5045 bool IsSigned, roundingMode RM) {
5046 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5048 auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM);
5050 return Ret;
5051}
5052
5054 unsigned int HexDigits,
5055 bool UpperCase,
5056 roundingMode RM) const {
5057 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5059 .convertToHexString(DST, HexDigits, UpperCase, RM);
5060}
5061
5063 return getCategory() == fcNormal &&
5064 (Floats[0].isDenormal() || Floats[1].isDenormal() ||
5065 // (double)(Hi + Lo) == Hi defines a normal number.
5066 Floats[0] != Floats[0] + Floats[1]);
5067}
5068
5070 if (getCategory() != fcNormal)
5071 return false;
5072 DoubleAPFloat Tmp(*this);
5073 Tmp.makeSmallest(this->isNegative());
5074 return Tmp.compare(*this) == cmpEqual;
5075}
5076
5078 if (getCategory() != fcNormal)
5079 return false;
5080
5081 DoubleAPFloat Tmp(*this);
5083 return Tmp.compare(*this) == cmpEqual;
5084}
5085
5087 if (getCategory() != fcNormal)
5088 return false;
5089 DoubleAPFloat Tmp(*this);
5090 Tmp.makeLargest(this->isNegative());
5091 return Tmp.compare(*this) == cmpEqual;
5092}
5093
5095 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5096 return Floats[0].isInteger() && Floats[1].isInteger();
5097}
5098
5100 unsigned FormatPrecision,
5101 unsigned FormatMaxPadding,
5102 bool TruncateZero) const {
5103 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5105 .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
5106}
5107
5109 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5111 if (!inv)
5112 return Tmp.getExactInverse(nullptr);
5114 auto Ret = Tmp.getExactInverse(&Inv);
5116 return Ret;
5117}
5118
5120 // TODO: Implement me
5121 return INT_MIN;
5122}
5123
5125 // TODO: Implement me
5126 return INT_MIN;
5127}
5128
5131 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5132 return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM),
5133 scalbn(Arg.Floats[1], Exp, RM));
5134}
5135
5136DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
5138 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5139 APFloat First = frexp(Arg.Floats[0], Exp, RM);
5140 APFloat Second = Arg.Floats[1];
5141 if (Arg.getCategory() == APFloat::fcNormal)
5142 Second = scalbn(Second, -Exp, RM);
5143 return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second));
5144}
5145
5146} // namespace detail
5147
5148APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
5149 if (usesLayout<IEEEFloat>(Semantics)) {
5150 new (&IEEE) IEEEFloat(std::move(F));
5151 return;
5152 }
5153 if (usesLayout<DoubleAPFloat>(Semantics)) {
5154 const fltSemantics& S = F.getSemantics();
5155 new (&Double)
5156 DoubleAPFloat(Semantics, APFloat(std::move(F), S),
5158 return;
5159 }
5160 llvm_unreachable("Unexpected semantics");
5161}
5162
5164 roundingMode RM) {
5166}
5167
5169 if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics()))
5170 return hash_value(Arg.U.IEEE);
5171 if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics()))
5172 return hash_value(Arg.U.Double);
5173 llvm_unreachable("Unexpected semantics");
5174}
5175
5176APFloat::APFloat(const fltSemantics &Semantics, StringRef S)
5177 : APFloat(Semantics) {
5178 auto StatusOrErr = convertFromString(S, rmNearestTiesToEven);
5179 assert(StatusOrErr && "Invalid floating point representation");
5180 consumeError(StatusOrErr.takeError());
5181}
5182
5184 if (isZero())
5185 return isNegative() ? fcNegZero : fcPosZero;
5186 if (isNormal())
5187 return isNegative() ? fcNegNormal : fcPosNormal;
5188 if (isDenormal())
5190 if (isInfinity())
5191 return isNegative() ? fcNegInf : fcPosInf;
5192 assert(isNaN() && "Other class of FP constant");
5193 return isSignaling() ? fcSNan : fcQNan;
5194}
5195
5197 roundingMode RM, bool *losesInfo) {
5198 if (&getSemantics() == &ToSemantics) {
5199 *losesInfo = false;
5200 return opOK;
5201 }
5202 if (usesLayout<IEEEFloat>(getSemantics()) &&
5203 usesLayout<IEEEFloat>(ToSemantics))
5204 return U.IEEE.convert(ToSemantics, RM, losesInfo);
5205 if (usesLayout<IEEEFloat>(getSemantics()) &&
5206 usesLayout<DoubleAPFloat>(ToSemantics)) {
5207 assert(&ToSemantics == &semPPCDoubleDouble);
5208 auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo);
5209 *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
5210 return Ret;
5211 }
5212 if (usesLayout<DoubleAPFloat>(getSemantics()) &&
5213 usesLayout<IEEEFloat>(ToSemantics)) {
5214 auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo);
5215 *this = APFloat(std::move(getIEEE()), ToSemantics);
5216 return Ret;
5217 }
5218 llvm_unreachable("Unexpected semantics");
5219}
5220
5222 return APFloat(Semantics, APInt::getAllOnes(Semantics.sizeInBits));
5223}
5224
5226 SmallVector<char, 16> Buffer;
5227 toString(Buffer);
5228 OS << Buffer << "\n";
5229}
5230
5231#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
5233#endif
5234
5236 NID.Add(bitcastToAPInt());
5237}
5238
5239/* Same as convertToInteger(integerPart*, ...), except the result is returned in
5240 an APSInt, whose initial bit-width and signed-ness are used to determine the
5241 precision of the conversion.
5242 */
5244 roundingMode rounding_mode,
5245 bool *isExact) const {
5246 unsigned bitWidth = result.getBitWidth();
5247 SmallVector<uint64_t, 4> parts(result.getNumWords());
5248 opStatus status = convertToInteger(parts, bitWidth, result.isSigned(),
5249 rounding_mode, isExact);
5250 // Keeps the original signed-ness.
5251 result = APInt(bitWidth, parts);
5252 return status;
5253}
5254
5256 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEdouble)
5257 return getIEEE().convertToDouble();
5258 assert(getSemantics().isRepresentableBy(semIEEEdouble) &&
5259 "Float semantics is not representable by IEEEdouble");
5260 APFloat Temp = *this;
5261 bool LosesInfo;
5262 opStatus St = Temp.convert(semIEEEdouble, rmNearestTiesToEven, &LosesInfo);
5263 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
5264 (void)St;
5265 return Temp.getIEEE().convertToDouble();
5266}
5267
5269 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEsingle)
5270 return getIEEE().convertToFloat();
5271 assert(getSemantics().isRepresentableBy(semIEEEsingle) &&
5272 "Float semantics is not representable by IEEEsingle");
5273 APFloat Temp = *this;
5274 bool LosesInfo;
5275 opStatus St = Temp.convert(semIEEEsingle, rmNearestTiesToEven, &LosesInfo);
5276 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
5277 (void)St;
5278 return Temp.getIEEE().convertToFloat();
5279}
5280
5281} // namespace llvm
5282
5283#undef APFLOAT_DISPATCH_ON_SEMANTICS
#define PackCategoriesIntoKey(_lhs, _rhs)
A macro used to combine two fcCategory enums into one key which can be used in a switch statement to classify how the interaction of two APFloat's categories affects an operation.
Definition: APFloat.cpp:48
This file declares a class to represent arbitrary precision floating point values and provide a variety of arithmetic operations on them.
#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL)
Definition: APFloat.h:24
This file implements the APSInt class, which is a simple class that represents an arbitrary sized integer that knows its signedness.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:529
Looks at all the uses of the given value. Returns the Liveness deduced from the uses of this value. Adds all uses that cause the result to be MaybeLive to MaybeLiveRetUses. If the result is Live, MaybeLiveUses might be modified but its content should be ignored (since it might not be complete). DeadArgumentEliminationPass
Given that RA is a live value
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool isSigned(unsigned int Opcode)
expand large fp convert
Utilities for dealing with flags related to floating point properties and mode controls.
This file defines a hash set that can be used to remove duplication of nodes in a graph.
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:531
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define P(N)
if(VerifyEach)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
This file contains some functions that are useful when dealing with strings.
Value * RHS
Value * LHS
void Profile(FoldingSetNodeID &NID) const
Used to insert APFloat objects, or objects that contain APFloat objects, into FoldingSets.
Definition: APFloat.cpp:5235
opStatus divide(const APFloat &RHS, roundingMode RM)
Definition: APFloat.h:1069
bool getExactInverse(APFloat *inv) const
Definition: APFloat.h:1334
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:5196
bool isNegative() const
Definition: APFloat.h:1295
double convertToDouble() const
Converts this APFloat to host double value.
Definition: APFloat.cpp:5255
void toString(SmallVectorImpl< char > &Str, unsigned FormatPrecision=0, unsigned FormatMaxPadding=3, bool TruncateZero=true) const
Definition: APFloat.h:1325
bool isNormal() const
Definition: APFloat.h:1299
bool isDenormal() const
Definition: APFloat.h:1296
opStatus add(const APFloat &RHS, roundingMode RM)
Definition: APFloat.h:1042
static APFloat getAllOnesValue(const fltSemantics &Semantics)
Returns a float which is bitcasted from an all one value int.
Definition: APFloat.cpp:5221
const fltSemantics & getSemantics() const
Definition: APFloat.h:1303
opStatus convertFromSignExtendedInteger(const integerPart *Input, unsigned int InputSize, bool IsSigned, roundingMode RM)
Definition: APFloat.h:1197
bool isFinite() const
Definition: APFloat.h:1300
bool isNaN() const
Definition: APFloat.h:1293
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition: APFloat.h:1193
unsigned int convertToHexString(char *DST, unsigned int HexDigits, bool UpperCase, roundingMode RM) const
Definition: APFloat.h:1285
float convertToFloat() const
Converts this APFloat to host float value.
Definition: APFloat.cpp:5268
bool isSignaling() const
Definition: APFloat.h:1297
opStatus fusedMultiplyAdd(const APFloat &Multiplicand, const APFloat &Addend, roundingMode RM)
Definition: APFloat.h:1096
opStatus remainder(const APFloat &RHS)
Definition: APFloat.h:1078
bool isZero() const
Definition: APFloat.h:1291
APInt bitcastToAPInt() const
Definition: APFloat.h:1210
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1185
opStatus next(bool nextDown)
Definition: APFloat.h:1115
FPClassTest classify() const
Return the FPClassTest which will return true for the value.
Definition: APFloat.cpp:5183
opStatus mod(const APFloat &RHS)
Definition: APFloat.h:1087
Expected< opStatus > convertFromString(StringRef, roundingMode)
Definition: APFloat.cpp:5163
void dump() const
Definition: APFloat.cpp:5232
void print(raw_ostream &) const
Definition: APFloat.cpp:5225
opStatus roundToIntegral(roundingMode RM)
Definition: APFloat.h:1109
opStatus convertFromZeroExtendedInteger(const integerPart *Input, unsigned int InputSize, bool IsSigned, roundingMode RM)
Definition: APFloat.h:1203
bool isInfinity() const
Definition: APFloat.h:1292
Class for arbitrary precision integers.
Definition: APInt.h:76
APInt udiv(const APInt &RHS) const
Unsigned division operation.
Definition: APInt.cpp:1579
static void tcSetBit(WordType *, unsigned bit)
Set the given bit of a bignum. Zero-based.
Definition: APInt.cpp:2374
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:212
static void tcSet(WordType *, WordType, unsigned)
Sets the least significant part of a bignum to the input value, and zeroes out higher parts.
Definition: APInt.cpp:2346
static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition: APInt.cpp:1764
static int tcExtractBit(const WordType *, unsigned bit)
Extract the given bit of a bignum; returns 0 or 1. Zero-based.
Definition: APInt.cpp:2369
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1491
static WordType tcAdd(WordType *, const WordType *, WordType carry, unsigned)
DST += RHS + CARRY where CARRY is zero or one. Returns the carry flag.
Definition: APInt.cpp:2448
static void tcExtract(WordType *, unsigned dstCount, const WordType *, unsigned srcBits, unsigned srcLSB)
Copy the bit vector of width srcBITS from SRC, starting at bit srcLSB, to DST, of dstCOUNT parts,...
Definition: APInt.cpp:2418
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1463
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
static int tcCompare(const WordType *, const WordType *, unsigned)
Comparison (unsigned) of two bignums.
Definition: APInt.cpp:2754
static APInt floatToBits(float V)
Converts a float to APInt bits.
Definition: APInt.h:1694
static void tcAssign(WordType *, const WordType *, unsigned)
Assign one bignum to another.
Definition: APInt.cpp:2354
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1439
uint64_t WordType
Definition: APInt.h:78
static void tcShiftRight(WordType *, unsigned Words, unsigned Count)
Shift a bignum right Count bits.
Definition: APInt.cpp:2728
static void tcFullMultiply(WordType *, const WordType *, const WordType *, unsigned, unsigned)
DST = LHS * RHS, where DST has width the sum of the widths of the operands.
Definition: APInt.cpp:2635
unsigned getNumWords() const
Get the number of words.
Definition: APInt.h:1446
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:307
@ APINT_BITS_PER_WORD
Bits in a word.
Definition: APInt.h:85
static void tcClearBit(WordType *, unsigned bit)
Clear the given bit of a bignum. Zero-based.
Definition: APInt.cpp:2379
static WordType tcDecrement(WordType *dst, unsigned parts)
Decrement a bignum in-place. Return the borrow flag.
Definition: APInt.h:1856
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1589
static unsigned tcLSB(const WordType *, unsigned n)
Returns the bit number of the least or most significant set bit of a number.
Definition: APInt.cpp:2385
static void tcShiftLeft(WordType *, unsigned Words, unsigned Count)
Shift a bignum left Count bits.
Definition: APInt.cpp:2701
static bool tcIsZero(const WordType *, unsigned)
Returns true if a bignum is zero, false otherwise.
Definition: APInt.cpp:2360
static unsigned tcMSB(const WordType *parts, unsigned n)
Returns the bit number of the most significant set bit of a number.
Definition: APInt.cpp:2398
float bitsToFloat() const
Converts APInt bits to a float.
Definition: APInt.h:1678
static int tcMultiplyPart(WordType *dst, const WordType *src, WordType multiplier, WordType carry, unsigned srcParts, unsigned dstParts, bool add)
DST += SRC * MULTIPLIER + PART if add is true DST = SRC * MULTIPLIER + PART if add is false.
Definition: APInt.cpp:2536
static WordType tcSubtract(WordType *, const WordType *, WordType carry, unsigned)
DST -= RHS + CARRY where CARRY is zero or one. Returns the carry flag.
Definition: APInt.cpp:2483
static void tcNegate(WordType *, unsigned)
Negate a bignum in-place.
Definition: APInt.cpp:2522
static APInt doubleToBits(double V)
Converts a double to APInt bits.
Definition: APInt.h:1686
static WordType tcIncrement(WordType *dst, unsigned parts)
Increment a bignum in-place. Return the carry flag.
Definition: APInt.h:1851
double bitsToDouble() const
Converts APInt bits to a double.
Definition: APInt.h:1671
const uint64_t * getRawData() const
This function returns a pointer to the internal storage of the APInt.
Definition: APInt.h:547
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:178
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:836
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
bool isSigned() const
Definition: APSInt.h:77
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
Lightweight error class with error context and mandatory checking.
Definition: Error.h:160
static ErrorSuccess success()
Create a success value.
Definition: Error.h:334
Tagged union holding either a T or a Error.
Definition: Error.h:474
FoldingSetNodeID - This class is used to gather all the unique data bits of a node.
Definition: FoldingSet.h:320
void Add(const T &x)
Definition: FoldingSet.h:370
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
T * data() const
Definition: ArrayRef.h:354
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
iterator erase(const_iterator CI)
Definition: SmallVector.h:750
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:466
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:257
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:134
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition: StringRef.h:605
iterator begin() const
Definition: StringRef.h:111
char back() const
back - Get the last character in the string.
Definition: StringRef.h:146
StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition: StringRef.h:680
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
char front() const
front - Get the first character in the string.
Definition: StringRef.h:140
iterator end() const
Definition: StringRef.h:113
bool equals(StringRef RHS) const
equals - Check for string equality, this is more efficient than compare() when the relative ordering ...
Definition: StringRef.h:164
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
void makeSmallestNormalized(bool Neg)
Definition: APFloat.cpp:4954
DoubleAPFloat & operator=(const DoubleAPFloat &RHS)
Definition: APFloat.cpp:4612
LLVM_READONLY int getExactLog2() const
Definition: APFloat.cpp:5119
opStatus remainder(const DoubleAPFloat &RHS)
Definition: APFloat.cpp:4860
opStatus multiply(const DoubleAPFloat &RHS, roundingMode RM)
Definition: APFloat.cpp:4764
fltCategory getCategory() const
Definition: APFloat.cpp:4924
bool bitwiseIsEqual(const DoubleAPFloat &RHS) const
Definition: APFloat.cpp:4975
LLVM_READONLY int getExactLog2Abs() const
Definition: APFloat.cpp:5124
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition: APFloat.cpp:5021
opStatus convertFromZeroExtendedInteger(const integerPart *Input, unsigned int InputSize, bool IsSigned, roundingMode RM)
Definition: APFloat.cpp:5043
APInt bitcastToAPInt() const
Definition: APFloat.cpp:4986
bool getExactInverse(APFloat *inv) const
Definition: APFloat.cpp:5108
Expected< opStatus > convertFromString(StringRef, roundingMode)
Definition: APFloat.cpp:4995
opStatus subtract(const DoubleAPFloat &RHS, roundingMode RM)
Definition: APFloat.cpp:4756
cmpResult compareAbsoluteValue(const DoubleAPFloat &RHS) const
Definition: APFloat.cpp:4904
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.cpp:5013
void makeSmallest(bool Neg)
Definition: APFloat.cpp:4948
opStatus next(bool nextDown)
Definition: APFloat.cpp:5004
opStatus divide(const DoubleAPFloat &RHS, roundingMode RM)
Definition: APFloat.cpp:4850
bool isSmallestNormalized() const
Definition: APFloat.cpp:5077
opStatus mod(const DoubleAPFloat &RHS)
Definition: APFloat.cpp:4869
DoubleAPFloat(const fltSemantics &S)
Definition: APFloat.cpp:4562
void toString(SmallVectorImpl< char > &Str, unsigned FormatPrecision, unsigned FormatMaxPadding, bool TruncateZero=true) const
Definition: APFloat.cpp:5099
void makeLargest(bool Neg)
Definition: APFloat.cpp:4940
cmpResult compare(const DoubleAPFloat &RHS) const
Definition: APFloat.cpp:4967
opStatus roundToIntegral(roundingMode RM)
Definition: APFloat.cpp:4890
opStatus convertFromSignExtendedInteger(const integerPart *Input, unsigned int InputSize, bool IsSigned, roundingMode RM)
Definition: APFloat.cpp:5032
opStatus fusedMultiplyAdd(const DoubleAPFloat &Multiplicand, const DoubleAPFloat &Addend, roundingMode RM)
Definition: APFloat.cpp:4878
unsigned int convertToHexString(char *DST, unsigned int HexDigits, bool UpperCase, roundingMode RM) const
Definition: APFloat.cpp:5053
opStatus add(const DoubleAPFloat &RHS, roundingMode RM)
Definition: APFloat.cpp:4751
void makeNaN(bool SNaN, bool Neg, const APInt *fill)
Definition: APFloat.cpp:4962
unsigned int convertToHexString(char *dst, unsigned int hexDigits, bool upperCase, roundingMode) const
Write out a hexadecimal representation of the floating point value to DST, which must be of sufficien...
Definition: APFloat.cpp:3244
fltCategory getCategory() const
Definition: APFloat.h:421
bool isFiniteNonZero() const
Definition: APFloat.h:424
opStatus add(const IEEEFloat &, roundingMode)
Definition: APFloat.cpp:2040
bool needsCleanup() const
Returns whether this instance allocated memory.
Definition: APFloat.h:311
APInt bitcastToAPInt() const
Definition: APFloat.cpp:3617
bool isNegative() const
IEEE-754R isSignMinus: Returns true if and only if the current value is negative.
Definition: APFloat.h:386
opStatus convertFromZeroExtendedInteger(const integerPart *, unsigned int, bool, roundingMode)
Definition: APFloat.cpp:2803
bool isNaN() const
Returns true if and only if the float is a quiet or signaling NaN.
Definition: APFloat.h:411
opStatus convertFromAPInt(const APInt &, bool, roundingMode)
Definition: APFloat.cpp:2759
opStatus roundToIntegral(roundingMode)
Definition: APFloat.cpp:2275
double convertToDouble() const
Definition: APFloat.cpp:3666
float convertToFloat() const
Definition: APFloat.cpp:3659
cmpResult compareAbsoluteValue(const IEEEFloat &) const
Definition: APFloat.cpp:1464
opStatus fusedMultiplyAdd(const IEEEFloat &, const IEEEFloat &, roundingMode)
Definition: APFloat.cpp:2229
void makeInf(bool Neg=false)
Definition: APFloat.cpp:4475
Expected< opStatus > convertFromString(StringRef, roundingMode)
Definition: APFloat.cpp:3191
bool isSmallestNormalized() const
Returns true if this is the smallest (by magnitude) normalized finite number in the given semantics.
Definition: APFloat.cpp:988
bool isLargest() const
Returns true if and only if the number has the largest possible finite magnitude in the current semantics.
Definition: APFloat.cpp:1080
bool isFinite() const
Returns true if and only if the current value is zero, subnormal, or normal.
Definition: APFloat.h:398
void makeNaN(bool SNaN=false, bool Neg=false, const APInt *fill=nullptr)
Definition: APFloat.cpp:885
opStatus convertToInteger(MutableArrayRef< integerPart >, unsigned int, bool, roundingMode, bool *) const
Definition: APFloat.cpp:2699
friend int ilogb(const IEEEFloat &Arg)
Returns the exponent of the internal representation of the APFloat.
Definition: APFloat.cpp:4504
opStatus remainder(const IEEEFloat &)
IEEE remainder.
Definition: APFloat.cpp:2092
IEEEFloat & operator=(const IEEEFloat &)
Definition: APFloat.cpp:949
opStatus divide(const IEEEFloat &, roundingMode)
Definition: APFloat.cpp:2072
friend IEEEFloat scalbn(IEEEFloat X, int Exp, roundingMode)
Returns: X * 2^Exp for integral exponents.
bool bitwiseIsEqual(const IEEEFloat &) const
Bitwise comparison for equality (QNaNs compare equal, 0!=-0).
Definition: APFloat.cpp:1104
bool isInteger() const
Returns true if and only if the number is an exact integer.
Definition: APFloat.cpp:1096
IEEEFloat(const fltSemantics &)
Definition: APFloat.cpp:1131
cmpResult compare(const IEEEFloat &) const
IEEE comparison with another floating point number (NaNs compare unordered, 0==-0).
Definition: APFloat.cpp:2362
opStatus subtract(const IEEEFloat &, roundingMode)
Definition: APFloat.cpp:2046
bool isInfinity() const
IEEE-754R isInfinite(): Returns true if and only if the float is infinity.
Definition: APFloat.h:408
const fltSemantics & getSemantics() const
Definition: APFloat.h:422
bool isZero() const
Returns true if and only if the float is plus or minus zero.
Definition: APFloat.h:401
bool isSignaling() const
Returns true if and only if the float is a signaling NaN.
Definition: APFloat.cpp:4324
opStatus convertFromSignExtendedInteger(const integerPart *, unsigned int, bool, roundingMode)
Definition: APFloat.cpp:2777
opStatus convert(const fltSemantics &, roundingMode, bool *)
IEEEFloat::convert - convert a value of one floating point type to another.
Definition: APFloat.cpp:2439
void makeZero(bool Neg=false)
Definition: APFloat.cpp:4487
bool isDenormal() const
IEEE-754R isSubnormal(): Returns true if and only if the float is a denormal.
Definition: APFloat.cpp:974
bool isSmallest() const
Returns true if and only if the number has the smallest possible non-zero magnitude in the current semantics.
Definition: APFloat.cpp:980
opStatus mod(const IEEEFloat &)
C fmod, or llvm frem.
Definition: APFloat.cpp:2202
opStatus multiply(const IEEEFloat &, roundingMode)
Definition: APFloat.cpp:2052
An opaque object representing a hash code.
Definition: Hashing.h:74
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode)
Definition: APFloat.cpp:4522
static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts, unsigned bits)
Definition: APFloat.cpp:1489
hash_code hash_value(const IEEEFloat &Arg)
Definition: APFloat.cpp:3393
IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM)
Definition: APFloat.cpp:4543
int ilogb(const IEEEFloat &Arg)
Definition: APFloat.cpp:4504
std::error_code status(const Twine &path, file_status &result, bool follow=true)
Get file status as if by POSIX stat().
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
static unsigned int partAsHex(char *dst, APFloatBase::integerPart part, unsigned int count, const char *hexDigitChars)
Definition: APFloat.cpp:801
static constexpr fltSemantics semBogus
Definition: APFloat.cpp:143
static const char infinityL[]
Definition: APFloat.cpp:792
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1731
hash_code hash_value(const FixedPointSemantics &Val)
Definition: APFixedPoint.h:128
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:385
static constexpr unsigned int partCountForBits(unsigned int bits)
Definition: APFloat.cpp:361
static const char NaNU[]
Definition: APFloat.cpp:795
static unsigned int HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
Definition: APFloat.cpp:671
static unsigned int powerOf5(APFloatBase::integerPart *dst, unsigned int power)
Definition: APFloat.cpp:730
static constexpr APFloatBase::ExponentType exponentZero(const fltSemantics &semantics)
Definition: APFloat.cpp:336
static Expected< int > totalExponent(StringRef::iterator p, StringRef::iterator end, int exponentAdjustment)
Definition: APFloat.cpp:422
std::error_code inconvertibleErrorCode()
The value returned by this function can be returned from convertToErrorCode for Error values where no...
Definition: Error.cpp:90
static constexpr fltSemantics semIEEEquad
Definition: APFloat.cpp:131
const unsigned int maxPowerOfFiveExponent
Definition: APFloat.cpp:287
static char * writeUnsignedDecimal(char *dst, unsigned int n)
Definition: APFloat.cpp:819
static constexpr fltSemantics semFloat8E4M3FNUZ
Definition: APFloat.cpp:137
const unsigned int maxPrecision
Definition: APFloat.cpp:286
static constexpr fltSemantics semIEEEdouble
Definition: APFloat.cpp:130
APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM)
Equivalent of C standard library function.
Definition: APFloat.h:1373
static const char NaNL[]
Definition: APFloat.cpp:794
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
static constexpr fltSemantics semFloat8E4M3FN
Definition: APFloat.cpp:135
static const char infinityU[]
Definition: APFloat.cpp:793
lostFraction
Enum that represents what fraction of the LSB truncated bits of an fp number represent.
Definition: APFloat.h:48
@ lfMoreThanHalf
Definition: APFloat.h:52
@ lfLessThanHalf
Definition: APFloat.h:50
@ lfExactlyHalf
Definition: APFloat.h:51
@ lfExactlyZero
Definition: APFloat.h:49
static constexpr fltSemantics semPPCDoubleDouble
Definition: APFloat.cpp:154
static Error interpretDecimal(StringRef::iterator begin, StringRef::iterator end, decimalInfo *D)
Definition: APFloat.cpp:514
static constexpr fltSemantics semFloat8E5M2FNUZ
Definition: APFloat.cpp:133
bool isFinite(const Loop *L)
Return true if this loop can be assumed to run for a finite number of iterations.
Definition: LoopInfo.cpp:1108
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
const unsigned int maxPowerOfFiveParts
Definition: APFloat.cpp:288
static constexpr fltSemantics semIEEEsingle
Definition: APFloat.cpp:129
APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM)
Definition: APFloat.h:1361
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
static constexpr APFloatBase::ExponentType exponentNaN(const fltSemantics &semantics)
Definition: APFloat.cpp:346
static Error createError(const Twine &Err)
Definition: APFloat.cpp:357
static constexpr fltSemantics semIEEEhalf
Definition: APFloat.cpp:127
static constexpr fltSemantics semPPCDoubleDoubleLegacy
Definition: APFloat.cpp:180
static lostFraction shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
Definition: APFloat.cpp:637
static constexpr fltSemantics semFloat8E5M2
Definition: APFloat.cpp:132
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
static const char hexDigitsUpper[]
Definition: APFloat.cpp:791
const unsigned int maxExponent
Definition: APFloat.cpp:285
static unsigned int decDigitValue(unsigned int c)
Definition: APFloat.cpp:367
static constexpr fltSemantics semFloat8E4M3B11FNUZ
Definition: APFloat.cpp:139
fltNonfiniteBehavior
Definition: APFloat.cpp:57
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the given range.
Definition: STLExtras.h:1923
static lostFraction combineLostFractions(lostFraction moreSignificant, lostFraction lessSignificant)
Definition: APFloat.cpp:650
static Expected< StringRef::iterator > skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end, StringRef::iterator *dot)
Definition: APFloat.cpp:474
RoundingMode
Rounding mode.
OutputIt copy(R &&Range, OutputIt Out)
Definition: STLExtras.h:1833
static constexpr fltSemantics semX87DoubleExtended
Definition: APFloat.cpp:142
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1858
static constexpr fltSemantics semFloatTF32
Definition: APFloat.cpp:141
static constexpr APFloatBase::ExponentType exponentInf(const fltSemantics &semantics)
Definition: APFloat.cpp:341
static lostFraction lostFractionThroughTruncation(const APFloatBase::integerPart *parts, unsigned int partCount, unsigned int bits)
Definition: APFloat.cpp:615
static APFloatBase::integerPart ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits, bool isNearest)
Definition: APFloat.cpp:685
static char * writeSignedDecimal(char *dst, int value)
Definition: APFloat.cpp:837
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition: Hashing.h:613
static constexpr fltSemantics semBFloat
Definition: APFloat.cpp:128
static Expected< lostFraction > trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, unsigned int digitValue)
Definition: APFloat.cpp:584
void consumeError(Error Err)
Consume a Error without doing anything.
Definition: Error.h:1041
fltNanEncoding
Definition: APFloat.cpp:77
static Expected< int > readExponent(StringRef::iterator begin, StringRef::iterator end)
Definition: APFloat.cpp:377
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition: Hashing.h:491
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:349
static const char hexDigitsLower[]
Definition: APFloat.cpp:790
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
#define N
static const llvm::fltSemantics & EnumToSemantics(Semantics S)
Definition: APFloat.cpp:183
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:249
static constexpr roundingMode rmNearestTiesToAway
Definition: APFloat.h:235
cmpResult
IEEE-754R 5.11: Floating Point Comparison Relations.
Definition: APFloat.h:220
static constexpr roundingMode rmTowardNegative
Definition: APFloat.h:233
static ExponentType semanticsMinExponent(const fltSemantics &)
Definition: APFloat.cpp:300
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:230
static unsigned int semanticsSizeInBits(const fltSemantics &)
Definition: APFloat.cpp:303
static unsigned getSizeInBits(const fltSemantics &Sem)
Returns the size of the floating point number (in bits) in the given semantics.
Definition: APFloat.cpp:331
static const fltSemantics & Float8E4M3FN() LLVM_READNONE
Definition: APFloat.cpp:257
static const fltSemantics & PPCDoubleDouble() LLVM_READNONE
Definition: APFloat.cpp:252
static constexpr roundingMode rmTowardZero
Definition: APFloat.h:234
static const fltSemantics & x87DoubleExtended() LLVM_READNONE
Definition: APFloat.cpp:263
uninitializedTag
Convenience enum used to construct an uninitialized APFloat.
Definition: APFloat.h:264
static const fltSemantics & IEEEquad() LLVM_READNONE
Definition: APFloat.cpp:251
static const fltSemantics & Float8E4M3B11FNUZ() LLVM_READNONE
Definition: APFloat.cpp:259
static const fltSemantics & Bogus() LLVM_READNONE
A Pseudo fltsemantic used to construct APFloats that cannot conflict with anything real.
Definition: APFloat.cpp:266
static ExponentType semanticsMaxExponent(const fltSemantics &)
Definition: APFloat.cpp:296
static unsigned int semanticsPrecision(const fltSemantics &)
Definition: APFloat.cpp:292
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:250
static const fltSemantics & Float8E5M2() LLVM_READNONE
Definition: APFloat.cpp:255
static Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
Definition: APFloat.cpp:216
static constexpr unsigned integerPartWidth
Definition: APFloat.h:144
static const fltSemantics & IEEEhalf() LLVM_READNONE
Definition: APFloat.cpp:247
APInt::WordType integerPart
Definition: APFloat.h:143
static constexpr roundingMode rmTowardPositive
Definition: APFloat.h:232
static bool isRepresentableAsNormalIn(const fltSemantics &Src, const fltSemantics &Dst)
Definition: APFloat.cpp:317
static const fltSemantics & Float8E4M3FNUZ() LLVM_READNONE
Definition: APFloat.cpp:258
static const fltSemantics & BFloat() LLVM_READNONE
Definition: APFloat.cpp:248
static const fltSemantics & FloatTF32() LLVM_READNONE
Definition: APFloat.cpp:262
static const fltSemantics & Float8E5M2FNUZ() LLVM_READNONE
Definition: APFloat.cpp:256
fltCategory
Category of internally-represented number.
Definition: APFloat.h:256
opStatus
IEEE-754R 7: Default exception handling.
Definition: APFloat.h:246
int32_t ExponentType
A signed type to represent a floating point numbers unbiased exponent.
Definition: APFloat.h:147
static unsigned int semanticsIntSizeInBits(const fltSemantics &, bool)
Definition: APFloat.cpp:306
const char * lastSigDigit
Definition: APFloat.cpp:509
const char * firstSigDigit
Definition: APFloat.cpp:508
bool isRepresentableBy(const fltSemantics &S) const
Definition: APFloat.cpp:121
APFloatBase::ExponentType maxExponent
Definition: APFloat.cpp:102
fltNonfiniteBehavior nonFiniteBehavior
Definition: APFloat.cpp:115
APFloatBase::ExponentType minExponent
Definition: APFloat.cpp:106
unsigned int sizeInBits
Definition: APFloat.cpp:113
unsigned int precision
Definition: APFloat.cpp:110
fltNanEncoding nanEncoding
Definition: APFloat.cpp:117