| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | // Copyright 2020-2023 Daniel Lemire | ||
| 2 | // Copyright 2023 Matt Borland | ||
| 3 | // Distributed under the Boost Software License, Version 1.0. | ||
| 4 | // https://www.boost.org/LICENSE_1_0.txt | ||
| 5 | // | ||
| 6 | // Derivative of: https://github.com/fastfloat/fast_float | ||
| 7 | |||
| 8 | #ifndef BOOST_JSON_DETAIL_CHARCONV_DETAIL_FASTFLOAT_DECIMAL_TO_BINARY_HPP | ||
| 9 | #define BOOST_JSON_DETAIL_CHARCONV_DETAIL_FASTFLOAT_DECIMAL_TO_BINARY_HPP | ||
| 10 | |||
| 11 | #include <boost/json/detail/charconv/detail/fast_float/float_common.hpp> | ||
| 12 | #include <boost/json/detail/charconv/detail/fast_float/fast_table.hpp> | ||
| 13 | #include <cfloat> | ||
| 14 | #include <cinttypes> | ||
| 15 | #include <cmath> | ||
| 16 | #include <cstdint> | ||
| 17 | #include <cstdlib> | ||
| 18 | #include <cstring> | ||
| 19 | |||
| 20 | namespace boost { namespace json { namespace detail { namespace charconv { namespace detail { namespace fast_float { | ||
| 21 | |||
| 22 | // This will compute or rather approximate w * 5**q and return a pair of 64-bit words approximating | ||
| 23 | // the result, with the "high" part corresponding to the most significant bits and the | ||
| 24 | // low part corresponding to the least significant bits. | ||
| 25 | // | ||
| 26 | template <int bit_precision> | ||
| 27 | BOOST_FORCEINLINE BOOST_JSON_FASTFLOAT_CONSTEXPR20 | ||
| 28 | value128 compute_product_approximation(int64_t q, uint64_t w) { | ||
| 29 | 2005879 | const int index = 2 * int(q - powers::smallest_power_of_five); | |
| 30 | // For small values of q, e.g., q in [0,27], the answer is always exact because | ||
| 31 | // The line value128 firstproduct = full_multiplication(w, power_of_five_128[index]); | ||
| 32 | // gives the exact answer. | ||
| 33 | 4011758 | value128 firstproduct = full_multiplication(w, powers::power_of_five_128[index]); | |
| 34 | static_assert((bit_precision >= 0) && (bit_precision <= 64), " precision should be in (0,64]"); | ||
| 35 | 2005879 | constexpr uint64_t precision_mask = (bit_precision < 64) ? | |
| 36 | (uint64_t(0xFFFFFFFFFFFFFFFF) >> bit_precision) | ||
| 37 | : uint64_t(0xFFFFFFFFFFFFFFFF); | ||
| 38 |
6/6✓ Branch 0 taken 2122 times.
✓ Branch 1 taken 1000838 times.
✓ Branch 2 taken 2390 times.
✓ Branch 3 taken 997543 times.
✓ Branch 4 taken 483 times.
✓ Branch 5 taken 2503 times.
|
2005879 | if((firstproduct.high & precision_mask) == precision_mask) { // could further guard with (lower + w < lower) |
| 39 | // regarding the second product, we only need secondproduct.high, but our expectation is that the compiler will optimize this extra work away if needed. | ||
| 40 | 4995 | value128 secondproduct = full_multiplication(w, powers::power_of_five_128[index + 1]); | |
| 41 | 4995 | firstproduct.low += secondproduct.high; | |
| 42 |
6/6✓ Branch 0 taken 716 times.
✓ Branch 1 taken 1406 times.
✓ Branch 2 taken 1108 times.
✓ Branch 3 taken 1282 times.
✓ Branch 4 taken 1 times.
✓ Branch 5 taken 482 times.
|
4995 | if(secondproduct.high > firstproduct.low) { |
| 43 | 1825 | firstproduct.high++; | |
| 44 | } | ||
| 45 | } | ||
| 46 | 2005879 | return firstproduct; | |
| 47 | } | ||
| 48 | |||
| 49 | namespace detail { | ||
| 50 | /** | ||
| 51 | * For q in (0,350), we have that | ||
| 52 | * f = (((152170 + 65536) * q ) >> 16); | ||
| 53 | * is equal to | ||
| 54 | * floor(p) + q | ||
| 55 | * where | ||
| 56 | * p = log(5**q)/log(2) = q * log(5)/log(2) | ||
| 57 | * | ||
| 58 | * For negative values of q in (-400,0), we have that | ||
| 59 | * f = (((152170 + 65536) * q ) >> 16); | ||
| 60 | * is equal to | ||
| 61 | * -ceil(p) + q | ||
| 62 | * where | ||
| 63 | * p = log(5**-q)/log(2) = -q * log(5)/log(2) | ||
| 64 | */ | ||
| 65 | constexpr BOOST_FORCEINLINE int32_t power(int32_t q) noexcept { | ||
| 66 | 2005879 | return (((152170 + 65536) * q) >> 16) + 63; | |
| 67 | } | ||
| 68 | } // namespace detail | ||
| 69 | |||
| 70 | // create an adjusted mantissa, biased by the invalid power2 | ||
| 71 | // for significant digits already multiplied by 10 ** q. | ||
| 72 | template <typename binary> | ||
| 73 | BOOST_FORCEINLINE BOOST_JSON_CXX14_CONSTEXPR_NO_INLINE | ||
| 74 | adjusted_mantissa compute_error_scaled(int64_t q, uint64_t w, int lz) noexcept { | ||
| 75 | 2986 | int hilz = int(w >> 63) ^ 1; | |
| 76 | 2986 | adjusted_mantissa answer; | |
| 77 | 2986 | answer.mantissa = w << hilz; | |
| 78 | 2986 | int bias = binary::mantissa_explicit_bits() - binary::minimum_exponent(); | |
| 79 | 5972 | answer.power2 = int32_t(detail::power(int32_t(q)) + bias - hilz - lz - 62 + invalid_am_bias); | |
| 80 | 2986 | return answer; | |
| 81 | } | ||
| 82 | |||
| 83 | // w * 10 ** q, without rounding the representation up. | ||
| 84 | // the power2 in the exponent will be adjusted by invalid_am_bias. | ||
| 85 | template <typename binary> | ||
| 86 | BOOST_FORCEINLINE BOOST_JSON_FASTFLOAT_CONSTEXPR20 | ||
| 87 | adjusted_mantissa compute_error(int64_t q, uint64_t w) noexcept { | ||
| 88 | 2986 | int lz = leading_zeroes(w); | |
| 89 | 2986 | w <<= lz; | |
| 90 | value128 product = compute_product_approximation<binary::mantissa_explicit_bits() + 3>(q, w); | ||
| 91 | 8958 | return compute_error_scaled<binary>(q, product.high, lz); | |
| 92 | } | ||
| 93 | |||
| 94 | // w * 10 ** q | ||
| 95 | // The returned value should be a valid ieee64 number that simply need to be packed. | ||
| 96 | // However, in some very rare cases, the computation will fail. In such cases, we | ||
| 97 | // return an adjusted_mantissa with a negative power of 2: the caller should recompute | ||
| 98 | // in such cases. | ||
| 99 | template <typename binary> | ||
| 100 | BOOST_FORCEINLINE BOOST_JSON_FASTFLOAT_CONSTEXPR20 | ||
| 101 | adjusted_mantissa compute_float(int64_t q, uint64_t w) noexcept { | ||
| 102 | 2004452 | adjusted_mantissa answer; | |
| 103 |
8/12✓ Branch 0 taken 1003739 times.
✓ Branch 1 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1003739 times.
✓ Branch 5 taken 1 times.
✓ Branch 6 taken 1003739 times.
✓ Branch 7 taken 1000712 times.
✗ Branch 8 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1000712 times.
✗ Branch 12 not taken.
✓ Branch 13 taken 1000712 times.
|
2004452 | if ((w == 0) || (q < binary::smallest_power_of_ten())) { |
| 104 | 1 | answer.power2 = 0; | |
| 105 | 1 | answer.mantissa = 0; | |
| 106 | // result should be zero | ||
| 107 | 1 | return answer; | |
| 108 | } | ||
| 109 |
4/4✓ Branch 1 taken 779 times.
✓ Branch 2 taken 1002960 times.
✓ Branch 4 taken 779 times.
✓ Branch 5 taken 999933 times.
|
2004451 | if (q > binary::largest_power_of_ten()) { |
| 110 | // we want to get infinity: | ||
| 111 | 1558 | answer.power2 = binary::infinite_power(); | |
| 112 | 1558 | answer.mantissa = 0; | |
| 113 | 1558 | return answer; | |
| 114 | } | ||
| 115 | // At this point in time q is in [powers::smallest_power_of_five, powers::largest_power_of_five]. | ||
| 116 | |||
| 117 | // We want the most significant bit of i to be 1. Shift if needed. | ||
| 118 | 2002893 | int lz = leading_zeroes(w); | |
| 119 | 2002893 | w <<= lz; | |
| 120 | |||
| 121 | // The required precision is binary::mantissa_explicit_bits() + 3 because | ||
| 122 | // 1. We need the implicit bit | ||
| 123 | // 2. We need an extra bit for rounding purposes | ||
| 124 | // 3. We might lose a bit due to the "upperbit" routine (result too small, requiring a shift) | ||
| 125 | |||
| 126 | value128 product = compute_product_approximation<binary::mantissa_explicit_bits() + 3>(q, w); | ||
| 127 | // The computed 'product' is always sufficient. | ||
| 128 | // Mathematical proof: | ||
| 129 | // Noble Mushtak and Daniel Lemire, Fast Number Parsing Without Fallback (to appear) | ||
| 130 | // See script/mushtak_lemire.py | ||
| 131 | |||
| 132 | // The "compute_product_approximation" function can be slightly slower than a branchless approach: | ||
| 133 | // value128 product = compute_product(q, w); | ||
| 134 | // but in practice, we can win big with the compute_product_approximation if its additional branch | ||
| 135 | // is easily predicted. Which is best is data specific. | ||
| 136 | 2002893 | int upperbit = int(product.high >> 63); | |
| 137 | |||
| 138 | 2002893 | answer.mantissa = product.high >> (upperbit + 64 - binary::mantissa_explicit_bits() - 3); | |
| 139 | |||
| 140 | 4005786 | answer.power2 = int32_t(detail::power(int32_t(q)) + upperbit - lz - binary::minimum_exponent()); | |
| 141 |
3/4✓ Branch 0 taken 82 times.
✓ Branch 1 taken 1002878 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 999933 times.
|
2002893 | if (answer.power2 <= 0) { // we have a subnormal? |
| 142 | // Here have that answer.power2 <= 0 so -answer.power2 >= 0 | ||
| 143 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 82 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
82 | if(-answer.power2 + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. |
| 144 | ✗ | answer.power2 = 0; | |
| 145 | ✗ | answer.mantissa = 0; | |
| 146 | // result should be zero | ||
| 147 | ✗ | return answer; | |
| 148 | } | ||
| 149 | // next line is safe because -answer.power2 + 1 < 64 | ||
| 150 | 82 | answer.mantissa >>= -answer.power2 + 1; | |
| 151 | // Thankfully, we can't have both "round-to-even" and subnormals because | ||
| 152 | // "round-to-even" only occurs for powers close to 0. | ||
| 153 | 82 | answer.mantissa += (answer.mantissa & 1); // round up | |
| 154 | 82 | answer.mantissa >>= 1; | |
| 155 | // There is a weird scenario where we don't have a subnormal but just. | ||
| 156 | // Suppose we start with 2.2250738585072013e-308, we end up | ||
| 157 | // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal | ||
| 158 | // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round | ||
| 159 | // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer | ||
| 160 | // subnormal, but we can only know this after rounding. | ||
| 161 | // So we only declare a subnormal if we are smaller than the threshold. | ||
| 162 | 82 | answer.power2 = (answer.mantissa < (uint64_t(1) << binary::mantissa_explicit_bits())) ? 0 : 1; | |
| 163 | 82 | return answer; | |
| 164 | } | ||
| 165 | |||
| 166 | // usually, we round *up*, but if we fall right in between and and we have an | ||
| 167 | // even basis, we need to round down | ||
| 168 | // We are only concerned with the cases where 5**q fits in single 64-bit word. | ||
| 169 |
15/16✓ Branch 0 taken 5326 times.
✓ Branch 1 taken 997552 times.
✓ Branch 3 taken 5324 times.
✓ Branch 4 taken 2 times.
✓ Branch 6 taken 3158 times.
✓ Branch 7 taken 2166 times.
✓ Branch 8 taken 800 times.
✓ Branch 9 taken 1002078 times.
✓ Branch 10 taken 3929 times.
✓ Branch 11 taken 996004 times.
✓ Branch 13 taken 3569 times.
✓ Branch 14 taken 360 times.
✓ Branch 16 taken 3569 times.
✗ Branch 17 not taken.
✓ Branch 18 taken 1057 times.
✓ Branch 19 taken 998876 times.
|
2009538 | if ((product.low <= 1) && (q >= binary::min_exponent_round_to_even()) && (q <= binary::max_exponent_round_to_even()) && |
| 170 |
4/4✓ Branch 0 taken 800 times.
✓ Branch 1 taken 2358 times.
✓ Branch 2 taken 1057 times.
✓ Branch 3 taken 2512 times.
|
6727 | ((answer.mantissa & 3) == 1) ) { // we may fall between two floats! |
| 171 | // To be in-between two floats we need that in doing | ||
| 172 | // answer.mantissa = product.high >> (upperbit + 64 - binary::mantissa_explicit_bits() - 3); | ||
| 173 | // ... we dropped out only zeroes. But if this happened, then we can go back!!! | ||
| 174 |
4/4✓ Branch 1 taken 4 times.
✓ Branch 2 taken 796 times.
✓ Branch 4 taken 10 times.
✓ Branch 5 taken 1047 times.
|
1857 | if((answer.mantissa << (upperbit + 64 - binary::mantissa_explicit_bits() - 3)) == product.high) { |
| 175 | 14 | answer.mantissa &= ~uint64_t(1); // flip it so that we do not round up | |
| 176 | } | ||
| 177 | } | ||
| 178 | |||
| 179 | 2002811 | answer.mantissa += (answer.mantissa & 1); // round up | |
| 180 | 2002811 | answer.mantissa >>= 1; | |
| 181 |
3/4✓ Branch 1 taken 360 times.
✓ Branch 2 taken 1002518 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 999933 times.
|
2002811 | if (answer.mantissa >= (uint64_t(2) << binary::mantissa_explicit_bits())) { |
| 182 | 360 | answer.mantissa = (uint64_t(1) << binary::mantissa_explicit_bits()); | |
| 183 | 360 | answer.power2++; // undo previous addition | |
| 184 | } | ||
| 185 | |||
| 186 | 2002811 | answer.mantissa &= ~(uint64_t(1) << binary::mantissa_explicit_bits()); | |
| 187 |
4/4✓ Branch 1 taken 29829 times.
✓ Branch 2 taken 973049 times.
✓ Branch 4 taken 29829 times.
✓ Branch 5 taken 970104 times.
|
2002811 | if (answer.power2 >= binary::infinite_power()) { // infinity |
| 188 | 59658 | answer.power2 = binary::infinite_power(); | |
| 189 | 59658 | answer.mantissa = 0; | |
| 190 | } | ||
| 191 | 2002811 | return answer; | |
| 192 | } | ||
| 193 | |||
| 194 | }}}}}} // namespace fast_float | ||
| 195 | |||
| 196 | #endif | ||
| 197 |