| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | // Copyright 2020-2023 Daniel Lemire | ||
| 2 | // Copyright 2023 Matt Borland | ||
| 3 | // Distributed under the Boost Software License, Version 1.0. | ||
| 4 | // https://www.boost.org/LICENSE_1_0.txt | ||
| 5 | // | ||
| 6 | // Derivative of: https://github.com/fastfloat/fast_float | ||
| 7 | |||
| 8 | #ifndef BOOST_JSON_DETAIL_CHARCONV_DETAIL_FASTFLOAT_BIGINT_HPP | ||
| 9 | #define BOOST_JSON_DETAIL_CHARCONV_DETAIL_FASTFLOAT_BIGINT_HPP | ||
| 10 | |||
| 11 | #include <boost/json/detail/charconv/detail/fast_float/float_common.hpp> | ||
| 12 | #include <algorithm> | ||
| 13 | #include <cstdint> | ||
| 14 | #include <climits> | ||
| 15 | #include <cstring> | ||
| 16 | |||
| 17 | namespace boost { namespace json { namespace detail { namespace charconv { namespace detail { namespace fast_float { | ||
| 18 | |||
| 19 | // the limb width: we want efficient multiplication of double the bits in | ||
| 20 | // limb, or for 64-bit limbs, at least 64-bit multiplication where we can | ||
| 21 | // extract the high and low parts efficiently. this is every 64-bit | ||
| 22 | // architecture except for sparc, which emulates 128-bit multiplication. | ||
| 23 | // we might have platforms where `CHAR_BIT` is not 8, so let's avoid | ||
| 24 | // doing `8 * sizeof(limb)`. | ||
| 25 | #if defined(BOOST_JSON_FASTFLOAT_64BIT) && !defined(__sparc) | ||
| 26 | #define BOOST_JSON_FASTFLOAT_64BIT_LIMB 1 | ||
| 27 | typedef uint64_t limb; | ||
| 28 | constexpr size_t limb_bits = 64; | ||
| 29 | #else | ||
| 30 | #define BOOST_JSON_FASTFLOAT_32BIT_LIMB | ||
| 31 | typedef uint32_t limb; | ||
| 32 | constexpr size_t limb_bits = 32; | ||
| 33 | #endif | ||
| 34 | |||
| 35 | typedef span<limb> limb_span; | ||
| 36 | |||
| 37 | // number of bits in a bigint. this needs to be at least the number | ||
| 38 | // of bits required to store the largest bigint, which is | ||
| 39 | // `log2(10**(digits + max_exp))`, or `log2(10**(767 + 342))`, or | ||
| 40 | // ~3600 bits, so we round to 4000. | ||
| 41 | constexpr size_t bigint_bits = 4000; | ||
| 42 | constexpr size_t bigint_limbs = bigint_bits / limb_bits; | ||
| 43 | |||
| 44 | // vector-like type that is allocated on the stack. the entire | ||
| 45 | // buffer is pre-allocated, and only the length changes. | ||
| 46 | template <uint16_t size> | ||
| 47 | struct stackvec { | ||
| 48 | limb data[size]; | ||
| 49 | // we never need more than 150 limbs | ||
| 50 | uint16_t length{0}; | ||
| 51 | |||
| 52 | 12693 | stackvec() = default; | |
| 53 | stackvec(const stackvec &) = delete; | ||
| 54 | stackvec &operator=(const stackvec &) = delete; | ||
| 55 | stackvec(stackvec &&) = delete; | ||
| 56 | stackvec &operator=(stackvec &&other) = delete; | ||
| 57 | |||
| 58 | // create stack vector from existing limb span. | ||
| 59 | 2024 | BOOST_JSON_FASTFLOAT_CONSTEXPR20 stackvec(limb_span s) { | |
| 60 | 2024 | try_extend(s); | |
| 61 | 2024 | } | |
| 62 | |||
| 63 | 255428 | BOOST_JSON_CXX14_CONSTEXPR limb& operator[](size_t index) noexcept { | |
| 64 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 255428 times.
|
255428 | BOOST_ASSERT(index < length); |
| 65 | 255428 | return data[index]; | |
| 66 | } | ||
| 67 | 6198 | BOOST_JSON_CXX14_CONSTEXPR const limb& operator[](size_t index) const noexcept { | |
| 68 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 6198 times.
|
6198 | BOOST_ASSERT(index < length); |
| 69 | 6198 | return data[index]; | |
| 70 | } | ||
| 71 | // index from the end of the container | ||
| 72 | 9129 | BOOST_JSON_CXX14_CONSTEXPR const limb& rindex(size_t index) const noexcept { | |
| 73 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 9129 times.
|
9129 | BOOST_ASSERT(index < length); |
| 74 | 9129 | size_t rindex = length - index - 1; | |
| 75 | 9129 | return data[rindex]; | |
| 76 | } | ||
| 77 | |||
| 78 | // set the length, without bounds checking. | ||
| 79 | 28303 | BOOST_JSON_CXX14_CONSTEXPR void set_len(size_t len) noexcept { | |
| 80 | 28303 | length = uint16_t(len); | |
| 81 | 28303 | } | |
| 82 | 259478 | constexpr size_t len() const noexcept { | |
| 83 | 259478 | return length; | |
| 84 | } | ||
| 85 | 3789 | constexpr bool is_empty() const noexcept { | |
| 86 | 3789 | return length == 0; | |
| 87 | } | ||
| 88 | 45829 | constexpr size_t capacity() const noexcept { | |
| 89 | 45829 | return size; | |
| 90 | } | ||
| 91 | // append item to vector, without bounds checking | ||
| 92 | 27233 | BOOST_JSON_CXX14_CONSTEXPR void push_unchecked(limb value) noexcept { | |
| 93 | 27233 | data[length] = value; | |
| 94 | 27233 | length++; | |
| 95 | 27233 | } | |
| 96 | // append item to vector, returning if item was added | ||
| 97 | 25622 | BOOST_JSON_CXX14_CONSTEXPR bool try_push(limb value) noexcept { | |
| 98 |
1/2✓ Branch 2 taken 25622 times.
✗ Branch 3 not taken.
|
25622 | if (len() < capacity()) { |
| 99 | 25622 | push_unchecked(value); | |
| 100 | 25622 | return true; | |
| 101 | } else { | ||
| 102 | ✗ | return false; | |
| 103 | } | ||
| 104 | } | ||
| 105 | // add items to the vector, from a span, without bounds checking | ||
| 106 | 10120 | BOOST_JSON_FASTFLOAT_CONSTEXPR20 void extend_unchecked(limb_span s) noexcept { | |
| 107 | 10120 | limb* ptr = data + length; | |
| 108 | 10120 | std::copy_n(s.ptr, s.len(), ptr); | |
| 109 | 10120 | set_len(len() + s.len()); | |
| 110 | 10120 | } | |
| 111 | // try to add items to the vector, returning if items were added | ||
| 112 | 10120 | BOOST_JSON_FASTFLOAT_CONSTEXPR20 bool try_extend(limb_span s) noexcept { | |
| 113 |
1/2✓ Branch 3 taken 10120 times.
✗ Branch 4 not taken.
|
10120 | if (len() + s.len() <= capacity()) { |
| 114 | 10120 | extend_unchecked(s); | |
| 115 | 10120 | return true; | |
| 116 | } else { | ||
| 117 | ✗ | return false; | |
| 118 | } | ||
| 119 | } | ||
| 120 | // resize the vector, without bounds checking | ||
| 121 | // if the new size is longer than the vector, assign value to each | ||
| 122 | // appended item. | ||
| 123 | BOOST_JSON_FASTFLOAT_CONSTEXPR20 | ||
| 124 | 7673 | void resize_unchecked(size_t new_len, limb value) noexcept { | |
| 125 |
1/2✓ Branch 1 taken 7673 times.
✗ Branch 2 not taken.
|
7673 | if (new_len > len()) { |
| 126 | 7673 | size_t count = new_len - len(); | |
| 127 | 7673 | limb* first = data + len(); | |
| 128 | 7673 | limb* last = first + count; | |
| 129 | 7673 | ::std::fill(first, last, value); | |
| 130 | 7673 | set_len(new_len); | |
| 131 | } else { | ||
| 132 | ✗ | set_len(new_len); | |
| 133 | } | ||
| 134 | 7673 | } | |
| 135 | // try to resize the vector, returning if the vector was resized. | ||
| 136 | 7673 | BOOST_JSON_FASTFLOAT_CONSTEXPR20 bool try_resize(size_t new_len, limb value) noexcept { | |
| 137 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 7673 times.
|
7673 | if (new_len > capacity()) { |
| 138 | ✗ | return false; | |
| 139 | } else { | ||
| 140 | 7673 | resize_unchecked(new_len, value); | |
| 141 | 7673 | return true; | |
| 142 | } | ||
| 143 | } | ||
| 144 | // check if any limbs are non-zero after the given index. | ||
| 145 | // this needs to be done in reverse order, since the index | ||
| 146 | // is relative to the most significant limbs. | ||
| 147 | 1375 | BOOST_JSON_CXX14_CONSTEXPR bool nonzero(size_t index) const noexcept { | |
| 148 |
2/2✓ Branch 1 taken 1369 times.
✓ Branch 2 taken 6 times.
|
1375 | while (index < len()) { |
| 149 |
1/2✓ Branch 1 taken 1369 times.
✗ Branch 2 not taken.
|
1369 | if (rindex(index) != 0) { |
| 150 | 1369 | return true; | |
| 151 | } | ||
| 152 | ✗ | index++; | |
| 153 | } | ||
| 154 | 6 | return false; | |
| 155 | } | ||
| 156 | // normalize the big integer, so most-significant zero limbs are removed. | ||
| 157 | 3635 | BOOST_JSON_CXX14_CONSTEXPR void normalize() noexcept { | |
| 158 |
3/6✓ Branch 1 taken 3635 times.
✗ Branch 2 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 3635 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 3635 times.
|
3635 | while (len() > 0 && rindex(0) == 0) { |
| 159 | ✗ | length--; | |
| 160 | } | ||
| 161 | 3635 | } | |
| 162 | }; | ||
| 163 | |||
| 164 | BOOST_FORCEINLINE BOOST_JSON_CXX14_CONSTEXPR_NO_INLINE | ||
| 165 | uint64_t empty_hi64(bool& truncated) noexcept { | ||
| 166 | ✗ | truncated = false; | |
| 167 | ✗ | return 0; | |
| 168 | } | ||
| 169 | |||
| 170 | BOOST_FORCEINLINE BOOST_JSON_FASTFLOAT_CONSTEXPR20 | ||
| 171 | uint64_t uint64_hi64(uint64_t r0, bool& truncated) noexcept { | ||
| 172 | ✗ | truncated = false; | |
| 173 | ✗ | int shl = leading_zeroes(r0); | |
| 174 | ✗ | return r0 << shl; | |
| 175 | } | ||
| 176 | |||
| 177 | BOOST_FORCEINLINE BOOST_JSON_FASTFLOAT_CONSTEXPR20 | ||
| 178 | uint64_t uint64_hi64(uint64_t r0, uint64_t r1, bool& truncated) noexcept { | ||
| 179 | 1375 | int shl = leading_zeroes(r0); | |
| 180 |
2/2✓ Branch 0 taken 27 times.
✓ Branch 1 taken 1348 times.
|
1375 | if (shl == 0) { |
| 181 | 27 | truncated = r1 != 0; | |
| 182 | 27 | return r0; | |
| 183 | } else { | ||
| 184 | 1348 | int shr = 64 - shl; | |
| 185 | 1348 | truncated = (r1 << shl) != 0; | |
| 186 | 1348 | return (r0 << shl) | (r1 >> shr); | |
| 187 | } | ||
| 188 | } | ||
| 189 | |||
| 190 | BOOST_FORCEINLINE BOOST_JSON_FASTFLOAT_CONSTEXPR20 | ||
| 191 | uint64_t uint32_hi64(uint32_t r0, bool& truncated) noexcept { | ||
| 192 | return uint64_hi64(r0, truncated); | ||
| 193 | } | ||
| 194 | |||
| 195 | BOOST_FORCEINLINE BOOST_JSON_FASTFLOAT_CONSTEXPR20 | ||
| 196 | uint64_t uint32_hi64(uint32_t r0, uint32_t r1, bool& truncated) noexcept { | ||
| 197 | uint64_t x0 = r0; | ||
| 198 | uint64_t x1 = r1; | ||
| 199 | return uint64_hi64((x0 << 32) | x1, truncated); | ||
| 200 | } | ||
| 201 | |||
| 202 | BOOST_FORCEINLINE BOOST_JSON_FASTFLOAT_CONSTEXPR20 | ||
| 203 | uint64_t uint32_hi64(uint32_t r0, uint32_t r1, uint32_t r2, bool& truncated) noexcept { | ||
| 204 | uint64_t x0 = r0; | ||
| 205 | uint64_t x1 = r1; | ||
| 206 | uint64_t x2 = r2; | ||
| 207 | return uint64_hi64(x0, (x1 << 32) | x2, truncated); | ||
| 208 | } | ||
| 209 | |||
| 210 | // add two small integers, checking for overflow. | ||
| 211 | // we want an efficient operation. for msvc, where | ||
| 212 | // we don't have built-in intrinsics, this is still | ||
| 213 | // pretty fast. | ||
| 214 | BOOST_FORCEINLINE BOOST_JSON_FASTFLOAT_CONSTEXPR20 | ||
| 215 | limb scalar_add(limb x, limb y, bool& overflow) noexcept { | ||
| 216 | limb z; | ||
| 217 | // gcc and clang | ||
| 218 | #if defined(__has_builtin) | ||
| 219 | #if __has_builtin(__builtin_add_overflow) | ||
| 220 |
3/6✓ Branch 0 taken 26245 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 5566 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 6679 times.
✗ Branch 5 not taken.
|
38490 | if (!cpp20_and_in_constexpr()) { |
| 221 | 38490 | overflow = __builtin_add_overflow(x, y, &z); | |
| 222 | 38490 | return z; | |
| 223 | } | ||
| 224 | #endif | ||
| 225 | #endif | ||
| 226 | |||
| 227 | // generic, this still optimizes correctly on MSVC. | ||
| 228 | ✗ | z = x + y; | |
| 229 | ✗ | overflow = z < x; | |
| 230 | ✗ | return z; | |
| 231 | } | ||
| 232 | |||
| 233 | // multiply two small integers, getting both the high and low bits. | ||
| 234 | BOOST_FORCEINLINE BOOST_JSON_FASTFLOAT_CONSTEXPR20 | ||
| 235 | limb scalar_mul(limb x, limb y, limb& carry) noexcept { | ||
| 236 | #ifdef BOOST_JSON_FASTFLOAT_64BIT_LIMB | ||
| 237 | #if defined(__SIZEOF_INT128__) | ||
| 238 | // GCC and clang both define it as an extension. | ||
| 239 | 80707 | __uint128_t z = __uint128_t(x) * __uint128_t(y) + __uint128_t(carry); | |
| 240 | 80707 | carry = limb(z >> limb_bits); | |
| 241 | 80707 | return limb(z); | |
| 242 | #else | ||
| 243 | // fallback, no native 128-bit integer multiplication with carry. | ||
| 244 | // on msvc, this optimizes identically, somehow. | ||
| 245 | value128 z = full_multiplication(x, y); | ||
| 246 | bool overflow; | ||
| 247 | z.low = scalar_add(z.low, carry, overflow); | ||
| 248 | z.high += uint64_t(overflow); // cannot overflow | ||
| 249 | carry = z.high; | ||
| 250 | return z.low; | ||
| 251 | #endif | ||
| 252 | #else | ||
| 253 | uint64_t z = uint64_t(x) * uint64_t(y) + uint64_t(carry); | ||
| 254 | carry = limb(z >> limb_bits); | ||
| 255 | return limb(z); | ||
| 256 | #endif | ||
| 257 | } | ||
| 258 | |||
| 259 | // add scalar value to bigint starting from offset. | ||
| 260 | // used in grade school multiplication | ||
| 261 | template <uint16_t size> | ||
| 262 | inline BOOST_JSON_FASTFLOAT_CONSTEXPR20 | ||
| 263 | 9437 | bool small_add_from(stackvec<size>& vec, limb y, size_t start) noexcept { | |
| 264 | 9437 | size_t index = start; | |
| 265 | 9437 | limb carry = y; | |
| 266 | bool overflow; | ||
| 267 |
6/6✓ Branch 0 taken 9665 times.
✓ Branch 1 taken 6451 times.
✓ Branch 3 taken 6679 times.
✓ Branch 4 taken 2986 times.
✓ Branch 5 taken 6679 times.
✓ Branch 6 taken 9437 times.
|
16116 | while (carry != 0 && index < vec.len()) { |
| 268 | 6679 | vec[index] = scalar_add(vec[index], carry, overflow); | |
| 269 | 6679 | carry = limb(overflow); | |
| 270 | 6679 | index += 1; | |
| 271 | } | ||
| 272 |
2/2✓ Branch 0 taken 2986 times.
✓ Branch 1 taken 6451 times.
|
9437 | if (carry != 0) { |
| 273 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 2986 times.
|
2986 | BOOST_JSON_FASTFLOAT_TRY(vec.try_push(carry)); |
| 274 | } | ||
| 275 | 9437 | return true; | |
| 276 | } | ||
| 277 | |||
| 278 | // add scalar value to bigint. | ||
| 279 | template <uint16_t size> | ||
| 280 | BOOST_FORCEINLINE BOOST_JSON_FASTFLOAT_CONSTEXPR20 | ||
| 281 | bool small_add(stackvec<size>& vec, limb y) noexcept { | ||
| 282 | 9437 | return small_add_from(vec, y, 0); | |
| 283 | } | ||
| 284 | |||
| 285 | // multiply bigint by scalar value. | ||
| 286 | template <uint16_t size> | ||
| 287 | inline BOOST_JSON_FASTFLOAT_CONSTEXPR20 | ||
| 288 | 28029 | bool small_mul(stackvec<size>& vec, limb y) noexcept { | |
| 289 | 28029 | limb carry = 0; | |
| 290 |
2/2✓ Branch 1 taken 80707 times.
✓ Branch 2 taken 28029 times.
|
108736 | for (size_t index = 0; index < vec.len(); index++) { |
| 291 | 161414 | vec[index] = scalar_mul(vec[index], y, carry); | |
| 292 | } | ||
| 293 |
2/2✓ Branch 0 taken 21102 times.
✓ Branch 1 taken 6927 times.
|
28029 | if (carry != 0) { |
| 294 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 21102 times.
|
21102 | BOOST_JSON_FASTFLOAT_TRY(vec.try_push(carry)); |
| 295 | } | ||
| 296 | 28029 | return true; | |
| 297 | } | ||
| 298 | |||
| 299 | // add bigint to bigint starting from index. | ||
| 300 | // used in grade school multiplication | ||
| 301 | template <uint16_t size> | ||
| 302 | BOOST_JSON_FASTFLOAT_CONSTEXPR20 | ||
| 303 | 8096 | bool large_add_from(stackvec<size>& x, limb_span y, size_t start) noexcept { | |
| 304 | // the effective x buffer is from `xstart..x.len()`, so exit early | ||
| 305 | // if we can't get that current range. | ||
| 306 |
5/6✓ Branch 1 taken 8096 times.
✗ Branch 2 not taken.
✓ Branch 5 taken 7673 times.
✓ Branch 6 taken 423 times.
✓ Branch 7 taken 7673 times.
✓ Branch 8 taken 423 times.
|
8096 | if (x.len() < start || y.len() > x.len() - start) { |
| 307 |
1/2✗ Branch 2 not taken.
✓ Branch 3 taken 7673 times.
|
7673 | BOOST_JSON_FASTFLOAT_TRY(x.try_resize(y.len() + start, 0)); |
| 308 | } | ||
| 309 | |||
| 310 | 8096 | bool carry = false; | |
| 311 |
2/2✓ Branch 1 taken 26245 times.
✓ Branch 2 taken 8096 times.
|
34341 | for (size_t index = 0; index < y.len(); index++) { |
| 312 | 26245 | limb xi = x[index + start]; | |
| 313 | 26245 | limb yi = y[index]; | |
| 314 | 26245 | bool c1 = false; | |
| 315 | 26245 | bool c2 = false; | |
| 316 | 26245 | xi = scalar_add(xi, yi, c1); | |
| 317 |
2/2✓ Branch 0 taken 5566 times.
✓ Branch 1 taken 20679 times.
|
26245 | if (carry) { |
| 318 | 5566 | xi = scalar_add(xi, 1, c2); | |
| 319 | } | ||
| 320 | 26245 | x[index + start] = xi; | |
| 321 | 26245 | carry = c1 | c2; | |
| 322 | } | ||
| 323 | |||
| 324 | // handle overflow | ||
| 325 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 8096 times.
|
8096 | if (carry) { |
| 326 | ✗ | BOOST_JSON_FASTFLOAT_TRY(small_add_from(x, 1, y.len() + start)); | |
| 327 | } | ||
| 328 | 8096 | return true; | |
| 329 | } | ||
| 330 | |||
| 331 | // add bigint to bigint. | ||
| 332 | template <uint16_t size> | ||
| 333 | BOOST_FORCEINLINE BOOST_JSON_FASTFLOAT_CONSTEXPR20 | ||
| 334 | bool large_add_from(stackvec<size>& x, limb_span y) noexcept { | ||
| 335 | return large_add_from(x, y, 0); | ||
| 336 | } | ||
| 337 | |||
| 338 | // grade-school multiplication algorithm | ||
| 339 | template <uint16_t size> | ||
| 340 | BOOST_JSON_FASTFLOAT_CONSTEXPR20 | ||
| 341 | 2024 | bool long_mul(stackvec<size>& x, limb_span y) noexcept { | |
| 342 | 2024 | limb_span xs = limb_span(x.data, x.len()); | |
| 343 | 2024 | stackvec<size> z(xs); | |
| 344 | 2024 | limb_span zs = limb_span(z.data, z.len()); | |
| 345 | |||
| 346 |
1/2✓ Branch 1 taken 2024 times.
✗ Branch 2 not taken.
|
2024 | if (y.len() != 0) { |
| 347 | 2024 | limb y0 = y[0]; | |
| 348 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 2024 times.
|
2024 | BOOST_JSON_FASTFLOAT_TRY(small_mul(x, y0)); |
| 349 |
2/2✓ Branch 1 taken 8096 times.
✓ Branch 2 taken 2024 times.
|
10120 | for (size_t index = 1; index < y.len(); index++) { |
| 350 | 8096 | limb yi = y[index]; | |
| 351 | 8096 | stackvec<size> zi; | |
| 352 |
1/2✓ Branch 0 taken 8096 times.
✗ Branch 1 not taken.
|
8096 | if (yi != 0) { |
| 353 | // re-use the same buffer throughout | ||
| 354 | 8096 | zi.set_len(0); | |
| 355 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 8096 times.
|
8096 | BOOST_JSON_FASTFLOAT_TRY(zi.try_extend(zs)); |
| 356 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 8096 times.
|
8096 | BOOST_JSON_FASTFLOAT_TRY(small_mul(zi, yi)); |
| 357 | 8096 | limb_span zis = limb_span(zi.data, zi.len()); | |
| 358 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 8096 times.
|
8096 | BOOST_JSON_FASTFLOAT_TRY(large_add_from(x, zis, index)); |
| 359 | } | ||
| 360 | } | ||
| 361 | } | ||
| 362 | |||
| 363 | 2024 | x.normalize(); | |
| 364 | 2024 | return true; | |
| 365 | } | ||
| 366 | |||
| 367 | // grade-school multiplication algorithm | ||
| 368 | template <uint16_t size> | ||
| 369 | BOOST_JSON_FASTFLOAT_CONSTEXPR20 | ||
| 370 | 2024 | bool large_mul(stackvec<size>& x, limb_span y) noexcept { | |
| 371 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 2024 times.
|
2024 | if (y.len() == 1) { |
| 372 | ✗ | BOOST_JSON_FASTFLOAT_TRY(small_mul(x, y[0])); | |
| 373 | } else { | ||
| 374 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 2024 times.
|
2024 | BOOST_JSON_FASTFLOAT_TRY(long_mul(x, y)); |
| 375 | } | ||
| 376 | 2024 | return true; | |
| 377 | } | ||
| 378 | |||
| 379 | template <typename = void> | ||
| 380 | struct pow5_tables { | ||
| 381 | static constexpr uint32_t large_step = 135; | ||
| 382 | static constexpr uint64_t small_power_of_5[] = { | ||
| 383 | 1UL, 5UL, 25UL, 125UL, 625UL, 3125UL, 15625UL, 78125UL, 390625UL, | ||
| 384 | 1953125UL, 9765625UL, 48828125UL, 244140625UL, 1220703125UL, | ||
| 385 | 6103515625UL, 30517578125UL, 152587890625UL, 762939453125UL, | ||
| 386 | 3814697265625UL, 19073486328125UL, 95367431640625UL, 476837158203125UL, | ||
| 387 | 2384185791015625UL, 11920928955078125UL, 59604644775390625UL, | ||
| 388 | 298023223876953125UL, 1490116119384765625UL, 7450580596923828125UL, | ||
| 389 | }; | ||
| 390 | #ifdef BOOST_JSON_FASTFLOAT_64BIT_LIMB | ||
| 391 | constexpr static limb large_power_of_5[] = { | ||
| 392 | 1414648277510068013UL, 9180637584431281687UL, 4539964771860779200UL, | ||
| 393 | 10482974169319127550UL, 198276706040285095UL}; | ||
| 394 | #else | ||
| 395 | constexpr static limb large_power_of_5[] = { | ||
| 396 | 4279965485U, 329373468U, 4020270615U, 2137533757U, 4287402176U, | ||
| 397 | 1057042919U, 1071430142U, 2440757623U, 381945767U, 46164893U}; | ||
| 398 | #endif | ||
| 399 | }; | ||
| 400 | |||
| 401 | template <typename T> | ||
| 402 | constexpr uint32_t pow5_tables<T>::large_step; | ||
| 403 | |||
| 404 | template <typename T> | ||
| 405 | constexpr uint64_t pow5_tables<T>::small_power_of_5[]; | ||
| 406 | |||
| 407 | template <typename T> | ||
| 408 | constexpr limb pow5_tables<T>::large_power_of_5[]; | ||
| 409 | |||
| 410 | // big integer type. implements a small subset of big integer | ||
| 411 | // arithmetic, using simple algorithms since asymptotically | ||
| 412 | // faster algorithms are slower for a small number of limbs. | ||
| 413 | // all operations assume the big-integer is normalized. | ||
| 414 | struct bigint : pow5_tables<> { | ||
| 415 | // storage of the limbs, in little-endian order. | ||
| 416 | stackvec<bigint_limbs> vec; | ||
| 417 | |||
| 418 | 2986 | BOOST_JSON_FASTFLOAT_CONSTEXPR20 bigint(): vec() {} | |
| 419 | bigint(const bigint &) = delete; | ||
| 420 | bigint &operator=(const bigint &) = delete; | ||
| 421 | bigint(bigint &&) = delete; | ||
| 422 | bigint &operator=(bigint &&other) = delete; | ||
| 423 | |||
| 424 | 1611 | BOOST_JSON_FASTFLOAT_CONSTEXPR20 bigint(uint64_t value): vec() { | |
| 425 | #ifdef BOOST_JSON_FASTFLOAT_64BIT_LIMB | ||
| 426 | 1611 | vec.push_unchecked(value); | |
| 427 | #else | ||
| 428 | vec.push_unchecked(uint32_t(value)); | ||
| 429 | vec.push_unchecked(uint32_t(value >> 32)); | ||
| 430 | #endif | ||
| 431 | 1611 | vec.normalize(); | |
| 432 | 1611 | } | |
| 433 | |||
| 434 | // get the high 64 bits from the vector, and if bits were truncated. | ||
| 435 | // this is to get the significant digits for the float. | ||
| 436 | 1375 | BOOST_JSON_FASTFLOAT_CONSTEXPR20 uint64_t hi64(bool& truncated) const noexcept { | |
| 437 | #ifdef BOOST_JSON_FASTFLOAT_64BIT_LIMB | ||
| 438 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 1375 times.
|
1375 | if (vec.len() == 0) { |
| 439 | ✗ | return empty_hi64(truncated); | |
| 440 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 1375 times.
|
1375 | } else if (vec.len() == 1) { |
| 441 | ✗ | return uint64_hi64(vec.rindex(0), truncated); | |
| 442 | } else { | ||
| 443 |
1/2✗ Branch 2 not taken.
✓ Branch 3 taken 1375 times.
|
1375 | uint64_t result = uint64_hi64(vec.rindex(0), vec.rindex(1), truncated); |
| 444 | 1375 | truncated |= vec.nonzero(2); | |
| 445 | 1375 | return result; | |
| 446 | } | ||
| 447 | #else | ||
| 448 | if (vec.len() == 0) { | ||
| 449 | return empty_hi64(truncated); | ||
| 450 | } else if (vec.len() == 1) { | ||
| 451 | return uint32_hi64(vec.rindex(0), truncated); | ||
| 452 | } else if (vec.len() == 2) { | ||
| 453 | return uint32_hi64(vec.rindex(0), vec.rindex(1), truncated); | ||
| 454 | } else { | ||
| 455 | uint64_t result = uint32_hi64(vec.rindex(0), vec.rindex(1), vec.rindex(2), truncated); | ||
| 456 | truncated |= vec.nonzero(3); | ||
| 457 | return result; | ||
| 458 | } | ||
| 459 | #endif | ||
| 460 | } | ||
| 461 | |||
| 462 | // compare two big integers, returning the large value. | ||
| 463 | // assumes both are normalized. if the return value is | ||
| 464 | // negative, other is larger, if the return value is | ||
| 465 | // positive, this is larger, otherwise they are equal. | ||
| 466 | // the limbs are stored in little-endian order, so we | ||
| 467 | // must compare the limbs in ever order. | ||
| 468 | 1611 | BOOST_JSON_FASTFLOAT_CONSTEXPR20 int compare(const bigint& other) const noexcept { | |
| 469 |
1/2✗ Branch 2 not taken.
✓ Branch 3 taken 1611 times.
|
1611 | if (vec.len() > other.vec.len()) { |
| 470 | ✗ | return 1; | |
| 471 |
1/2✗ Branch 2 not taken.
✓ Branch 3 taken 1611 times.
|
1611 | } else if (vec.len() < other.vec.len()) { |
| 472 | ✗ | return -1; | |
| 473 | } else { | ||
| 474 |
1/2✓ Branch 1 taken 3099 times.
✗ Branch 2 not taken.
|
3099 | for (size_t index = vec.len(); index > 0; index--) { |
| 475 | 3099 | limb xi = vec[index - 1]; | |
| 476 | 3099 | limb yi = other.vec[index - 1]; | |
| 477 |
2/2✓ Branch 0 taken 767 times.
✓ Branch 1 taken 2332 times.
|
3099 | if (xi > yi) { |
| 478 | 767 | return 1; | |
| 479 |
2/2✓ Branch 0 taken 844 times.
✓ Branch 1 taken 1488 times.
|
2332 | } else if (xi < yi) { |
| 480 | 844 | return -1; | |
| 481 | } | ||
| 482 | } | ||
| 483 | ✗ | return 0; | |
| 484 | } | ||
| 485 | } | ||
| 486 | |||
| 487 | // shift left each limb n bits, carrying over to the new limb | ||
| 488 | // returns true if we were able to shift all the digits. | ||
| 489 | 2931 | BOOST_JSON_FASTFLOAT_CONSTEXPR20 bool shl_bits(size_t n) noexcept { | |
| 490 | // Internally, for each item, we shift left by n, and add the previous | ||
| 491 | // right shifted limb-bits. | ||
| 492 | // For example, we transform (for u8) shifted left 2, to: | ||
| 493 | // b10100100 b01000010 | ||
| 494 | // b10 b10010001 b00001000 | ||
| 495 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2931 times.
|
2931 | BOOST_ASSERT(n != 0); |
| 496 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2931 times.
|
2931 | BOOST_ASSERT(n < sizeof(limb) * 8); |
| 497 | |||
| 498 | 2931 | size_t shl = n; | |
| 499 | 2931 | size_t shr = limb_bits - shl; | |
| 500 | 2931 | limb prev = 0; | |
| 501 |
2/2✓ Branch 1 taken 14083 times.
✓ Branch 2 taken 2931 times.
|
17014 | for (size_t index = 0; index < vec.len(); index++) { |
| 502 | 14083 | limb xi = vec[index]; | |
| 503 | 14083 | vec[index] = (xi << shl) | (prev >> shr); | |
| 504 | 14083 | prev = xi; | |
| 505 | } | ||
| 506 | |||
| 507 | 2931 | limb carry = prev >> shr; | |
| 508 |
2/2✓ Branch 0 taken 1534 times.
✓ Branch 1 taken 1397 times.
|
2931 | if (carry != 0) { |
| 509 | 1534 | return vec.try_push(carry); | |
| 510 | } | ||
| 511 | 1397 | return true; | |
| 512 | } | ||
| 513 | |||
| 514 | // move the limbs left by `n` limbs. | ||
| 515 | 2414 | BOOST_JSON_FASTFLOAT_CONSTEXPR20 bool shl_limbs(size_t n) noexcept { | |
| 516 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2414 times.
|
2414 | BOOST_ASSERT(n != 0); |
| 517 |
1/2✗ Branch 2 not taken.
✓ Branch 3 taken 2414 times.
|
2414 | if (n + vec.len() > vec.capacity()) { |
| 518 | ✗ | return false; | |
| 519 |
1/2✓ Branch 1 taken 2414 times.
✗ Branch 2 not taken.
|
2414 | } else if (!vec.is_empty()) { |
| 520 | // move limbs | ||
| 521 | 2414 | limb* dst = vec.data + n; | |
| 522 | 2414 | const limb* src = vec.data; | |
| 523 | 2414 | std::copy_backward(src, src + vec.len(), dst + vec.len()); | |
| 524 | // fill in empty limbs | ||
| 525 | 2414 | limb* first = vec.data; | |
| 526 | 2414 | limb* last = first + n; | |
| 527 | 2414 | ::std::fill(first, last, 0); | |
| 528 | 2414 | vec.set_len(n + vec.len()); | |
| 529 | 2414 | return true; | |
| 530 | } else { | ||
| 531 | ✗ | return true; | |
| 532 | } | ||
| 533 | } | ||
| 534 | |||
| 535 | // move the limbs left by `n` bits. | ||
| 536 | 2984 | BOOST_JSON_FASTFLOAT_CONSTEXPR20 bool shl(size_t n) noexcept { | |
| 537 | 2984 | size_t rem = n % limb_bits; | |
| 538 | 2984 | size_t div = n / limb_bits; | |
| 539 |
2/2✓ Branch 0 taken 2931 times.
✓ Branch 1 taken 53 times.
|
2984 | if (rem != 0) { |
| 540 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 2931 times.
|
2931 | BOOST_JSON_FASTFLOAT_TRY(shl_bits(rem)); |
| 541 | } | ||
| 542 |
2/2✓ Branch 0 taken 2414 times.
✓ Branch 1 taken 570 times.
|
2984 | if (div != 0) { |
| 543 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 2414 times.
|
2414 | BOOST_JSON_FASTFLOAT_TRY(shl_limbs(div)); |
| 544 | } | ||
| 545 | 2984 | return true; | |
| 546 | } | ||
| 547 | |||
| 548 | // get the number of leading zeros in the bigint. | ||
| 549 | 1375 | BOOST_JSON_FASTFLOAT_CONSTEXPR20 int ctlz() const noexcept { | |
| 550 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 1375 times.
|
1375 | if (vec.is_empty()) { |
| 551 | ✗ | return 0; | |
| 552 | } else { | ||
| 553 | #ifdef BOOST_JSON_FASTFLOAT_64BIT_LIMB | ||
| 554 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 1375 times.
|
2750 | return leading_zeroes(vec.rindex(0)); |
| 555 | #else | ||
| 556 | // no use defining a specialized leading_zeroes for a 32-bit type. | ||
| 557 | uint64_t r0 = vec.rindex(0); | ||
| 558 | return leading_zeroes(r0 << 32); | ||
| 559 | #endif | ||
| 560 | } | ||
| 561 | } | ||
| 562 | |||
| 563 | // get the number of bits in the bigint. | ||
| 564 | 1375 | BOOST_JSON_FASTFLOAT_CONSTEXPR20 int bit_length() const noexcept { | |
| 565 | 1375 | int lz = ctlz(); | |
| 566 | 1375 | return int(limb_bits * vec.len()) - lz; | |
| 567 | } | ||
| 568 | |||
| 569 | 9437 | BOOST_JSON_FASTFLOAT_CONSTEXPR20 bool mul(limb y) noexcept { | |
| 570 | 9437 | return small_mul(vec, y); | |
| 571 | } | ||
| 572 | |||
| 573 | 9437 | BOOST_JSON_FASTFLOAT_CONSTEXPR20 bool add(limb y) noexcept { | |
| 574 | 18874 | return small_add(vec, y); | |
| 575 | } | ||
| 576 | |||
| 577 | // multiply as if by 2 raised to a power. | ||
| 578 | 2984 | BOOST_JSON_FASTFLOAT_CONSTEXPR20 bool pow2(uint32_t exp) noexcept { | |
| 579 | 2984 | return shl(exp); | |
| 580 | } | ||
| 581 | |||
| 582 | // multiply as if by 5 raised to a power. | ||
| 583 | 2986 | BOOST_JSON_FASTFLOAT_CONSTEXPR20 bool pow5(uint32_t exp) noexcept { | |
| 584 | // multiply by a power of 5 | ||
| 585 | 2986 | constexpr size_t large_length = sizeof(large_power_of_5) / sizeof(limb); | |
| 586 | 2986 | limb_span large = limb_span(large_power_of_5, large_length); | |
| 587 |
2/2✓ Branch 0 taken 2024 times.
✓ Branch 1 taken 2986 times.
|
5010 | while (exp >= large_step) { |
| 588 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 2024 times.
|
2024 | BOOST_JSON_FASTFLOAT_TRY(large_mul(vec, large)); |
| 589 | 2024 | exp -= large_step; | |
| 590 | } | ||
| 591 | #ifdef BOOST_JSON_FASTFLOAT_64BIT_LIMB | ||
| 592 | 2986 | constexpr uint32_t small_step = 27; | |
| 593 | 2986 | constexpr limb max_native = 7450580596923828125UL; | |
| 594 | #else | ||
| 595 | constexpr uint32_t small_step = 13; | ||
| 596 | constexpr limb max_native = 1220703125U; | ||
| 597 | #endif | ||
| 598 |
2/2✓ Branch 0 taken 5604 times.
✓ Branch 1 taken 2986 times.
|
8590 | while (exp >= small_step) { |
| 599 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 5604 times.
|
5604 | BOOST_JSON_FASTFLOAT_TRY(small_mul(vec, max_native)); |
| 600 | 5604 | exp -= small_step; | |
| 601 | } | ||
| 602 |
2/2✓ Branch 0 taken 2868 times.
✓ Branch 1 taken 118 times.
|
2986 | if (exp != 0) { |
| 603 | // Work around clang bug https://godbolt.org/z/zedh7rrhc | ||
| 604 | // This is similar to https://github.com/llvm/llvm-project/issues/47746, | ||
| 605 | // except the workaround described there don't work here | ||
| 606 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 2868 times.
|
2868 | BOOST_JSON_FASTFLOAT_TRY( |
| 607 | small_mul(vec, limb(((void)small_power_of_5[0], small_power_of_5[exp]))) | ||
| 608 | ); | ||
| 609 | } | ||
| 610 | |||
| 611 | 2986 | return true; | |
| 612 | } | ||
| 613 | |||
| 614 | // multiply as if by 10 raised to a power. | ||
| 615 | 1375 | BOOST_JSON_FASTFLOAT_CONSTEXPR20 bool pow10(uint32_t exp) noexcept { | |
| 616 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 1375 times.
|
1375 | BOOST_JSON_FASTFLOAT_TRY(pow5(exp)); |
| 617 | 1375 | return pow2(exp); | |
| 618 | } | ||
| 619 | }; | ||
| 620 | |||
| 621 | }}}}}} // namespace fast_float | ||
| 622 | |||
| 623 | #endif | ||
| 624 |