| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | // Copyright 2020-2023 Daniel Lemire | ||
| 2 | // Copyright 2023 Matt Borland | ||
| 3 | // Distributed under the Boost Software License, Version 1.0. | ||
| 4 | // https://www.boost.org/LICENSE_1_0.txt | ||
| 5 | // | ||
| 6 | // Derivative of: https://github.com/fastfloat/fast_float | ||
| 7 | |||
| 8 | #ifndef BOOST_JSON_DETAIL_CHARCONV_DETAIL_FASTFLOAT_ASCII_NUMBER_HPP | ||
| 9 | #define BOOST_JSON_DETAIL_CHARCONV_DETAIL_FASTFLOAT_ASCII_NUMBER_HPP | ||
| 10 | |||
| 11 | #include <boost/endian/conversion.hpp> | ||
| 12 | #include <boost/json/detail/charconv/detail/fast_float/float_common.hpp> | ||
| 13 | #include <cctype> | ||
| 14 | #include <cstdint> | ||
| 15 | #include <cstring> | ||
| 16 | #include <iterator> | ||
| 17 | |||
| 18 | namespace boost { namespace json { namespace detail { namespace charconv { namespace detail { namespace fast_float { | ||
| 19 | |||
| 20 | // Next function can be micro-optimized, but compilers are entirely | ||
| 21 | // able to optimize it well. | ||
| 22 | template <typename UC> | ||
| 23 | BOOST_FORCEINLINE constexpr bool is_integer(UC c) noexcept { | ||
| 24 |
6/10✓ Branch 0 taken 3902 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 19417139 times.
✓ Branch 3 taken 1006820 times.
✓ Branch 4 taken 3423102 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 1005136 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 3192086 times.
✗ Branch 9 not taken.
|
29053321 | return !(c > UC('9') || c < UC('0')); |
| 25 | } | ||
| 26 | |||
| 27 | BOOST_FORCEINLINE constexpr uint64_t byteswap(uint64_t val) { | ||
| 28 | return (val & 0xFF00000000000000) >> 56 | ||
| 29 | | (val & 0x00FF000000000000) >> 40 | ||
| 30 | | (val & 0x0000FF0000000000) >> 24 | ||
| 31 | | (val & 0x000000FF00000000) >> 8 | ||
| 32 | | (val & 0x00000000FF000000) << 8 | ||
| 33 | | (val & 0x0000000000FF0000) << 24 | ||
| 34 | | (val & 0x000000000000FF00) << 40 | ||
| 35 | | (val & 0x00000000000000FF) << 56; | ||
| 36 | } | ||
| 37 | |||
| 38 | BOOST_FORCEINLINE BOOST_JSON_FASTFLOAT_CONSTEXPR20 | ||
| 39 | uint64_t read_u64(const char *chars) { | ||
| 40 |
4/8✗ Branch 0 not taken.
✓ Branch 1 taken 5972 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 5960 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 2139963 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 2683865 times.
|
4835760 | if (cpp20_and_in_constexpr()) { |
| 41 | ✗ | uint64_t val = 0; | |
| 42 | ✗ | for(int i = 0; i < 8; ++i) { | |
| 43 | ✗ | val |= uint64_t(*chars) << (i*8); | |
| 44 | ✗ | ++chars; | |
| 45 | } | ||
| 46 | ✗ | return val; | |
| 47 | } | ||
| 48 | uint64_t val; | ||
| 49 | 4835760 | ::memcpy(&val, chars, sizeof(uint64_t)); | |
| 50 | 4835760 | endian::little_to_native_inplace(val); | |
| 51 | 4835760 | return val; | |
| 52 | } | ||
| 53 | |||
| 54 | BOOST_FORCEINLINE BOOST_JSON_FASTFLOAT_CONSTEXPR20 | ||
| 55 | void write_u64(uint8_t *chars, uint64_t val) { | ||
| 56 | if (cpp20_and_in_constexpr()) { | ||
| 57 | for(int i = 0; i < 8; ++i) { | ||
| 58 | *chars = uint8_t(val); | ||
| 59 | val >>= 8; | ||
| 60 | ++chars; | ||
| 61 | } | ||
| 62 | return; | ||
| 63 | } | ||
| 64 | endian::native_to_little_inplace(val); | ||
| 65 | ::memcpy(chars, &val, sizeof(uint64_t)); | ||
| 66 | } | ||
| 67 | |||
| 68 | // credit @aqrit | ||
| 69 | BOOST_FORCEINLINE BOOST_JSON_CXX14_CONSTEXPR_NO_INLINE | ||
| 70 | uint32_t parse_eight_digits_unrolled(uint64_t val) { | ||
| 71 | 2151895 | constexpr uint64_t mask = 0x000000FF000000FF; | |
| 72 | 2151895 | constexpr uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32) | |
| 73 | 2151895 | constexpr uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32) | |
| 74 | 2151895 | val -= 0x3030303030303030; | |
| 75 | 2151895 | val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8; | |
| 76 | 2151895 | val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; | |
| 77 | 2151895 | return uint32_t(val); | |
| 78 | } | ||
| 79 | |||
| 80 | BOOST_FORCEINLINE constexpr | ||
| 81 | uint32_t parse_eight_digits_unrolled(const char16_t *) noexcept { | ||
| 82 | return 0; | ||
| 83 | } | ||
| 84 | |||
| 85 | BOOST_FORCEINLINE constexpr | ||
| 86 | uint32_t parse_eight_digits_unrolled(const char32_t *) noexcept { | ||
| 87 | return 0; | ||
| 88 | } | ||
| 89 | |||
| 90 | BOOST_FORCEINLINE BOOST_JSON_FASTFLOAT_CONSTEXPR20 | ||
| 91 | uint32_t parse_eight_digits_unrolled(const char *chars) noexcept { | ||
| 92 | 4303790 | return parse_eight_digits_unrolled(read_u64(chars)); | |
| 93 | } | ||
| 94 | |||
| 95 | // credit @aqrit | ||
| 96 | BOOST_FORCEINLINE constexpr bool is_made_of_eight_digits_fast(uint64_t val) noexcept { | ||
| 97 | 2683865 | return !((((val + 0x4646464646464646) | (val - 0x3030303030303030)) & 0x8080808080808080)); | |
| 98 | } | ||
| 99 | |||
| 100 | BOOST_FORCEINLINE constexpr | ||
| 101 | bool is_made_of_eight_digits_fast(const char16_t *) noexcept { | ||
| 102 | return false; | ||
| 103 | } | ||
| 104 | |||
| 105 | BOOST_FORCEINLINE constexpr | ||
| 106 | bool is_made_of_eight_digits_fast(const char32_t *) noexcept { | ||
| 107 | return false; | ||
| 108 | } | ||
| 109 | |||
| 110 | BOOST_FORCEINLINE BOOST_JSON_FASTFLOAT_CONSTEXPR20 | ||
| 111 | bool is_made_of_eight_digits_fast(const char *chars) noexcept { | ||
| 112 | 5367730 | return is_made_of_eight_digits_fast(read_u64(chars)); | |
| 113 | } | ||
| 114 | |||
| 115 | template <typename UC> | ||
| 116 | struct parsed_number_string_t { | ||
| 117 | int64_t exponent{0}; | ||
| 118 | uint64_t mantissa{0}; | ||
| 119 | UC const * lastmatch{nullptr}; | ||
| 120 | bool negative{false}; | ||
| 121 | bool valid{false}; | ||
| 122 | bool too_many_digits{false}; | ||
| 123 | // contains the range of the significant digits | ||
| 124 | span<const UC> integer{}; // non-nullable | ||
| 125 | span<const UC> fraction{}; // nullable | ||
| 126 | }; | ||
| 127 | using byte_span = span<char>; | ||
| 128 | using parsed_number_string = parsed_number_string_t<char>; | ||
| 129 | // Assuming that you use no more than 19 digits, this will | ||
| 130 | // parse an ASCII string. | ||
| 131 | template <typename UC> | ||
| 132 | BOOST_FORCEINLINE BOOST_JSON_FASTFLOAT_CONSTEXPR20 | ||
| 133 | parsed_number_string_t<UC> parse_number_string(UC const *p, UC const * pend, parse_options_t<UC> options) noexcept { | ||
| 134 | 1009310 | chars_format const fmt = options.format; | |
| 135 | 1009310 | UC const decimal_point = options.decimal_point; | |
| 136 | |||
| 137 | 1009310 | parsed_number_string_t<UC> answer; | |
| 138 | 1009310 | answer.valid = false; | |
| 139 | 1009310 | answer.too_many_digits = false; | |
| 140 | 1009310 | answer.negative = (*p == UC('-')); | |
| 141 |
2/2✓ Branch 0 taken 3902 times.
✓ Branch 1 taken 1005408 times.
|
1009310 | if (*p == UC('-')) // C++17 20.19.3.(7.1) explicitly forbids '+' sign here |
| 142 | { | ||
| 143 | 3902 | ++p; | |
| 144 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 3902 times.
|
3902 | if (p == pend) { |
| 145 | ✗ | return answer; | |
| 146 | } | ||
| 147 |
3/8✓ Branch 0 taken 3902 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 3902 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 3902 times.
|
7804 | if (!is_integer(*p) && (*p != decimal_point)) { // a sign must be followed by an integer or the dot |
| 148 | ✗ | return answer; | |
| 149 | } | ||
| 150 | } | ||
| 151 | 1009310 | UC const * const start_digits = p; | |
| 152 | |||
| 153 | 1009310 | uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad) | |
| 154 | |||
| 155 |
7/8✓ Branch 0 taken 20426449 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 20423959 times.
✓ Branch 3 taken 2490 times.
✓ Branch 4 taken 19417139 times.
✓ Branch 5 taken 1009310 times.
✓ Branch 6 taken 19417139 times.
✓ Branch 7 taken 1009310 times.
|
40852898 | while ((p != pend) && is_integer(*p)) { |
| 156 | // a multiplication by 10 is cheaper than an arbitrary integer | ||
| 157 | // multiplication | ||
| 158 | 19417139 | i = 10 * i + | |
| 159 | 19417139 | uint64_t(*p - UC('0')); // might overflow, we will handle the overflow later | |
| 160 | 19417139 | ++p; | |
| 161 | } | ||
| 162 | 1009310 | UC const * const end_of_integer_part = p; | |
| 163 | 1009310 | int64_t digit_count = int64_t(end_of_integer_part - start_digits); | |
| 164 | 1009310 | answer.integer = span<const UC>(start_digits, size_t(digit_count)); | |
| 165 | 1009310 | int64_t exponent = 0; | |
| 166 |
3/4✓ Branch 0 taken 1009310 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1006820 times.
✓ Branch 3 taken 2490 times.
|
1009310 | if ((p != pend) && (*p == decimal_point)) { |
| 167 | 1006820 | ++p; | |
| 168 | 1006820 | UC const * before = p; | |
| 169 | // can occur at most twice without overflowing, but let it occur more, since | ||
| 170 | // for integers with many digits, digit parsing is the primary bottleneck. | ||
| 171 | if (std::is_same<UC,char>::value) { | ||
| 172 |
6/6✓ Branch 0 taken 2683865 times.
✓ Branch 1 taken 462918 times.
✓ Branch 2 taken 2139963 times.
✓ Branch 3 taken 543902 times.
✓ Branch 4 taken 2139963 times.
✓ Branch 5 taken 1006820 times.
|
6837468 | while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) { |
| 173 | 2139963 | i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok | |
| 174 | 2139963 | p += 8; | |
| 175 | } | ||
| 176 | } | ||
| 177 |
8/8✓ Branch 0 taken 4425748 times.
✓ Branch 1 taken 4174 times.
✓ Branch 2 taken 3423102 times.
✓ Branch 3 taken 1002646 times.
✓ Branch 4 taken 3423102 times.
✓ Branch 5 taken 1002646 times.
✓ Branch 6 taken 3423102 times.
✓ Branch 7 taken 1006820 times.
|
8855670 | while ((p != pend) && is_integer(*p)) { |
| 178 | 3423102 | uint8_t digit = uint8_t(*p - UC('0')); | |
| 179 | 3423102 | ++p; | |
| 180 | 3423102 | i = i * 10 + digit; // in rare cases, this will overflow, but that's ok | |
| 181 | } | ||
| 182 | 1006820 | exponent = before - p; | |
| 183 | 1006820 | answer.fraction = span<const UC>(before, size_t(p - before)); | |
| 184 | 1006820 | digit_count -= exponent; | |
| 185 | } | ||
| 186 | // we must have encountered at least one integer! | ||
| 187 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1009310 times.
|
1009310 | if (digit_count == 0) { |
| 188 | ✗ | return answer; | |
| 189 | } | ||
| 190 | 1009310 | int64_t exp_number = 0; // explicit exponential part | |
| 191 |
4/8✓ Branch 0 taken 1009310 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1005136 times.
✓ Branch 3 taken 4174 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 1005136 times.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
|
1009310 | if (((unsigned)fmt & (unsigned)chars_format::scientific) && (p != pend) && ((UC('e') == *p) || (UC('E') == *p))) { |
| 192 | 1005136 | UC const * location_of_e = p; | |
| 193 | 1005136 | ++p; | |
| 194 | 1005136 | bool neg_exp = false; | |
| 195 |
3/4✓ Branch 0 taken 1005136 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 499687 times.
✓ Branch 3 taken 505449 times.
|
1005136 | if ((p != pend) && (UC('-') == *p)) { |
| 196 | 499687 | neg_exp = true; | |
| 197 | 499687 | ++p; | |
| 198 |
2/4✓ Branch 0 taken 505449 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 505449 times.
|
505449 | } else if ((p != pend) && (UC('+') == *p)) { // '+' on exponent is allowed by C++17 20.19.3.(7.1) |
| 199 | ✗ | ++p; | |
| 200 | } | ||
| 201 |
4/8✓ Branch 0 taken 1005136 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1005136 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 1005136 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 1005136 times.
|
2010272 | if ((p == pend) || !is_integer(*p)) { |
| 202 | ✗ | if(!((unsigned)fmt & (unsigned)chars_format::fixed)) { | |
| 203 | // We are in error. | ||
| 204 | ✗ | return answer; | |
| 205 | } | ||
| 206 | // Otherwise, we will be ignoring the 'e'. | ||
| 207 | ✗ | p = location_of_e; | |
| 208 | } else { | ||
| 209 |
6/8✓ Branch 0 taken 3192086 times.
✓ Branch 1 taken 1005136 times.
✓ Branch 2 taken 3192086 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 3192086 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 3192086 times.
✓ Branch 7 taken 1005136 times.
|
7389308 | while ((p != pend) && is_integer(*p)) { |
| 210 | 3192086 | uint8_t digit = uint8_t(*p - UC('0')); | |
| 211 |
1/2✓ Branch 0 taken 3192086 times.
✗ Branch 1 not taken.
|
3192086 | if (exp_number < 0x10000000) { |
| 212 | 3192086 | exp_number = 10 * exp_number + digit; | |
| 213 | } | ||
| 214 | 3192086 | ++p; | |
| 215 | } | ||
| 216 |
2/2✓ Branch 0 taken 499687 times.
✓ Branch 1 taken 505449 times.
|
1005136 | if(neg_exp) { exp_number = - exp_number; } |
| 217 | 1005136 | exponent += exp_number; | |
| 218 | } | ||
| 219 | 1005136 | } else { | |
| 220 | // If it scientific and not fixed, we have to bail out. | ||
| 221 |
2/4✓ Branch 0 taken 4174 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 4174 times.
|
4174 | if(((unsigned)fmt & (unsigned)chars_format::scientific) && !((unsigned)fmt & (unsigned)chars_format::fixed)) |
| 222 | { | ||
| 223 | ✗ | return answer; | |
| 224 | } | ||
| 225 | } | ||
| 226 | 1009310 | answer.lastmatch = p; | |
| 227 | 1009310 | answer.valid = true; | |
| 228 | |||
| 229 | // If we frequently had to deal with long strings of digits, | ||
| 230 | // we could extend our code by using a 128-bit integer instead | ||
| 231 | // of a 64-bit integer. However, this is uncommon. | ||
| 232 | // | ||
| 233 | // We can deal with up to 19 digits. | ||
| 234 |
2/2✓ Branch 0 taken 1003241 times.
✓ Branch 1 taken 6069 times.
|
1009310 | if (digit_count > 19) { // this is uncommon |
| 235 | // It is possible that the integer had an overflow. | ||
| 236 | // We have to handle the case where we have 0.0000somenumber. | ||
| 237 | // We need to be mindful of the case where we only have zeroes... | ||
| 238 | // E.g., 0.000000000...000. | ||
| 239 | 1003241 | UC const * start = start_digits; | |
| 240 |
6/6✓ Branch 0 taken 2111116 times.
✓ Branch 1 taken 1 times.
✓ Branch 2 taken 1104987 times.
✓ Branch 3 taken 1006129 times.
✓ Branch 4 taken 2889 times.
✓ Branch 5 taken 1003240 times.
|
2111117 | while ((start != pend) && (*start == UC('0') || *start == decimal_point)) { |
| 241 |
2/2✓ Branch 0 taken 1104987 times.
✓ Branch 1 taken 2889 times.
|
1107876 | if(*start == UC('0')) { digit_count --; } |
| 242 | 1107876 | start++; | |
| 243 | } | ||
| 244 |
2/2✓ Branch 0 taken 1000712 times.
✓ Branch 1 taken 2529 times.
|
1003241 | if (digit_count > 19) { |
| 245 | 1000712 | answer.too_many_digits = true; | |
| 246 | // Let us start again, this time, avoiding overflows. | ||
| 247 | // We don't need to check if is_integer, since we use the | ||
| 248 | // pre-tokenized spans from above. | ||
| 249 | 1000712 | i = 0; | |
| 250 | 1000712 | p = answer.integer.ptr; | |
| 251 | 1000712 | UC const * int_end = p + answer.integer.len(); | |
| 252 | 1000712 | constexpr uint64_t minimal_nineteen_digit_integer{1000000000000000000}; | |
| 253 |
4/4✓ Branch 0 taken 19001512 times.
✓ Branch 1 taken 946260 times.
✓ Branch 2 taken 18947060 times.
✓ Branch 3 taken 54452 times.
|
19947772 | while((i < minimal_nineteen_digit_integer) && (p != int_end)) { |
| 254 | 18947060 | i = i * 10 + uint64_t(*p - UC('0')); | |
| 255 | 18947060 | ++p; | |
| 256 | } | ||
| 257 |
2/2✓ Branch 0 taken 946260 times.
✓ Branch 1 taken 54452 times.
|
1000712 | if (i >= minimal_nineteen_digit_integer) { // We have a big integers |
| 258 | 946260 | exponent = end_of_integer_part - p + exp_number; | |
| 259 | } else { // We have a value with a fractional component. | ||
| 260 | 54452 | p = answer.fraction.ptr; | |
| 261 | 54452 | UC const * frac_end = p + answer.fraction.len(); | |
| 262 |
3/4✓ Branch 0 taken 66828 times.
✓ Branch 1 taken 54452 times.
✓ Branch 2 taken 66828 times.
✗ Branch 3 not taken.
|
121280 | while((i < minimal_nineteen_digit_integer) && (p != frac_end)) { |
| 263 | 66828 | i = i * 10 + uint64_t(*p - UC('0')); | |
| 264 | 66828 | ++p; | |
| 265 | } | ||
| 266 | 54452 | exponent = answer.fraction.ptr - p + exp_number; | |
| 267 | } | ||
| 268 | // We have now corrected both exponent and i, to a truncated value | ||
| 269 | } | ||
| 270 | } | ||
| 271 | 1009310 | answer.exponent = exponent; | |
| 272 | 1009310 | answer.mantissa = i; | |
| 273 | 1009310 | return answer; | |
| 274 | } | ||
| 275 | |||
| 276 | }}}}}} // namespace s | ||
| 277 | |||
| 278 | #endif | ||
| 279 |