GCC Code Coverage Report


Directory: libs/json/include/boost/json/
File: detail/charconv/detail/fast_float/ascii_number.hpp
Date: 2025-12-23 17:20:53
Exec Total Coverage
Lines: 95 108 88.0%
Functions: 0 0 -%
Branches: 86 130 66.2%

Line Branch Exec Source
1 // Copyright 2020-2023 Daniel Lemire
2 // Copyright 2023 Matt Borland
3 // Distributed under the Boost Software License, Version 1.0.
4 // https://www.boost.org/LICENSE_1_0.txt
5 //
6 // Derivative of: https://github.com/fastfloat/fast_float
7
8 #ifndef BOOST_JSON_DETAIL_CHARCONV_DETAIL_FASTFLOAT_ASCII_NUMBER_HPP
9 #define BOOST_JSON_DETAIL_CHARCONV_DETAIL_FASTFLOAT_ASCII_NUMBER_HPP
10
11 #include <boost/endian/conversion.hpp>
12 #include <boost/json/detail/charconv/detail/fast_float/float_common.hpp>
13 #include <cctype>
14 #include <cstdint>
15 #include <cstring>
16 #include <iterator>
17
18 namespace boost { namespace json { namespace detail { namespace charconv { namespace detail { namespace fast_float {
19
20 // Next function can be micro-optimized, but compilers are entirely
21 // able to optimize it well.
22 template <typename UC>
23 BOOST_FORCEINLINE constexpr bool is_integer(UC c) noexcept {
24
6/10
✓ Branch 0 taken 3902 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 19417139 times.
✓ Branch 3 taken 1006820 times.
✓ Branch 4 taken 3423102 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 1005136 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 3192086 times.
✗ Branch 9 not taken.
29053321 return !(c > UC('9') || c < UC('0'));
25 }
26
27 BOOST_FORCEINLINE constexpr uint64_t byteswap(uint64_t val) {
28 return (val & 0xFF00000000000000) >> 56
29 | (val & 0x00FF000000000000) >> 40
30 | (val & 0x0000FF0000000000) >> 24
31 | (val & 0x000000FF00000000) >> 8
32 | (val & 0x00000000FF000000) << 8
33 | (val & 0x0000000000FF0000) << 24
34 | (val & 0x000000000000FF00) << 40
35 | (val & 0x00000000000000FF) << 56;
36 }
37
38 BOOST_FORCEINLINE BOOST_JSON_FASTFLOAT_CONSTEXPR20
39 uint64_t read_u64(const char *chars) {
40
4/8
✗ Branch 0 not taken.
✓ Branch 1 taken 5972 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 5960 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 2139963 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 2683865 times.
4835760 if (cpp20_and_in_constexpr()) {
41 uint64_t val = 0;
42 for(int i = 0; i < 8; ++i) {
43 val |= uint64_t(*chars) << (i*8);
44 ++chars;
45 }
46 return val;
47 }
48 uint64_t val;
49 4835760 ::memcpy(&val, chars, sizeof(uint64_t));
50 4835760 endian::little_to_native_inplace(val);
51 4835760 return val;
52 }
53
54 BOOST_FORCEINLINE BOOST_JSON_FASTFLOAT_CONSTEXPR20
55 void write_u64(uint8_t *chars, uint64_t val) {
56 if (cpp20_and_in_constexpr()) {
57 for(int i = 0; i < 8; ++i) {
58 *chars = uint8_t(val);
59 val >>= 8;
60 ++chars;
61 }
62 return;
63 }
64 endian::native_to_little_inplace(val);
65 ::memcpy(chars, &val, sizeof(uint64_t));
66 }
67
68 // credit @aqrit
69 BOOST_FORCEINLINE BOOST_JSON_CXX14_CONSTEXPR_NO_INLINE
70 uint32_t parse_eight_digits_unrolled(uint64_t val) {
71 2151895 constexpr uint64_t mask = 0x000000FF000000FF;
72 2151895 constexpr uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32)
73 2151895 constexpr uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32)
74 2151895 val -= 0x3030303030303030;
75 2151895 val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
76 2151895 val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32;
77 2151895 return uint32_t(val);
78 }
79
80 BOOST_FORCEINLINE constexpr
81 uint32_t parse_eight_digits_unrolled(const char16_t *) noexcept {
82 return 0;
83 }
84
85 BOOST_FORCEINLINE constexpr
86 uint32_t parse_eight_digits_unrolled(const char32_t *) noexcept {
87 return 0;
88 }
89
90 BOOST_FORCEINLINE BOOST_JSON_FASTFLOAT_CONSTEXPR20
91 uint32_t parse_eight_digits_unrolled(const char *chars) noexcept {
92 4303790 return parse_eight_digits_unrolled(read_u64(chars));
93 }
94
95 // credit @aqrit
96 BOOST_FORCEINLINE constexpr bool is_made_of_eight_digits_fast(uint64_t val) noexcept {
97 2683865 return !((((val + 0x4646464646464646) | (val - 0x3030303030303030)) & 0x8080808080808080));
98 }
99
100 BOOST_FORCEINLINE constexpr
101 bool is_made_of_eight_digits_fast(const char16_t *) noexcept {
102 return false;
103 }
104
105 BOOST_FORCEINLINE constexpr
106 bool is_made_of_eight_digits_fast(const char32_t *) noexcept {
107 return false;
108 }
109
110 BOOST_FORCEINLINE BOOST_JSON_FASTFLOAT_CONSTEXPR20
111 bool is_made_of_eight_digits_fast(const char *chars) noexcept {
112 5367730 return is_made_of_eight_digits_fast(read_u64(chars));
113 }
114
115 template <typename UC>
116 struct parsed_number_string_t {
117 int64_t exponent{0};
118 uint64_t mantissa{0};
119 UC const * lastmatch{nullptr};
120 bool negative{false};
121 bool valid{false};
122 bool too_many_digits{false};
123 // contains the range of the significant digits
124 span<const UC> integer{}; // non-nullable
125 span<const UC> fraction{}; // nullable
126 };
127 using byte_span = span<char>;
128 using parsed_number_string = parsed_number_string_t<char>;
129 // Assuming that you use no more than 19 digits, this will
130 // parse an ASCII string.
131 template <typename UC>
132 BOOST_FORCEINLINE BOOST_JSON_FASTFLOAT_CONSTEXPR20
133 parsed_number_string_t<UC> parse_number_string(UC const *p, UC const * pend, parse_options_t<UC> options) noexcept {
134 1009310 chars_format const fmt = options.format;
135 1009310 UC const decimal_point = options.decimal_point;
136
137 1009310 parsed_number_string_t<UC> answer;
138 1009310 answer.valid = false;
139 1009310 answer.too_many_digits = false;
140 1009310 answer.negative = (*p == UC('-'));
141
2/2
✓ Branch 0 taken 3902 times.
✓ Branch 1 taken 1005408 times.
1009310 if (*p == UC('-')) // C++17 20.19.3.(7.1) explicitly forbids '+' sign here
142 {
143 3902 ++p;
144
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3902 times.
3902 if (p == pend) {
145 return answer;
146 }
147
3/8
✓ Branch 0 taken 3902 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 3902 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 3902 times.
7804 if (!is_integer(*p) && (*p != decimal_point)) { // a sign must be followed by an integer or the dot
148 return answer;
149 }
150 }
151 1009310 UC const * const start_digits = p;
152
153 1009310 uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad)
154
155
7/8
✓ Branch 0 taken 20426449 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 20423959 times.
✓ Branch 3 taken 2490 times.
✓ Branch 4 taken 19417139 times.
✓ Branch 5 taken 1009310 times.
✓ Branch 6 taken 19417139 times.
✓ Branch 7 taken 1009310 times.
40852898 while ((p != pend) && is_integer(*p)) {
156 // a multiplication by 10 is cheaper than an arbitrary integer
157 // multiplication
158 19417139 i = 10 * i +
159 19417139 uint64_t(*p - UC('0')); // might overflow, we will handle the overflow later
160 19417139 ++p;
161 }
162 1009310 UC const * const end_of_integer_part = p;
163 1009310 int64_t digit_count = int64_t(end_of_integer_part - start_digits);
164 1009310 answer.integer = span<const UC>(start_digits, size_t(digit_count));
165 1009310 int64_t exponent = 0;
166
3/4
✓ Branch 0 taken 1009310 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1006820 times.
✓ Branch 3 taken 2490 times.
1009310 if ((p != pend) && (*p == decimal_point)) {
167 1006820 ++p;
168 1006820 UC const * before = p;
169 // can occur at most twice without overflowing, but let it occur more, since
170 // for integers with many digits, digit parsing is the primary bottleneck.
171 if (std::is_same<UC,char>::value) {
172
6/6
✓ Branch 0 taken 2683865 times.
✓ Branch 1 taken 462918 times.
✓ Branch 2 taken 2139963 times.
✓ Branch 3 taken 543902 times.
✓ Branch 4 taken 2139963 times.
✓ Branch 5 taken 1006820 times.
6837468 while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) {
173 2139963 i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok
174 2139963 p += 8;
175 }
176 }
177
8/8
✓ Branch 0 taken 4425748 times.
✓ Branch 1 taken 4174 times.
✓ Branch 2 taken 3423102 times.
✓ Branch 3 taken 1002646 times.
✓ Branch 4 taken 3423102 times.
✓ Branch 5 taken 1002646 times.
✓ Branch 6 taken 3423102 times.
✓ Branch 7 taken 1006820 times.
8855670 while ((p != pend) && is_integer(*p)) {
178 3423102 uint8_t digit = uint8_t(*p - UC('0'));
179 3423102 ++p;
180 3423102 i = i * 10 + digit; // in rare cases, this will overflow, but that's ok
181 }
182 1006820 exponent = before - p;
183 1006820 answer.fraction = span<const UC>(before, size_t(p - before));
184 1006820 digit_count -= exponent;
185 }
186 // we must have encountered at least one integer!
187
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1009310 times.
1009310 if (digit_count == 0) {
188 return answer;
189 }
190 1009310 int64_t exp_number = 0; // explicit exponential part
191
4/8
✓ Branch 0 taken 1009310 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1005136 times.
✓ Branch 3 taken 4174 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 1005136 times.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
1009310 if (((unsigned)fmt & (unsigned)chars_format::scientific) && (p != pend) && ((UC('e') == *p) || (UC('E') == *p))) {
192 1005136 UC const * location_of_e = p;
193 1005136 ++p;
194 1005136 bool neg_exp = false;
195
3/4
✓ Branch 0 taken 1005136 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 499687 times.
✓ Branch 3 taken 505449 times.
1005136 if ((p != pend) && (UC('-') == *p)) {
196 499687 neg_exp = true;
197 499687 ++p;
198
2/4
✓ Branch 0 taken 505449 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 505449 times.
505449 } else if ((p != pend) && (UC('+') == *p)) { // '+' on exponent is allowed by C++17 20.19.3.(7.1)
199 ++p;
200 }
201
4/8
✓ Branch 0 taken 1005136 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1005136 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 1005136 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 1005136 times.
2010272 if ((p == pend) || !is_integer(*p)) {
202 if(!((unsigned)fmt & (unsigned)chars_format::fixed)) {
203 // We are in error.
204 return answer;
205 }
206 // Otherwise, we will be ignoring the 'e'.
207 p = location_of_e;
208 } else {
209
6/8
✓ Branch 0 taken 3192086 times.
✓ Branch 1 taken 1005136 times.
✓ Branch 2 taken 3192086 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 3192086 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 3192086 times.
✓ Branch 7 taken 1005136 times.
7389308 while ((p != pend) && is_integer(*p)) {
210 3192086 uint8_t digit = uint8_t(*p - UC('0'));
211
1/2
✓ Branch 0 taken 3192086 times.
✗ Branch 1 not taken.
3192086 if (exp_number < 0x10000000) {
212 3192086 exp_number = 10 * exp_number + digit;
213 }
214 3192086 ++p;
215 }
216
2/2
✓ Branch 0 taken 499687 times.
✓ Branch 1 taken 505449 times.
1005136 if(neg_exp) { exp_number = - exp_number; }
217 1005136 exponent += exp_number;
218 }
219 1005136 } else {
220 // If it scientific and not fixed, we have to bail out.
221
2/4
✓ Branch 0 taken 4174 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 4174 times.
4174 if(((unsigned)fmt & (unsigned)chars_format::scientific) && !((unsigned)fmt & (unsigned)chars_format::fixed))
222 {
223 return answer;
224 }
225 }
226 1009310 answer.lastmatch = p;
227 1009310 answer.valid = true;
228
229 // If we frequently had to deal with long strings of digits,
230 // we could extend our code by using a 128-bit integer instead
231 // of a 64-bit integer. However, this is uncommon.
232 //
233 // We can deal with up to 19 digits.
234
2/2
✓ Branch 0 taken 1003241 times.
✓ Branch 1 taken 6069 times.
1009310 if (digit_count > 19) { // this is uncommon
235 // It is possible that the integer had an overflow.
236 // We have to handle the case where we have 0.0000somenumber.
237 // We need to be mindful of the case where we only have zeroes...
238 // E.g., 0.000000000...000.
239 1003241 UC const * start = start_digits;
240
6/6
✓ Branch 0 taken 2111116 times.
✓ Branch 1 taken 1 times.
✓ Branch 2 taken 1104987 times.
✓ Branch 3 taken 1006129 times.
✓ Branch 4 taken 2889 times.
✓ Branch 5 taken 1003240 times.
2111117 while ((start != pend) && (*start == UC('0') || *start == decimal_point)) {
241
2/2
✓ Branch 0 taken 1104987 times.
✓ Branch 1 taken 2889 times.
1107876 if(*start == UC('0')) { digit_count --; }
242 1107876 start++;
243 }
244
2/2
✓ Branch 0 taken 1000712 times.
✓ Branch 1 taken 2529 times.
1003241 if (digit_count > 19) {
245 1000712 answer.too_many_digits = true;
246 // Let us start again, this time, avoiding overflows.
247 // We don't need to check if is_integer, since we use the
248 // pre-tokenized spans from above.
249 1000712 i = 0;
250 1000712 p = answer.integer.ptr;
251 1000712 UC const * int_end = p + answer.integer.len();
252 1000712 constexpr uint64_t minimal_nineteen_digit_integer{1000000000000000000};
253
4/4
✓ Branch 0 taken 19001512 times.
✓ Branch 1 taken 946260 times.
✓ Branch 2 taken 18947060 times.
✓ Branch 3 taken 54452 times.
19947772 while((i < minimal_nineteen_digit_integer) && (p != int_end)) {
254 18947060 i = i * 10 + uint64_t(*p - UC('0'));
255 18947060 ++p;
256 }
257
2/2
✓ Branch 0 taken 946260 times.
✓ Branch 1 taken 54452 times.
1000712 if (i >= minimal_nineteen_digit_integer) { // We have a big integers
258 946260 exponent = end_of_integer_part - p + exp_number;
259 } else { // We have a value with a fractional component.
260 54452 p = answer.fraction.ptr;
261 54452 UC const * frac_end = p + answer.fraction.len();
262
3/4
✓ Branch 0 taken 66828 times.
✓ Branch 1 taken 54452 times.
✓ Branch 2 taken 66828 times.
✗ Branch 3 not taken.
121280 while((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
263 66828 i = i * 10 + uint64_t(*p - UC('0'));
264 66828 ++p;
265 }
266 54452 exponent = answer.fraction.ptr - p + exp_number;
267 }
268 // We have now corrected both exponent and i, to a truncated value
269 }
270 }
271 1009310 answer.exponent = exponent;
272 1009310 answer.mantissa = i;
273 1009310 return answer;
274 }
275
276 }}}}}} // namespace s
277
278 #endif
279