GCC Code Coverage Report


Directory: libs/json/include/boost/json/
File: detail/sse2.hpp
Date: 2025-12-23 17:20:53
Exec Total Coverage
Lines: 134 137 97.8%
Functions: 6 6 100.0%
Branches: 59 61 96.7%

Line Branch Exec Source
1 //
2 // Copyright (c) 2019 Peter Dimov (pdimov at gmail dot com),
3 // Vinnie Falco (vinnie.falco@gmail.com)
4 // Copyright (c) 2020 Krystian Stasiowski (sdkrystian@gmail.com)
5 //
6 // Distributed under the Boost Software License, Version 1.0. (See accompanying
7 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
8 //
9 // Official repository: https://github.com/boostorg/json
10 //
11
12 #ifndef BOOST_JSON_DETAIL_SSE2_HPP
13 #define BOOST_JSON_DETAIL_SSE2_HPP
14
15 #include <boost/json/detail/config.hpp>
16 #include <boost/json/detail/utf8.hpp>
17 #include <cstddef>
18 #include <cstring>
19 #ifdef BOOST_JSON_USE_SSE2
20 # include <emmintrin.h>
21 # include <xmmintrin.h>
22 # ifdef _MSC_VER
23 # include <intrin.h>
24 # endif
25 #endif
26
27 namespace boost {
28 namespace json {
29 namespace detail {
30
31 #ifdef BOOST_JSON_USE_SSE2
32
33 template<bool AllowBadUTF8>
34 inline
35 const char*
36 2177 count_valid(
37 char const* p,
38 const char* end) noexcept
39 {
40 2177 __m128i const q1 = _mm_set1_epi8( '\x22' ); // '"'
41 2177 __m128i const q2 = _mm_set1_epi8( '\\' ); // '\\'
42 2177 __m128i const q3 = _mm_set1_epi8( 0x1F );
43
44
2/2
✓ Branch 0 taken 924 times.
✓ Branch 1 taken 1491 times.
2415 while(end - p >= 16)
45 {
46 924 __m128i v1 = _mm_loadu_si128( (__m128i const*)p );
47 924 __m128i v2 = _mm_cmpeq_epi8( v1, q1 ); // quote
48 924 __m128i v3 = _mm_cmpeq_epi8( v1, q2 ); // backslash
49 924 __m128i v4 = _mm_or_si128( v2, v3 ); // combine quotes and backslash
50 924 __m128i v5 = _mm_min_epu8( v1, q3 );
51 924 __m128i v6 = _mm_cmpeq_epi8( v5, v1 ); // controls
52 924 __m128i v7 = _mm_or_si128( v4, v6 ); // combine with control
53
54 924 int w = _mm_movemask_epi8( v7 );
55
56
2/2
✓ Branch 0 taken 686 times.
✓ Branch 1 taken 238 times.
924 if( w != 0 )
57 {
58 int m;
59 #if defined(__GNUC__) || defined(__clang__)
60 686 m = __builtin_ffs( w ) - 1;
61 #else
62 unsigned long index;
63 _BitScanForward( &index, w );
64 m = index;
65 #endif
66 686 return p + m;
67 }
68
69 238 p += 16;
70 }
71
72
2/2
✓ Branch 0 taken 3689 times.
✓ Branch 1 taken 49 times.
3738 while(p != end)
73 {
74 3689 const unsigned char c = *p;
75
6/6
✓ Branch 0 taken 2716 times.
✓ Branch 1 taken 973 times.
✓ Branch 2 taken 2282 times.
✓ Branch 3 taken 434 times.
✓ Branch 4 taken 2247 times.
✓ Branch 5 taken 35 times.
3689 if(c == '\x22' || c == '\\' || c < 0x20)
76 break;
77 2247 ++p;
78 }
79
80 1491 return p;
81 }
82
83 template<>
84 inline
85 const char*
86 162806 count_valid<false>(
87 char const* p,
88 const char* end) noexcept
89 {
90 162806 __m128i const q1 = _mm_set1_epi8( '\x22' ); // '"'
91 162806 __m128i const q2 = _mm_set1_epi8( '\\' );
92 162806 __m128i const q3 = _mm_set1_epi8( 0x20 );
93
94
2/2
✓ Branch 0 taken 12090989 times.
✓ Branch 1 taken 111346 times.
12202335 while(end - p >= 16)
95 {
96 12090989 __m128i v1 = _mm_loadu_si128( (__m128i const*)p );
97
98 12090989 __m128i v2 = _mm_cmpeq_epi8( v1, q1 );
99 12090989 __m128i v3 = _mm_cmpeq_epi8( v1, q2 );
100 12090989 __m128i v4 = _mm_cmplt_epi8( v1, q3 );
101
102 12090989 __m128i v5 = _mm_or_si128( v2, v3 );
103 12090989 __m128i v6 = _mm_or_si128( v5, v4 );
104
105 12090989 int w = _mm_movemask_epi8( v6 );
106
107
2/2
✓ Branch 0 taken 51460 times.
✓ Branch 1 taken 12039529 times.
12090989 if( w != 0 )
108 {
109 int m;
110 #if defined(__GNUC__) || defined(__clang__)
111 51460 m = __builtin_ffs( w ) - 1;
112 #else
113 unsigned long index;
114 _BitScanForward( &index, w );
115 m = index;
116 #endif
117 51460 p += m;
118 51460 break;
119 }
120
121 12039529 p += 16;
122 }
123
124
2/2
✓ Branch 0 taken 450135 times.
✓ Branch 1 taken 30249 times.
480384 while(p != end)
125 {
126 450135 const unsigned char c = *p;
127
6/6
✓ Branch 0 taken 332002 times.
✓ Branch 1 taken 118133 times.
✓ Branch 2 taken 321107 times.
✓ Branch 3 taken 10895 times.
✓ Branch 4 taken 321040 times.
✓ Branch 5 taken 67 times.
450135 if(c == '\x22' || c == '\\' || c < 0x20)
128 break;
129
2/2
✓ Branch 0 taken 307850 times.
✓ Branch 1 taken 13190 times.
321040 if(c < 0x80)
130 {
131 307850 ++p;
132 307850 continue;
133 }
134 // validate utf-8
135 13190 uint16_t first = classify_utf8(c);
136 13190 uint8_t len = first & 0xFF;
137
2/2
✓ Branch 0 taken 1905 times.
✓ Branch 1 taken 11285 times.
13190 if(BOOST_JSON_UNLIKELY(end - p < len))
138 1905 break;
139
2/2
✓ Branch 1 taken 1557 times.
✓ Branch 2 taken 9728 times.
11285 if(BOOST_JSON_UNLIKELY(! is_valid_utf8(p, first)))
140 1557 break;
141 9728 p += len;
142 }
143
144 162806 return p;
145 }
146
147 #else
148
149 template<bool AllowBadUTF8>
150 char const*
151 count_valid(
152 char const* p,
153 char const* end) noexcept
154 {
155 while(p != end)
156 {
157 const unsigned char c = *p;
158 if(c == '\x22' || c == '\\' || c < 0x20)
159 break;
160 ++p;
161 }
162
163 return p;
164 }
165
166 template<>
167 inline
168 char const*
169 count_valid<false>(
170 char const* p,
171 char const* end) noexcept
172 {
173 while(p != end)
174 {
175 const unsigned char c = *p;
176 if(c == '\x22' || c == '\\' || c < 0x20)
177 break;
178 if(c < 0x80)
179 {
180 ++p;
181 continue;
182 }
183 // validate utf-8
184 uint16_t first = classify_utf8(c);
185 uint8_t len = first & 0xFF;
186 if(BOOST_JSON_UNLIKELY(end - p < len))
187 break;
188 if(BOOST_JSON_UNLIKELY(! is_valid_utf8(p, first)))
189 break;
190 p += len;
191 }
192
193 return p;
194 }
195
196 #endif
197
198 // KRYSTIAN NOTE: does not stop to validate
199 // count_unescaped
200
201 #ifdef BOOST_JSON_USE_SSE2
202
203 inline
204 size_t
205 34459 count_unescaped(
206 char const* s,
207 size_t n) noexcept
208 {
209
210 34459 __m128i const q1 = _mm_set1_epi8( '\x22' ); // '"'
211 34459 __m128i const q2 = _mm_set1_epi8( '\\' ); // '\\'
212 34459 __m128i const q3 = _mm_set1_epi8( 0x1F );
213
214 34459 char const * s0 = s;
215
216
2/2
✓ Branch 0 taken 4061711 times.
✓ Branch 1 taken 34459 times.
4096170 while( n >= 16 )
217 {
218 4061711 __m128i v1 = _mm_loadu_si128( (__m128i const*)s );
219 4061711 __m128i v2 = _mm_cmpeq_epi8( v1, q1 ); // quote
220 4061711 __m128i v3 = _mm_cmpeq_epi8( v1, q2 ); // backslash
221 4061711 __m128i v4 = _mm_or_si128( v2, v3 ); // combine quotes and backslash
222 4061711 __m128i v5 = _mm_min_epu8( v1, q3 );
223 4061711 __m128i v6 = _mm_cmpeq_epi8( v5, v1 ); // controls
224 4061711 __m128i v7 = _mm_or_si128( v4, v6 ); // combine with control
225
226 4061711 int w = _mm_movemask_epi8( v7 );
227
228
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4061711 times.
4061711 if( w != 0 )
229 {
230 int m;
231 #if defined(__GNUC__) || defined(__clang__)
232 m = __builtin_ffs( w ) - 1;
233 #else
234 unsigned long index;
235 _BitScanForward( &index, w );
236 m = index;
237 #endif
238
239 s += m;
240 break;
241 }
242
243 4061711 s += 16;
244 4061711 n -= 16;
245 }
246
247 34459 return s - s0;
248 }
249
250 #else
251
252 inline
253 std::size_t
254 count_unescaped(
255 char const*,
256 std::size_t) noexcept
257 {
258 return 0;
259 }
260
261 #endif
262
263 // count_digits
264
265 #ifdef BOOST_JSON_USE_SSE2
266
267 // assumes p..p+15 are valid
268 2024516 inline int count_digits( char const* p ) noexcept
269 {
270 2024516 __m128i v1 = _mm_loadu_si128( (__m128i const*)p );
271 4049032 v1 = _mm_add_epi8(v1, _mm_set1_epi8(70));
272 4049032 v1 = _mm_cmplt_epi8(v1, _mm_set1_epi8(118));
273
274 2024516 int m = _mm_movemask_epi8(v1);
275
276 int n;
277
278
2/2
✓ Branch 0 taken 2012400 times.
✓ Branch 1 taken 12116 times.
2024516 if( m == 0 )
279 {
280 2012400 n = 16;
281 }
282 else
283 {
284 #if defined(__GNUC__) || defined(__clang__)
285 12116 n = __builtin_ffs( m ) - 1;
286 #else
287 unsigned long index;
288 _BitScanForward( &index, m );
289 n = static_cast<int>(index);
290 #endif
291 }
292
293 2024516 return n;
294 }
295
296 #else
297
298 // assumes p..p+15 are valid
299 inline int count_digits( char const* p ) noexcept
300 {
301 int n = 0;
302
303 for( ; n < 16; ++n )
304 {
305 unsigned char const d = *p++ - '0';
306 if(d > 9) break;
307 }
308
309 return n;
310 }
311
312 #endif
313
314 // parse_unsigned
315
316 2019313 inline uint64_t parse_unsigned( uint64_t r, char const * p, std::size_t n ) noexcept
317 {
318
2/2
✓ Branch 0 taken 8045160 times.
✓ Branch 1 taken 2019313 times.
10064473 while( n >= 4 )
319 {
320 // faster on clang for x86,
321 // slower on gcc
322 #ifdef __clang__
323 r = r * 10 + p[0] - '0';
324 r = r * 10 + p[1] - '0';
325 r = r * 10 + p[2] - '0';
326 r = r * 10 + p[3] - '0';
327 #else
328 uint32_t v;
329 8045160 std::memcpy( &v, p, 4 );
330 8045160 endian::native_to_little_inplace(v);
331
332 8045160 v -= 0x30303030;
333
334 8045160 unsigned w0 = v & 0xFF;
335 8045160 unsigned w1 = (v >> 8) & 0xFF;
336 8045160 unsigned w2 = (v >> 16) & 0xFF;
337 8045160 unsigned w3 = (v >> 24);
338
339 8045160 r = (((r * 10 + w0) * 10 + w1) * 10 + w2) * 10 + w3;
340 #endif
341 8045160 p += 4;
342 8045160 n -= 4;
343 }
344
345
4/5
✓ Branch 0 taken 2010658 times.
✓ Branch 1 taken 5484 times.
✓ Branch 2 taken 1714 times.
✓ Branch 3 taken 1457 times.
✗ Branch 4 not taken.
2019313 switch( n )
346 {
347 2010658 case 0:
348 2010658 break;
349 5484 case 1:
350 5484 r = r * 10 + p[0] - '0';
351 5484 break;
352 1714 case 2:
353 1714 r = r * 10 + p[0] - '0';
354 1714 r = r * 10 + p[1] - '0';
355 1714 break;
356 1457 case 3:
357 1457 r = r * 10 + p[0] - '0';
358 1457 r = r * 10 + p[1] - '0';
359 1457 r = r * 10 + p[2] - '0';
360 1457 break;
361 }
362 2019313 return r;
363 }
364
365 // KRYSTIAN: this function is unused
366 // count_leading
367
368 /*
369 #ifdef BOOST_JSON_USE_SSE2
370
371 // assumes p..p+15 are valid
372 inline std::size_t count_leading( char const * p, char ch ) noexcept
373 {
374 __m128i const q1 = _mm_set1_epi8( ch );
375
376 __m128i v = _mm_loadu_si128( (__m128i const*)p );
377
378 __m128i w = _mm_cmpeq_epi8( v, q1 );
379
380 int m = _mm_movemask_epi8( w ) ^ 0xFFFF;
381
382 std::size_t n;
383
384 if( m == 0 )
385 {
386 n = 16;
387 }
388 else
389 {
390 #if defined(__GNUC__) || defined(__clang__)
391 n = __builtin_ffs( m ) - 1;
392 #else
393 unsigned long index;
394 _BitScanForward( &index, m );
395 n = index;
396 #endif
397 }
398
399 return n;
400 }
401
402 #else
403
404 // assumes p..p+15 are valid
405 inline std::size_t count_leading( char const * p, char ch ) noexcept
406 {
407 std::size_t n = 0;
408
409 for( ; n < 16 && *p == ch; ++p, ++n );
410
411 return n;
412 }
413
414 #endif
415 */
416
417 // count_whitespace
418
419 #ifdef BOOST_JSON_USE_SSE2
420
421 4701384 inline const char* count_whitespace( char const* p, const char* end ) noexcept
422 {
423
2/2
✓ Branch 0 taken 2133873 times.
✓ Branch 1 taken 2567511 times.
4701384 if( p == end )
424 {
425 2133873 return p;
426 }
427
428
2/2
✓ Branch 0 taken 2484718 times.
✓ Branch 1 taken 82793 times.
2567511 if( static_cast<unsigned char>( *p ) > 0x20 )
429 {
430 2484718 return p;
431 }
432
433 82793 __m128i const q1 = _mm_set1_epi8( ' ' );
434 82793 __m128i const q2 = _mm_set1_epi8( '\n' );
435 82793 __m128i const q3 = _mm_set1_epi8( 4 ); // '\t' | 4 == '\r'
436 82793 __m128i const q4 = _mm_set1_epi8( '\r' );
437
438
2/2
✓ Branch 0 taken 105374 times.
✓ Branch 1 taken 77926 times.
183300 while( end - p >= 16 )
439 {
440 105374 __m128i v0 = _mm_loadu_si128( (__m128i const*)p );
441
442 316122 __m128i w0 = _mm_or_si128(
443 _mm_cmpeq_epi8( v0, q1 ),
444 _mm_cmpeq_epi8( v0, q2 ));
445 105374 __m128i v1 = _mm_or_si128( v0, q3 );
446 105374 __m128i w1 = _mm_cmpeq_epi8( v1, q4 );
447 105374 __m128i w2 = _mm_or_si128( w0, w1 );
448
449 105374 int m = _mm_movemask_epi8( w2 ) ^ 0xFFFF;
450
451
2/2
✓ Branch 0 taken 4867 times.
✓ Branch 1 taken 100507 times.
105374 if( m != 0 )
452 {
453 #if defined(__GNUC__) || defined(__clang__)
454 4867 std::size_t c = __builtin_ffs( m ) - 1;
455 #else
456 unsigned long index;
457 _BitScanForward( &index, m );
458 std::size_t c = index;
459 #endif
460
461 4867 p += c;
462 4867 return p;
463 }
464
465 100507 p += 16;
466 }
467
468
2/2
✓ Branch 0 taken 401661 times.
✓ Branch 1 taken 60859 times.
462520 while( p != end )
469 {
470
8/8
✓ Branch 0 taken 21110 times.
✓ Branch 1 taken 380551 times.
✓ Branch 2 taken 19542 times.
✓ Branch 3 taken 1568 times.
✓ Branch 4 taken 18756 times.
✓ Branch 5 taken 786 times.
✓ Branch 6 taken 17067 times.
✓ Branch 7 taken 1689 times.
401661 if( *p != ' ' && *p != '\t' && *p != '\r' && *p != '\n' )
471 {
472 17067 return p;
473 }
474
475 384594 ++p;
476 }
477
478 60859 return p;
479 }
480
481 /*
482
483 // slightly faster on msvc-14.2, slightly slower on clang-win
484
485 inline std::size_t count_whitespace( char const * p, std::size_t n ) noexcept
486 {
487 char const * p0 = p;
488
489 while( n > 0 )
490 {
491 char ch = *p;
492
493 if( ch == '\n' || ch == '\r' )
494 {
495 ++p;
496 --n;
497 continue;
498 }
499
500 if( ch != ' ' && ch != '\t' )
501 {
502 break;
503 }
504
505 ++p;
506 --n;
507
508 while( n >= 16 )
509 {
510 std::size_t n2 = count_leading( p, ch );
511
512 p += n2;
513 n -= n2;
514
515 if( n2 < 16 )
516 {
517 break;
518 }
519 }
520 }
521
522 return p - p0;
523 }
524 */
525
526 #else
527
528 inline const char* count_whitespace( char const* p, const char* end ) noexcept
529 {
530
531 for(; p != end; ++p)
532 {
533 char const c = *p;
534 if( c != ' ' && c != '\n' && c != '\r' && c != '\t' ) break;
535 }
536
537 return p;
538 }
539
540 #endif
541
542 } // detail
543 } // namespace json
544 } // namespace boost
545
546 #endif
547