GCC Code Coverage Report


Directory: libs/json/include/boost/json/
File: detail/utf8.hpp
Date: 2025-12-23 17:20:53
Exec Total Coverage
Lines: 62 62 100.0%
Functions: 13 13 100.0%
Branches: 16 18 88.9%

Line Branch Exec Source
1 //
2 // Copyright (c) 2020 Krystian Stasiowski (sdkrystian@gmail.com)
3 //
4 // Distributed under the Boost Software License, Version 1.0. (See accompanying
5 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 //
7 // Official repository: https://github.com/boostorg/json
8 //
9
10 #ifndef BOOST_JSON_DETAIL_UTF8_HPP
11 #define BOOST_JSON_DETAIL_UTF8_HPP
12
13 #include <boost/endian/conversion.hpp>
14 #include <boost/json/detail/config.hpp>
15
16 #include <cstddef>
17 #include <cstring>
18 #include <cstdint>
19
20 namespace boost {
21 namespace json {
22 namespace detail {
23
24 template<int N>
25 std::uint32_t
26 43466 load_little_endian(void const* p)
27 {
28 43466 std::uint32_t v = 0;
29 43466 std::memcpy(&v, p, N);
30 43466 endian::little_to_native_inplace(v);
31 43466 return v;
32 }
33
/// Classify a UTF-8 lead byte.
///
/// The result packs two fields: the low byte is the total length of the
/// sequence in bytes, and the high byte is a class number identifying the
/// range the second byte must fall in. A result of 0 means the byte cannot
/// start a multi-byte sequence.
///
/// Only the low seven bits of `c` are used as the table index, so the
/// table describes bytes 0x80..0xFF. The caller is expected to pass a byte
/// with the high bit set; an ASCII byte would alias the entry for
/// `c | 0x80`.
///
/// @param c the lead byte to classify.
/// @return  `(class << 8) | length`, or 0 if invalid.
inline
std::uint16_t
classify_utf8(char c)
{
    // 0x000 = invalid
    // 0x102 = 2 bytes, second byte [80, BF]
    // 0x203 = 3 bytes, second byte [A0, BF]
    // 0x303 = 3 bytes, second byte [80, BF]
    // 0x403 = 3 bytes, second byte [80, 9F]
    // 0x504 = 4 bytes, second byte [90, BF]
    // 0x604 = 4 bytes, second byte [80, BF]
    // 0x704 = 4 bytes, second byte [80, 8F]
    static constexpr std::uint16_t first[128]
    {
        // 0x80..0xBF: continuation bytes, never valid as a lead byte
        0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
        0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
        0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
        0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
        0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
        0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
        0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
        0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,

        // 0xC0..0xDF: two-byte leads (0xC0 and 0xC1 are rejected)
        0x000, 0x000, 0x102, 0x102, 0x102, 0x102, 0x102, 0x102,
        0x102, 0x102, 0x102, 0x102, 0x102, 0x102, 0x102, 0x102,
        0x102, 0x102, 0x102, 0x102, 0x102, 0x102, 0x102, 0x102,
        0x102, 0x102, 0x102, 0x102, 0x102, 0x102, 0x102, 0x102,
        // 0xE0..0xEF: three-byte leads
        0x203, 0x303, 0x303, 0x303, 0x303, 0x303, 0x303, 0x303,
        0x303, 0x303, 0x303, 0x303, 0x303, 0x403, 0x303, 0x303,
        // 0xF0..0xFF: four-byte leads (0xF5 and above are rejected)
        0x504, 0x604, 0x604, 0x604, 0x704, 0x000, 0x000, 0x000,
        0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
    };
    return first[static_cast<unsigned char>(c & 0x7F)];
}
68
69 inline
70 bool
71 13177 is_valid_utf8(const char* p, uint16_t first)
72 {
73 uint32_t v;
74
8/8
✓ Branch 0 taken 362 times.
✓ Branch 1 taken 2348 times.
✓ Branch 2 taken 665 times.
✓ Branch 3 taken 3882 times.
✓ Branch 4 taken 725 times.
✓ Branch 5 taken 1310 times.
✓ Branch 6 taken 2346 times.
✓ Branch 7 taken 1539 times.
13177 switch(first >> 8)
75 {
76 362 default:
77 362 return false;
78
79 // 2 bytes, second byte [80, BF]
80 2348 case 1:
81 2348 v = load_little_endian<2>(p);
82 2348 return (v & 0xC000) == 0x8000;
83
84 // 3 bytes, second byte [A0, BF]
85 665 case 2:
86 665 v = load_little_endian<3>(p);
87 665 return (v & 0xC0E000) == 0x80A000;
88
89 // 3 bytes, second byte [80, BF]
90 3882 case 3:
91 3882 v = load_little_endian<3>(p);
92 3882 return (v & 0xC0C000) == 0x808000;
93
94 // 3 bytes, second byte [80, 9F]
95 725 case 4:
96 725 v = load_little_endian<3>(p);
97 725 return (v & 0xC0E000) == 0x808000;
98
99 // 4 bytes, second byte [90, BF]
100 1310 case 5:
101 1310 v = load_little_endian<4>(p);
102 1310 return (v & 0xC0C0FF00) + 0x7F7F7000 <= 0x2F00;
103
104 // 4 bytes, second byte [80, BF]
105 2346 case 6:
106 2346 v = load_little_endian<4>(p);
107 2346 return (v & 0xC0C0C000) == 0x80808000;
108
109 // 4 bytes, second byte [80, 8F]
110 1539 case 7:
111 1539 v = load_little_endian<4>(p);
112 1539 return (v & 0xC0C0F000) == 0x80808000;
113 }
114 }
115
116 class utf8_sequence
117 {
118 char seq_[4];
119 uint16_t first_;
120 uint8_t size_;
121
122 public:
123 void
124 3466 save(
125 const char* p,
126 std::size_t remain) noexcept
127 {
128 3466 first_ = classify_utf8(*p );
129
2/2
✓ Branch 1 taken 1560 times.
✓ Branch 2 taken 1906 times.
3466 if(remain >= length())
130 1560 size_ = length();
131 else
132 1906 size_ = static_cast<uint8_t>(remain);
133 3466 std::memcpy(seq_, p, size_);
134 3466 }
135
136 uint8_t
137 21338 length() const noexcept
138 {
139 21338 return first_ & 0xFF;
140 }
141
142 bool
143 3469 complete() const noexcept
144 {
145 3469 return size_ >= length();
146 }
147
148 // returns true if complete
149 bool
150 1864 append(
151 const char* p,
152 std::size_t remain) noexcept
153 {
154
2/2
✓ Branch 1 taken 1 times.
✓ Branch 2 taken 1863 times.
1864 if(BOOST_JSON_UNLIKELY(needed() == 0))
155 1 return true;
156
2/2
✓ Branch 1 taken 1862 times.
✓ Branch 2 taken 1 times.
1863 if(BOOST_JSON_LIKELY(remain >= needed()))
157 {
158 1862 std::memcpy(
159 1862 seq_ + size_, p, needed());
160 1862 size_ = length();
161 1862 return true;
162 }
163
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1 if(BOOST_JSON_LIKELY(remain > 0))
164 {
165 1 std::memcpy(seq_ + size_, p, remain);
166 1 size_ += static_cast<uint8_t>(remain);
167 }
168 1 return false;
169 }
170
171 const char*
172 1658 data() const noexcept
173 {
174 1658 return seq_;
175 }
176
177 uint8_t
178 7457 needed() const noexcept
179 {
180 7457 return length() - size_;
181 }
182
183 bool
184 1866 valid() const noexcept
185 {
186
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 1866 times.
1866 BOOST_ASSERT(size_ >= length());
187 1866 return is_valid_utf8(seq_, first_);
188 }
189 };
190
191 } // namespace detail
192 } // namespace json
193 } // namespace boost
194
195 #endif
196