GCC Code Coverage Report


Directory: libs/json/include/boost/json/
File: detail/ryu/detail/d2s_intrinsics.hpp
Date: 2025-12-23 17:20:53
Exec Total Coverage
Lines: 28 28 100.0%
Functions: 9 9 100.0%
Branches: 4 6 66.7%

Line Branch Exec Source
1 // Copyright 2018 Ulf Adams
2 //
3 // The contents of this file may be used under the terms of the Apache License,
4 // Version 2.0.
5 //
6 // (See accompanying file LICENSE-Apache or copy at
7 // http://www.apache.org/licenses/LICENSE-2.0)
8 //
9 // Alternatively, the contents of this file may be used under the terms of
10 // the Boost Software License, Version 1.0.
11 // (See accompanying file LICENSE-Boost or copy at
12 // https://www.boost.org/LICENSE_1_0.txt)
13 //
14 // Unless required by applicable law or agreed to in writing, this software
15 // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 // KIND, either express or implied.
17
18 /*
19 This is a derivative work
20 */
21
22 #ifndef BOOST_JSON_DETAIL_RYU_DETAIL_D2S_INTRINSICS_HPP
23 #define BOOST_JSON_DETAIL_RYU_DETAIL_D2S_INTRINSICS_HPP
24
25 #include <boost/json/detail/config.hpp>
26
27 // This sets BOOST_JSON_RYU_32_BIT_PLATFORM as a side effect if applicable.
28 #include <boost/json/detail/ryu/detail/common.hpp>
29
30 #if defined(BOOST_JSON_RYU_HAS_64_BIT_INTRINSICS)
31 #include <intrin.h>
32 #endif
33
34 namespace boost {
35 namespace json {
36 namespace detail {
37
38 namespace ryu {
39 namespace detail {
40
41 #if defined(BOOST_JSON_RYU_HAS_64_BIT_INTRINSICS)
42
43 inline uint64_t umul128(const uint64_t a, const uint64_t b, uint64_t* const productHi) {
44 return _umul128(a, b, productHi);
45 }
46
47 inline uint64_t shiftright128(const uint64_t lo, const uint64_t hi, const uint32_t dist) {
48 // For the __shiftright128 intrinsic, the shift value is always
49 // modulo 64.
50 // In the current implementation of the double-precision version
51 // of Ryu, the shift value is always < 64. (In the case
52 // RYU_OPTIMIZE_SIZE == 0, the shift value is in the range [49, 58].
53 // Otherwise in the range [2, 59].)
54 // Check this here in case a future change requires larger shift
55 // values. In this case this function needs to be adjusted.
56 BOOST_ASSERT(dist < 64);
57 return __shiftright128(lo, hi, (unsigned char) dist);
58 }
59
60 #else // defined(HAS_64_BIT_INTRINSICS)
61
62 inline uint64_t umul128(const uint64_t a, const uint64_t b, uint64_t* const productHi) {
63 // The casts here help MSVC to avoid calls to the __allmul library function.
64 const uint32_t aLo = (uint32_t)a;
65 const uint32_t aHi = (uint32_t)(a >> 32);
66 const uint32_t bLo = (uint32_t)b;
67 const uint32_t bHi = (uint32_t)(b >> 32);
68
69 const uint64_t b00 = (uint64_t)aLo * bLo;
70 const uint64_t b01 = (uint64_t)aLo * bHi;
71 const uint64_t b10 = (uint64_t)aHi * bLo;
72 const uint64_t b11 = (uint64_t)aHi * bHi;
73
74 const uint32_t b00Lo = (uint32_t)b00;
75 const uint32_t b00Hi = (uint32_t)(b00 >> 32);
76
77 const uint64_t mid1 = b10 + b00Hi;
78 const uint32_t mid1Lo = (uint32_t)(mid1);
79 const uint32_t mid1Hi = (uint32_t)(mid1 >> 32);
80
81 const uint64_t mid2 = b01 + mid1Lo;
82 const uint32_t mid2Lo = (uint32_t)(mid2);
83 const uint32_t mid2Hi = (uint32_t)(mid2 >> 32);
84
85 const uint64_t pHi = b11 + mid1Hi + mid2Hi;
86 const uint64_t pLo = ((uint64_t)mid2Lo << 32) | b00Lo;
87
88 *productHi = pHi;
89 return pLo;
90 }
91
92 inline uint64_t shiftright128(const uint64_t lo, const uint64_t hi, const uint32_t dist) {
93 // We don't need to handle the case dist >= 64 here (see above).
94 BOOST_ASSERT(dist < 64);
95 #if defined(RYU_OPTIMIZE_SIZE) || !defined(RYU_32_BIT_PLATFORM)
96 BOOST_ASSERT(dist > 0);
97 return (hi << (64 - dist)) | (lo >> dist);
98 #else
99 // Avoid a 64-bit shift by taking advantage of the range of shift values.
100 BOOST_ASSERT(dist >= 32);
101 return (hi << (64 - dist)) | ((uint32_t)(lo >> 32) >> (dist - 32));
102 #endif
103 }
104
105 #endif // defined(HAS_64_BIT_INTRINSICS)
106
107 #ifdef RYU_32_BIT_PLATFORM
108
109 // Returns the high 64 bits of the 128-bit product of a and b.
110 inline uint64_t umulh(const uint64_t a, const uint64_t b) {
111 // Reuse the umul128 implementation.
112 // Optimizers will likely eliminate the instructions used to compute the
113 // low part of the product.
114 uint64_t hi;
115 umul128(a, b, &hi);
116 return hi;
117 }
118
119 // On 32-bit platforms, compilers typically generate calls to library
120 // functions for 64-bit divisions, even if the divisor is a constant.
121 //
122 // E.g.:
123 // https://bugs.llvm.org/show_bug.cgi?id=37932
124 // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=17958
125 // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=37443
126 //
127 // The functions here perform division-by-constant using multiplications
128 // in the same way as 64-bit compilers would do.
129 //
130 // NB:
131 // The multipliers and shift values are the ones generated by clang x64
132 // for expressions like x/5, x/10, etc.
133
134 inline uint64_t div5(const uint64_t x) {
135 return umulh(x, 0xCCCCCCCCCCCCCCCDu) >> 2;
136 }
137
138 inline uint64_t div10(const uint64_t x) {
139 return umulh(x, 0xCCCCCCCCCCCCCCCDu) >> 3;
140 }
141
142 inline uint64_t div100(const uint64_t x) {
143 return umulh(x >> 2, 0x28F5C28F5C28F5C3u) >> 2;
144 }
145
146 inline uint64_t div1e8(const uint64_t x) {
147 return umulh(x, 0xABCC77118461CEFDu) >> 26;
148 }
149
150 inline uint64_t div1e9(const uint64_t x) {
151 return umulh(x >> 9, 0x44B82FA09B5A53u) >> 11;
152 }
153
154 inline uint32_t mod1e9(const uint64_t x) {
155 // Avoid 64-bit math as much as possible.
156 // Returning (uint32_t) (x - 1000000000 * div1e9(x)) would
157 // perform 32x64-bit multiplication and 64-bit subtraction.
158 // x and 1000000000 * div1e9(x) are guaranteed to differ by
159 // less than 10^9, so their highest 32 bits must be identical,
160 // so we can truncate both sides to uint32_t before subtracting.
161 // We can also simplify (uint32_t) (1000000000 * div1e9(x)).
162 // We can truncate before multiplying instead of after, as multiplying
163 // the highest 32 bits of div1e9(x) can't affect the lowest 32 bits.
164 return ((uint32_t) x) - 1000000000 * ((uint32_t) div1e9(x));
165 }
166
167 #else // RYU_32_BIT_PLATFORM
168
169 1974 inline uint64_t div5(const uint64_t x) {
170 1974 return x / 5;
171 }
172
173 12198 inline uint64_t div10(const uint64_t x) {
174 12198 return x / 10;
175 }
176
177 497 inline uint64_t div100(const uint64_t x) {
178 497 return x / 100;
179 }
180
181 59 inline uint64_t div1e8(const uint64_t x) {
182 59 return x / 100000000;
183 }
184
185 17 inline uint64_t div1e9(const uint64_t x) {
186 17 return x / 1000000000;
187 }
188
189 17 inline uint32_t mod1e9(const uint64_t x) {
190 17 return (uint32_t) (x - 1000000000 * div1e9(x));
191 }
192
193 #endif // RYU_32_BIT_PLATFORM
194
195 114 inline uint32_t pow5Factor(uint64_t value) {
196 114 uint32_t count = 0;
197 for (;;) {
198
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1860 times.
1860 BOOST_ASSERT(value != 0);
199 1860 const uint64_t q = div5(value);
200 1860 const uint32_t r = ((uint32_t) value) - 5 * ((uint32_t) q);
201
2/2
✓ Branch 0 taken 114 times.
✓ Branch 1 taken 1746 times.
1860 if (r != 0) {
202 114 break;
203 }
204 1746 value = q;
205 1746 ++count;
206 1746 }
207 114 return count;
208 }
209
210 // Returns true if value is divisible by 5^p.
211 114 inline bool multipleOfPowerOf5(const uint64_t value, const uint32_t p) {
212 // I tried a case distinction on p, but there was no performance difference.
213 114 return pow5Factor(value) >= p;
214 }
215
216 // Returns true if value is divisible by 2^p.
217 96 inline bool multipleOfPowerOf2(const uint64_t value, const uint32_t p) {
218
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 96 times.
96 BOOST_ASSERT(value != 0);
219 // return __builtin_ctzll(value) >= p;
220 96 return (value & ((1ull << p) - 1)) == 0;
221 }
222
223
224 } // detail
225 } // ryu
226
227 } // detail
228 } // namespace json
229 } // namespace boost
230
231 #endif
232