Line data Source code
1 : //
2 : // Copyright (c) 2019 Vinnie Falco (vinnie.falco@gmail.com)
3 : // Copyright (c) 2020 Krystian Stasiowski (sdkrystian@gmail.com)
4 : //
5 : // Distributed under the Boost Software License, Version 1.0. (See accompanying
6 : // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7 : //
8 : // Official repository: https://github.com/boostorg/json
9 : //
10 :
11 : #ifndef BOOST_JSON_BASIC_PARSER_HPP
12 : #define BOOST_JSON_BASIC_PARSER_HPP
13 :
14 : #include <boost/json/detail/config.hpp>
15 : #include <boost/json/detail/except.hpp>
16 : #include <boost/json/error.hpp>
17 : #include <boost/json/kind.hpp>
18 : #include <boost/json/parse_options.hpp>
19 : #include <boost/json/detail/stack.hpp>
20 : #include <boost/json/detail/stream.hpp>
21 : #include <boost/json/detail/utf8.hpp>
22 : #include <boost/json/detail/sbo_buffer.hpp>
23 :
24 : namespace boost {
25 : namespace json {
26 :
27 : /** An incremental SAX parser for serialized JSON.
28 :
29 : This implements a SAX-style parser, invoking a
30 : caller-supplied handler with each parsing event.
31 : To use, first declare a variable of type
32 : `basic_parser<T>` where `T` meets the handler
33 : requirements specified below. Then call
34 : @ref write_some one or more times with the input,
35 : setting `more = false` on the final buffer.
36 : The parsing events are realized through member
37 : function calls on the handler, which exists
38 : as a data member of the parser.
39 : \n
40 : The parser may dynamically allocate intermediate
41 : storage as needed to accommodate the nesting level
42 : of the input JSON. On subsequent invocations, the
43 : parser can cheaply re-use this memory, improving
44 : performance. This storage is freed when the
45 : parser is destroyed.
46 :
47 : @par Usage
48 :
49 : To get the declaration and function definitions
50 : for this class it is necessary to include this
51 : file instead:
52 : @code
53 : #include <boost/json/basic_parser_impl.hpp>
54 : @endcode
55 :
56 : Users who wish to parse JSON into the DOM container
57 : @ref value will not use this class directly; instead
58 : they will create an instance of @ref parser or
59 : @ref stream_parser and use that instead. Alternatively,
60 : they may call the function @ref parse. This class is
61 : designed for users who wish to perform custom actions
62 : instead of building a @ref value. For example, to
63 : produce a DOM from an external library.
64 : \n
65 : @note
66 :
67 : By default, only conforming JSON using UTF-8
68 : encoding is accepted. However, select non-compliant
69 : syntax can be allowed by construction using a
70 : @ref parse_options set to desired values.
71 :
72 : @par Handler
73 :
74 : The handler provided must be implemented as an
75 : object of class type which defines each of the
76 : required event member functions below. The event
77 : functions return a `bool` where `true` indicates
78 : success, and `false` indicates failure. If the
79 : member function returns `false`, it must set
80 : the error code to a suitable value. This error
81 : code will be returned by the write function to
82 : the caller.
83 : \n
84 : Handlers are required to declare the maximum
85 : limits on various elements. If these limits
86 : are exceeded during parsing, then parsing
87 : fails with an error.
88 : \n
89 : The following declaration meets the parser's
90 : handler requirements:
91 :
92 : @code
93 : struct handler
94 : {
95 : /// The maximum number of elements allowed in an array
96 : static constexpr std::size_t max_array_size = -1;
97 :
98 : /// The maximum number of elements allowed in an object
99 : static constexpr std::size_t max_object_size = -1;
100 :
101 : /// The maximum number of characters allowed in a string
102 : static constexpr std::size_t max_string_size = -1;
103 :
104 : /// The maximum number of characters allowed in a key
105 : static constexpr std::size_t max_key_size = -1;
106 :
107 : /// Called once when the JSON parsing begins.
108 : ///
109 : /// @return `true` on success.
110 : /// @param ec Set to the error, if any occurred.
111 : ///
112 : bool on_document_begin( error_code& ec );
113 :
114 : /// Called when the JSON parsing is done.
115 : ///
116 : /// @return `true` on success.
117 : /// @param ec Set to the error, if any occurred.
118 : ///
119 : bool on_document_end( error_code& ec );
120 :
121 : /// Called when the beginning of an array is encountered.
122 : ///
123 : /// @return `true` on success.
124 : /// @param ec Set to the error, if any occurred.
125 : ///
126 : bool on_array_begin( error_code& ec );
127 :
128 : /// Called when the end of the current array is encountered.
129 : ///
130 : /// @return `true` on success.
131 : /// @param n The number of elements in the array.
132 : /// @param ec Set to the error, if any occurred.
133 : ///
134 : bool on_array_end( std::size_t n, error_code& ec );
135 :
136 : /// Called when the beginning of an object is encountered.
137 : ///
138 : /// @return `true` on success.
139 : /// @param ec Set to the error, if any occurred.
140 : ///
141 : bool on_object_begin( error_code& ec );
142 :
143 : /// Called when the end of the current object is encountered.
144 : ///
145 : /// @return `true` on success.
146 : /// @param n The number of elements in the object.
147 : /// @param ec Set to the error, if any occurred.
148 : ///
149 : bool on_object_end( std::size_t n, error_code& ec );
150 :
151 : /// Called with characters corresponding to part of the current string.
152 : ///
153 : /// @return `true` on success.
154 : /// @param s The partial characters
155 : /// @param n The total size of the string thus far
156 : /// @param ec Set to the error, if any occurred.
157 : ///
158 : bool on_string_part( string_view s, std::size_t n, error_code& ec );
159 :
160 : /// Called with the last characters corresponding to the current string.
161 : ///
162 : /// @return `true` on success.
163 : /// @param s The remaining characters
164 : /// @param n The total size of the string
165 : /// @param ec Set to the error, if any occurred.
166 : ///
167 : bool on_string( string_view s, std::size_t n, error_code& ec );
168 :
169 : /// Called with characters corresponding to part of the current key.
170 : ///
171 : /// @return `true` on success.
172 : /// @param s The partial characters
173 : /// @param n The total size of the key thus far
174 : /// @param ec Set to the error, if any occurred.
175 : ///
176 : bool on_key_part( string_view s, std::size_t n, error_code& ec );
177 :
178 : /// Called with the last characters corresponding to the current key.
179 : ///
180 : /// @return `true` on success.
181 : /// @param s The remaining characters
182 : /// @param n The total size of the key
183 : /// @param ec Set to the error, if any occurred.
184 : ///
185 : bool on_key( string_view s, std::size_t n, error_code& ec );
186 :
187 : /// Called with the characters corresponding to part of the current number.
188 : ///
189 : /// @return `true` on success.
190 : /// @param s The partial characters
191 : /// @param ec Set to the error, if any occurred.
192 : ///
193 : bool on_number_part( string_view s, error_code& ec );
194 :
195 : /// Called when a signed integer is parsed.
196 : ///
197 : /// @return `true` on success.
198 : /// @param i The value
199 : /// @param s The remaining characters
200 : /// @param ec Set to the error, if any occurred.
201 : ///
202 : bool on_int64( int64_t i, string_view s, error_code& ec );
203 :
204 : /// Called when an unsigned integer is parsed.
205 : ///
206 : /// @return `true` on success.
207 : /// @param u The value
208 : /// @param s The remaining characters
209 : /// @param ec Set to the error, if any occurred.
210 : ///
211 : bool on_uint64( uint64_t u, string_view s, error_code& ec );
212 :
213 : /// Called when a double is parsed.
214 : ///
215 : /// @return `true` on success.
216 : /// @param d The value
217 : /// @param s The remaining characters
218 : /// @param ec Set to the error, if any occurred.
219 : ///
220 : bool on_double( double d, string_view s, error_code& ec );
221 :
222 : /// Called when a boolean is parsed.
223 : ///
224 : /// @return `true` on success.
225 : /// @param b The value
226 : /// @param ec Set to the error, if any occurred.
227 : ///
228 : ///
229 : bool on_bool( bool b, error_code& ec );
230 :
231 : /// Called when a null is parsed.
232 : ///
233 : /// @return `true` on success.
234 : /// @param ec Set to the error, if any occurred.
235 : ///
236 : bool on_null( error_code& ec );
237 :
238 : /// Called with characters corresponding to part of the current comment.
239 : ///
240 : /// @return `true` on success.
241 : /// @param s The partial characters.
242 : /// @param ec Set to the error, if any occurred.
243 : ///
244 : bool on_comment_part( string_view s, error_code& ec );
245 :
246 : /// Called with the last characters corresponding to the current comment.
247 : ///
248 : /// @return `true` on success.
249 : /// @param s The remaining characters
250 : /// @param ec Set to the error, if any occurred.
251 : ///
252 : bool on_comment( string_view s, error_code& ec );
253 : };
254 : @endcode
255 :
256 : @see
257 : @ref parse,
258 : @ref stream_parser,
259 : [Validating parser example](../../doc/html/json/examples.html#json.examples.validate).
260 :
261 : @headerfile <boost/json/basic_parser.hpp>
262 : */
263 : template<class Handler>
264 : class basic_parser
265 : {
266 : enum class state : char // one-byte tags; taken by suspend(), maybe_suspend() and suspend_or_fail()
267 : { // NOTE(review): prefixes appear to mirror the parse_* members (doc/com/lit/str/obj/arr/num/val) — confirm in the impl header
268 : doc1, doc3, // note: there is no doc2 enumerator
269 : com1, com2, com3, com4,
270 : lit1,
271 : str1, str2, str3, str4,
272 : str5, str6, str7, str8,
273 : sur1, sur2, sur3, // NOTE(review): sur* presumably relate to surrogate handling in escapes — confirm
274 : sur4, sur5, sur6,
275 : obj1, obj2, obj3, obj4,
276 : obj5, obj6, obj7, obj8,
277 : obj9, obj10, obj11,
278 : arr1, arr2, arr3,
279 : arr4, arr5, arr6,
280 : num1, num2, num3, num4,
281 : num5, num6, num7, num8,
282 : exp1, exp2, exp3, // NOTE(review): exp* presumably belong to number parsing alongside num* — confirm
283 : val1, val2, val3
284 : };
285 :
286 : struct number // type of the num_ member; also taken by the number overloads of suspend()/maybe_suspend()
287 : {
288 : uint64_t mant; // presumably the accumulated mantissa digits — TODO confirm
289 : int bias; // presumably a decimal-exponent adjustment — TODO confirm
290 : int exp; // presumably the explicit exponent value — TODO confirm
291 : bool frac; // NOTE(review): meaning not evident from this header (fraction flag vs. exponent sign) — verify in basic_parser_impl.hpp
292 : bool neg; // presumably true for a negative number — TODO confirm
293 : };
294 :
295 : template< bool StackEmpty_, char First_ >
296 : struct parse_number_helper; // forward declaration only; not defined in this header
297 :
298 : // optimization: must come first
299 : Handler h_; // the handler instance returned by handler()
300 :
301 : number num_;
302 : system::error_code ec_; // returned by last_error()
303 : detail::stack st_;
304 : detail::utf8_sequence seq_;
305 : unsigned u1_; // NOTE(review): u1_/u2_ presumably hold UTF-16 code units for surrogate pairs — confirm
306 : unsigned u2_;
307 : bool more_; // false for final buffer
308 : bool done_ = false; // true on complete parse; returned by done()
309 : bool clean_ = true; // write_some exited cleanly
310 : const char* end_;
311 : detail::sbo_buffer<16 + 16 + 1 + 1> num_buf_; // template argument evaluates to 34; presumably the inline capacity — confirm
312 : parse_options opt_;
313 : // how many levels deeper the parser can go
314 : std::size_t depth_ = opt_.max_depth; // reads opt_, so opt_ must stay declared above (members initialize in declaration order)
315 : unsigned char cur_lit_ = 0; // NOTE(review): presumably identifies the literal being parsed — confirm
316 : unsigned char lit_offset_ = 0; // NOTE(review): presumably the position reached within that literal — confirm
317 :
318 : inline void reserve(); // declared only; NOTE(review): presumably pre-sizes st_ — confirm in the impl header
319 : inline const char* sentinel(); // declared only; returns a pointer whose meaning is defined in the impl header
320 : inline bool incomplete( // declared only; takes the current input stream wrapper by const reference
321 : const detail::const_stream_wrapper& cs);
322 :
323 : #ifdef __INTEL_COMPILER
324 : #pragma warning push
325 : #pragma warning disable 2196
326 : #endif
327 : // Every helper below is declared both BOOST_NOINLINE and inline. NOTE(review): Intel warning 2196 is presumably the diagnostic for that combination — confirm.
328 : BOOST_NOINLINE
329 : inline
330 : const char*
331 : suspend_or_fail(state st);
332 : // Same as above with an additional std::size_t argument.
333 : BOOST_NOINLINE
334 : inline
335 : const char*
336 : suspend_or_fail(
337 : state st,
338 : std::size_t n);
339 : // Private pointer-returning fail() overloads; the public fail(system::error_code) is declared separately.
340 : BOOST_NOINLINE
341 : inline
342 : const char*
343 : fail(const char* p) noexcept;
344 :
345 : BOOST_NOINLINE
346 : inline
347 : const char*
348 : fail(
349 : const char* p,
350 : error ev,
351 : source_location const* loc) noexcept;
352 : // The maybe_suspend()/suspend() overloads differ only in the extra argument: none, a std::size_t, or a number.
353 : BOOST_NOINLINE
354 : inline
355 : const char*
356 : maybe_suspend(
357 : const char* p,
358 : state st);
359 :
360 : BOOST_NOINLINE
361 : inline
362 : const char*
363 : maybe_suspend(
364 : const char* p,
365 : state st,
366 : std::size_t n);
367 :
368 : BOOST_NOINLINE
369 : inline
370 : const char*
371 : maybe_suspend(
372 : const char* p,
373 : state st,
374 : const number& num);
375 :
376 : BOOST_NOINLINE
377 : inline
378 : const char*
379 : suspend(
380 : const char* p,
381 : state st);
382 :
383 : BOOST_NOINLINE
384 : inline
385 : const char*
386 : suspend(
387 : const char* p,
388 : state st,
389 : const number& num);
390 : // Restores the Intel warning state pushed before the helper declarations.
391 : #ifdef __INTEL_COMPILER
392 : #pragma warning pop
393 : #endif
394 :
395 : template<bool StackEmpty_/*, bool Terminal_*/> // the commented-out Terminal_ shows `terminal` is a runtime bool, not a template parameter
396 : const char* parse_comment(const char* p,
397 : std::integral_constant<bool, StackEmpty_> stack_empty,
398 : /*std::integral_constant<bool, Terminal_>*/ bool terminal);
399 :
400 : template<bool StackEmpty_>
401 : const char* parse_document(const char* p,
402 : std::integral_constant<bool, StackEmpty_> stack_empty);
403 : // parse_value/resume_value/parse_object/parse_array share one option set: allow_comments is a compile-time tag, the other three flags are runtime bools.
404 : template<bool StackEmpty_, bool AllowComments_/*,
405 : bool AllowTrailing_, bool AllowBadUTF8_*/>
406 : const char* parse_value(const char* p,
407 : std::integral_constant<bool, StackEmpty_> stack_empty,
408 : std::integral_constant<bool, AllowComments_> allow_comments,
409 : /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
410 : /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8,
411 : bool allow_bad_utf16);
412 : // resume_value has no stack_empty tag, unlike parse_value.
413 : template<bool AllowComments_/*,
414 : bool AllowTrailing_, bool AllowBadUTF8_*/>
415 : const char* resume_value(const char* p,
416 : std::integral_constant<bool, AllowComments_> allow_comments,
417 : /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
418 : /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8,
419 : bool allow_bad_utf16);
420 :
421 : template<bool StackEmpty_, bool AllowComments_/*,
422 : bool AllowTrailing_, bool AllowBadUTF8_*/>
423 : const char* parse_object(const char* p,
424 : std::integral_constant<bool, StackEmpty_> stack_empty,
425 : std::integral_constant<bool, AllowComments_> allow_comments,
426 : /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
427 : /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8,
428 : bool allow_bad_utf16);
429 :
430 : template<bool StackEmpty_, bool AllowComments_/*,
431 : bool AllowTrailing_, bool AllowBadUTF8_*/>
432 : const char* parse_array(const char* p,
433 : std::integral_constant<bool, StackEmpty_> stack_empty,
434 : std::integral_constant<bool, AllowComments_> allow_comments,
435 : /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
436 : /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8,
437 : bool allow_bad_utf16);
438 : // parse_literal is parameterized only on the type of its `literal` argument.
439 : template<class Literal>
440 : const char* parse_literal(const char* p, Literal literal);
441 : // parse_string selects key versus string handling at compile time through the is_key tag.
442 : template<bool StackEmpty_, bool IsKey_>
443 : const char* parse_string(const char* p,
444 : std::integral_constant<bool, StackEmpty_> stack_empty,
445 : std::integral_constant<bool, IsKey_> is_key,
446 : bool allow_bad_utf8,
447 : bool allow_bad_utf16);
448 : // parse_escaped takes is_key as a runtime bool and updates `total` through a reference.
449 : template<bool StackEmpty_>
450 : const char* parse_escaped(
451 : const char* p,
452 : std::size_t& total,
453 : std::integral_constant<bool, StackEmpty_> stack_empty,
454 : bool is_key,
455 : bool allow_bad_utf16);
456 : // parse_number takes the stack state, a First_ character and the number_precision mode, all as integral_constant tags.
457 : template<bool StackEmpty_, char First_, number_precision Numbers_>
458 : const char* parse_number(const char* p,
459 : std::integral_constant<bool, StackEmpty_> stack_empty,
460 : std::integral_constant<char, First_> first,
461 : std::integral_constant<number_precision, Numbers_> numbers);
462 :
463 : // intentionally private: levels already consumed, i.e. opt_.max_depth minus the remaining budget depth_
464 : std::size_t
465 173315 : depth() const noexcept
466 : {
467 173315 : return opt_.max_depth - depth_; // depth_ starts at opt_.max_depth, so this is 0 for a fresh parser
468 : }
469 :
470 : public:
471 : /// Copy constructor (deleted); no move constructor is declared, so the parser is not movable either
472 : basic_parser(
473 : basic_parser const&) = delete;
474 :
475 : /// Copy assignment (deleted); no move assignment is declared, so the parser is not move-assignable either
476 : basic_parser& operator=(
477 : basic_parser const&) = delete;
478 :
479 : /** Destructor.
480 :
481 : All dynamically allocated internal memory is freed.
482 :
483 : @par Effects
484 : @code
485 : this->handler().~Handler()
486 : @endcode
487 :
488 : @par Complexity
489 : Same as `~Handler()`.
490 :
491 : @par Exception Safety
492 : Same as `~Handler()`.
493 : */
494 2164639 : ~basic_parser() = default; // defaulted: destroys every data member, including the handler h_
495 :
496 : /** Constructor.
497 :
498 : This function constructs the parser with
499 : the specified options, with any additional
500 : arguments forwarded to the handler's constructor.
501 :
502 : @par Complexity
503 : Same as `Handler( std::forward< Args >( args )... )`.
504 :
505 : @par Exception Safety
506 : Same as `Handler( std::forward< Args >( args )... )`.
507 :
508 : @param opt Configuration settings for the parser.
509 : If this structure is default constructed, the
510 : parser will accept only standard JSON.
511 :
512 : @param args Optional additional arguments
513 : forwarded to the handler's constructor.
514 : */
515 : template<class... Args>
516 : explicit // `opt` is mandatory: this is the only usable constructor, so there is no default constructor
517 : basic_parser(
518 : parse_options const& opt,
519 : Args&&... args);
520 :
521 : /** Return a reference to the handler.
522 :
523 : This function provides mutable access to the constructed
524 : instance of the handler owned by the parser.
525 :
526 : @par Complexity
527 : Constant.
528 :
529 : @par Exception Safety
530 : No-throw guarantee.
531 : */
532 : Handler&
533 6310634 : handler() noexcept
534 : {
535 6310634 : return h_; // the handler is stored directly as a data member
536 : }
537 :
538 : /** Return a reference to the handler.
539 :
540 : This function provides read-only access to the constructed
541 : instance of the handler owned by the parser.
542 :
543 : @par Complexity
544 : Constant.
545 :
546 : @par Exception Safety
547 : No-throw guarantee.
548 : */
549 : Handler const&
550 24 : handler() const noexcept
551 : {
552 24 : return h_; // const overload of the accessor; same member as the non-const version
553 : }
554 :
555 : /** Return the last error.
556 :
557 : This returns, by value, the error code
558 : that was generated in the most recent call
559 : to @ref write_some.
560 :
561 : @par Complexity
562 : Constant.
563 :
564 : @par Exception Safety
565 : No-throw guarantee.
566 : */
567 : system::error_code
568 8 : last_error() const noexcept
569 : {
570 8 : return ec_; // copy of the stored error code
571 : }
572 :
573 : /** Return true if a complete JSON has been parsed.
574 :
575 : This function returns `true` when all of these
576 : conditions are met:
577 :
578 : @li A complete serialized JSON has been
579 : presented to the parser, and
580 :
581 : @li No error or exception has occurred since the
582 : parser was constructed, or since the last call
583 : to @ref reset.
584 :
585 : @par Complexity
586 : Constant.
587 :
588 : @par Exception Safety
589 : No-throw guarantee.
590 : */
591 : bool
592 4078231 : done() const noexcept
593 : {
594 4078231 : return done_; // flag is default-initialized to false
595 : }
596 :
597 : /** Reset the state, to parse a new document.
598 :
599 : This function discards the current parsing
600 : state, to prepare for parsing a new document.
601 : Dynamically allocated temporary memory used
602 : by the implementation is kept for re-use, not deallocated.
603 :
604 : @par Complexity
605 : Constant.
606 :
607 : @par Exception Safety
608 : No-throw guarantee.
609 : */
610 : void
611 : reset() noexcept; // declared only; done() is documented as counting errors only since the last reset()
612 :
613 : /** Indicate a parsing failure.
614 :
615 : This changes the state of the parser to indicate
616 : that the parse has failed. A parser implementation
617 : can use this to fail the parser if needed due to
618 : external inputs.
619 :
620 : @note
621 :
622 : If `!ec`, the stored error code is unspecified.
623 :
624 : @par Complexity
625 : Constant.
626 :
627 : @par Exception Safety
628 : No-throw guarantee.
629 :
630 : @param ec The error code to set. If the code does
631 : not indicate failure, an implementation-defined
632 : error code that indicates failure will be stored
633 : instead.
634 : */
635 : void
636 : fail(system::error_code ec) noexcept; // public overload; distinct from the private pointer-returning fail() helpers
637 :
638 : /** Parse some of an input string as JSON, incrementally.
639 :
640 : This function parses the JSON in the specified
641 : buffer, calling the handler to emit each SAX
642 : parsing event. The parse proceeds from the
643 : current state, which is at the beginning of a
644 : new JSON or in the middle of the current JSON
645 : if any characters were already parsed.
646 : \n
647 : The characters in the buffer are processed
648 : starting from the beginning, until one of the
649 : following conditions is met:
650 :
651 : @li All of the characters in the buffer
652 : have been parsed, or
653 :
654 : @li Some of the characters in the buffer
655 : have been parsed and the JSON is complete, or
656 :
657 : @li A parsing error occurs.
658 :
659 : The supplied buffer does not need to contain the
660 : entire JSON. Subsequent calls can provide more
661 : serialized data, allowing JSON to be processed
662 : incrementally. The end of the serialized JSON
663 : can be indicated by passing `more = false`.
664 :
665 : @par Complexity
666 : Linear in `size`.
667 :
668 : @par Exception Safety
669 : Basic guarantee.
670 : Calls to the handler may throw.
671 : Upon error or exception, subsequent calls will
672 : fail until @ref reset is called to parse a new JSON.
673 :
674 : @return The number of characters successfully
675 : parsed, which may be smaller than `size`.
676 :
677 : @param more `true` if there are possibly more
678 : buffers in the current JSON, otherwise `false`.
679 :
680 : @param data A pointer to a buffer of `size`
681 : characters to parse.
682 :
683 : @param size The number of characters pointed to
684 : by `data`.
685 :
686 : @param ec Set to the error, if any occurred.
687 : */
688 : /** @{ */
689 : std::size_t
690 : write_some( // overload reporting the error through system::error_code
691 : bool more,
692 : char const* data,
693 : std::size_t size,
694 : system::error_code& ec);
695 : // Second overload: identical leading parameters, but the error is reported through std::error_code.
696 : std::size_t
697 : write_some(
698 : bool more,
699 : char const* data,
700 : std::size_t size,
701 : std::error_code& ec);
702 : /** @} */
703 : };
704 :
705 : } // namespace json
706 : } // namespace boost
707 :
708 : #endif
|