From 7dfb40cb585829d8d167f0819a19d947b47b6a07 Mon Sep 17 00:00:00 2001 From: Sadie Powell Date: Tue, 26 Aug 2025 18:06:48 +0100 Subject: [PATCH] Update yyjson (for real this time). --- vendor/yyjson/yyjson.c | 8139 +++++++++++++++++++++------------------- vendor/yyjson/yyjson.h | 290 +- 2 files changed, 4370 insertions(+), 4059 deletions(-) diff --git a/vendor/yyjson/yyjson.c b/vendor/yyjson/yyjson.c index 20c3050ae..c16d92581 100644 --- a/vendor/yyjson/yyjson.c +++ b/vendor/yyjson/yyjson.c @@ -26,7 +26,7 @@ /*============================================================================== - * Warning Suppress + * MARK: - Warning Suppress (Private) *============================================================================*/ #if defined(__clang__) @@ -52,7 +52,7 @@ /*============================================================================== - * Version + * MARK: - Version (Public) *============================================================================*/ uint32_t yyjson_version(void) { @@ -62,7 +62,7 @@ uint32_t yyjson_version(void) { /*============================================================================== - * Flags + * MARK: - Flags (Private) *============================================================================*/ /* msvc intrinsic */ @@ -129,6 +129,8 @@ uint32_t yyjson_version(void) { # define YYJSON_HAS_IEEE_754 1 #else # define YYJSON_HAS_IEEE_754 0 +# undef YYJSON_DISABLE_FAST_FP_CONV +# define YYJSON_DISABLE_FAST_FP_CONV 1 #endif /* @@ -185,7 +187,14 @@ uint32_t yyjson_version(void) { # define YYJSON_DOUBLE_MATH_CORRECT 1 #endif -/* endian */ +/* + Detect the endianness at compile-time. 
+ YYJSON_ENDIAN == YYJSON_BIG_ENDIAN + YYJSON_ENDIAN == YYJSON_LITTLE_ENDIAN + */ +#define YYJSON_BIG_ENDIAN 4321 +#define YYJSON_LITTLE_ENDIAN 1234 + #if yyjson_has_include() # include /* POSIX */ #endif @@ -197,23 +206,18 @@ uint32_t yyjson_version(void) { # include /* BSD, Darwin */ #endif -#define YYJSON_BIG_ENDIAN 4321 -#define YYJSON_LITTLE_ENDIAN 1234 - #if defined(BYTE_ORDER) && BYTE_ORDER # if defined(BIG_ENDIAN) && (BYTE_ORDER == BIG_ENDIAN) # define YYJSON_ENDIAN YYJSON_BIG_ENDIAN # elif defined(LITTLE_ENDIAN) && (BYTE_ORDER == LITTLE_ENDIAN) # define YYJSON_ENDIAN YYJSON_LITTLE_ENDIAN # endif - #elif defined(__BYTE_ORDER) && __BYTE_ORDER # if defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN) # define YYJSON_ENDIAN YYJSON_BIG_ENDIAN # elif defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN) # define YYJSON_ENDIAN YYJSON_LITTLE_ENDIAN # endif - #elif defined(__BYTE_ORDER__) && __BYTE_ORDER__ # if defined(__ORDER_BIG_ENDIAN__) && \ (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) @@ -222,7 +226,6 @@ uint32_t yyjson_version(void) { (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) # define YYJSON_ENDIAN YYJSON_LITTLE_ENDIAN # endif - #elif (defined(__LITTLE_ENDIAN__) && __LITTLE_ENDIAN__ == 1) || \ defined(__i386) || defined(__i386__) || \ defined(_X86_) || defined(__X86__) || \ @@ -236,13 +239,11 @@ uint32_t yyjson_version(void) { defined(__EMSCRIPTEN__) || defined(__wasm__) || \ defined(__loongarch__) # define YYJSON_ENDIAN YYJSON_LITTLE_ENDIAN - #elif (defined(__BIG_ENDIAN__) && __BIG_ENDIAN__ == 1) || \ defined(__ARMEB__) || defined(__THUMBEB__) || defined(__AARCH64EB__) || \ defined(_MIPSEB) || defined(__MIPSEB) || defined(__MIPSEB__) || \ defined(__or1k__) || defined(__OR1K__) # define YYJSON_ENDIAN YYJSON_BIG_ENDIAN - #else # define YYJSON_ENDIAN 0 /* unknown endian, detect at run-time */ #endif @@ -341,12 +342,11 @@ uint32_t yyjson_version(void) { /*============================================================================== - * Macros + * MARK: 
- Macros (Private) *============================================================================*/ /* Macros used for loop unrolling and other purpose. */ #define repeat2(x) { x x } -#define repeat3(x) { x x x } #define repeat4(x) { x x x x } #define repeat8(x) { x x x x x x x x } #define repeat16(x) { x x x x x x x x x x x x x x x x } @@ -387,13 +387,40 @@ uint32_t yyjson_version(void) { /* Used to cast away (remove) const qualifier. */ #define constcast(type) (type)(void *)(size_t)(const void *) +/* + Compiler barriers for single variables. + + These macros inform GCC that a read or write access to the given memory + location will occur, preventing certain compiler optimizations or reordering + around the access to 'val'. They do not emit any actual instructions. + + This is useful when GCC's default optimization strategies are suboptimal and + precise control over memory access patterns is required. + These barriers are not needed when using Clang or MSVC. + */ +#if YYJSON_IS_REAL_GCC +# define gcc_load_barrier(val) __asm__ volatile(""::"m"(val)) +# define gcc_store_barrier(val) __asm__ volatile("":"=m"(val)) +# define gcc_full_barrier(val) __asm__ volatile("":"=m"(val):"m"(val)) +#else +# define gcc_load_barrier(val) +# define gcc_store_barrier(val) +# define gcc_full_barrier(val) +#endif + + + +/*============================================================================== + * MARK: - Constants (Private) + *============================================================================*/ + /* Common error messages. 
*/ #define MSG_FOPEN "failed to open file" #define MSG_FREAD "failed to read file" #define MSG_FWRITE "failed to write file" #define MSG_FCLOSE "failed to close file" #define MSG_MALLOC "failed to allocate memory" -#define MSG_CHAT_T "invalid literal, expected 'true'" +#define MSG_CHAR_T "invalid literal, expected 'true'" #define MSG_CHAR_F "invalid literal, expected 'false'" #define MSG_CHAR_N "invalid literal, expected 'null'" #define MSG_CHAR "unexpected character, expected a JSON value" @@ -405,46 +432,13 @@ uint32_t yyjson_version(void) { #define MSG_NOT_END "unexpected end of data" #define MSG_COMMENT "unclosed multiline comment" #define MSG_COMMA "trailing comma is not allowed" -#define MSG_INF_NAN "nan or inf number is not allowed" +#define MSG_NAN_INF "nan or inf number is not allowed" #define MSG_ERR_TYPE "invalid JSON value type" -#define MSG_ERR_UTF8 "invalid utf-8 encoding in string" #define MSG_ERR_BOM "UTF-8 byte order mark (BOM) is not supported" +#define MSG_ERR_UTF8 "invalid utf-8 encoding in string" #define MSG_ERR_UTF16 "UTF-16 encoding is not supported" #define MSG_ERR_UTF32 "UTF-32 encoding is not supported" -/* - Check flags using a function to avoid `always false` warnings. - When non-standard features are disabled, unnecessary checks - will be evaluated and optimized out at compile-time. 
- */ -static_inline bool read_flag_eq(yyjson_read_flag flg, yyjson_read_flag chk) { -#if YYJSON_DISABLE_NON_STANDARD - if (chk == YYJSON_READ_ALLOW_INF_AND_NAN || - chk == YYJSON_READ_ALLOW_COMMENTS || - chk == YYJSON_READ_ALLOW_TRAILING_COMMAS || - chk == YYJSON_READ_ALLOW_INVALID_UNICODE || - chk == YYJSON_READ_ALLOW_BOM) - return false; -#endif - return (flg & chk) != 0; -} -static_inline bool write_flag_eq(yyjson_write_flag flg, yyjson_write_flag chk) { -#if YYJSON_DISABLE_NON_STANDARD - if (chk == YYJSON_WRITE_ALLOW_INF_AND_NAN || - chk == YYJSON_WRITE_ALLOW_INVALID_UNICODE) - return false; -#endif - return (flg & chk) != 0; -} -#define has_read_flag(_flag) unlikely(read_flag_eq(flg, YYJSON_READ_##_flag)) -#define has_write_flag(_flag) unlikely(write_flag_eq(flg, YYJSON_WRITE_##_flag)) - - - -/*============================================================================== - * Number Constants - *============================================================================*/ - /* U64 constant values */ #undef U64_MAX #define U64_MAX U64(0xFFFFFFFF, 0xFFFFFFFF) @@ -461,14 +455,14 @@ static_inline bool write_flag_eq(yyjson_write_flag flg, yyjson_write_flag chk) { #undef USIZE_SAFE_DIG #define USIZE_SAFE_DIG (sizeof(usize) == 8 ? 
U64_SAFE_DIG : U32_SAFE_DIG) -/* Inf raw value (positive) */ -#define F64_RAW_INF U64(0x7FF00000, 0x00000000) +/* Inf bits (positive) */ +#define F64_BITS_INF U64(0x7FF00000, 0x00000000) -/* NaN raw value (quiet NaN, no payload, no sign) */ +/* NaN bits (quiet NaN, no payload, no sign) */ #if defined(__hppa__) || (defined(__mips__) && !defined(__mips_nan2008)) -#define F64_RAW_NAN U64(0x7FF7FFFF, 0xFFFFFFFF) +#define F64_BITS_NAN U64(0x7FF7FFFF, 0xFFFFFFFF) #else -#define F64_RAW_NAN U64(0x7FF80000, 0x00000000) +#define F64_BITS_NAN U64(0x7FF80000, 0x00000000) #endif /* maximum significant digits count in decimal when reading double number */ @@ -527,7 +521,7 @@ static_inline bool write_flag_eq(yyjson_write_flag flg, yyjson_write_flag chk) { /*============================================================================== - * Types + * MARK: - Types (Private) *============================================================================*/ /** Type define for primitive types. */ @@ -562,13 +556,14 @@ typedef union v64_uni { v64 v; u64 u; } v64_uni; /*============================================================================== - * Load/Store Utils + * MARK: - Load/Store Utils (Private) *============================================================================*/ #define byte_move_idx(x) ((char *)dst)[x] = ((const char *)src)[x]; #define byte_move_src(x) ((char *)tmp)[x] = ((const char *)src)[x]; #define byte_move_dst(x) ((char *)dst)[x] = ((const char *)tmp)[x]; +/** Same as `memcpy(dst, src, 2)`, no overlap. */ static_inline void byte_copy_2(void *dst, const void *src) { #if !YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS memcpy(dst, src, 2); @@ -577,6 +572,7 @@ static_inline void byte_copy_2(void *dst, const void *src) { #endif } +/** Same as `memcpy(dst, src, 4)`, no overlap. 
*/ static_inline void byte_copy_4(void *dst, const void *src) { #if !YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS memcpy(dst, src, 4); @@ -585,6 +581,7 @@ static_inline void byte_copy_4(void *dst, const void *src) { #endif } +/** Same as `memcpy(dst, src, 8)`, no overlap. */ static_inline void byte_copy_8(void *dst, const void *src) { #if !YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS memcpy(dst, src, 8); @@ -593,6 +590,7 @@ static_inline void byte_copy_8(void *dst, const void *src) { #endif } +/** Same as `memcpy(dst, src, 16)`, no overlap. */ static_inline void byte_copy_16(void *dst, const void *src) { #if !YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS memcpy(dst, src, 16); @@ -601,6 +599,7 @@ static_inline void byte_copy_16(void *dst, const void *src) { #endif } +/** Same as `memmove(dst, src, 2)`, allows overlap. */ static_inline void byte_move_2(void *dst, const void *src) { #if !YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS u16 tmp; @@ -613,6 +612,7 @@ static_inline void byte_move_2(void *dst, const void *src) { #endif } +/** Same as `memmove(dst, src, 4)`, allows overlap. */ static_inline void byte_move_4(void *dst, const void *src) { #if !YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS u32 tmp; @@ -625,6 +625,7 @@ static_inline void byte_move_4(void *dst, const void *src) { #endif } +/** Same as `memmove(dst, src, 8)`, allows overlap. */ static_inline void byte_move_8(void *dst, const void *src) { #if !YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS u64 tmp; @@ -637,6 +638,7 @@ static_inline void byte_move_8(void *dst, const void *src) { #endif } +/** Same as `memmove(dst, src, 16)`, allows overlap. */ static_inline void byte_move_16(void *dst, const void *src) { #if !YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS char *pdst = (char *)dst; @@ -653,6 +655,17 @@ static_inline void byte_move_16(void *dst, const void *src) { #endif } +/** Same as `memmove(dst, src, n)`, but only `dst <= src` and `n <= 16`. 
*/ +static_inline void byte_move_forward(void *dst, void *src, usize n) { + char *d = (char *)dst, *s = (char *)src; + n += (n % 2); /* round up to even */ + if (n == 16) { byte_move_16(d, s); return; } + if (n >= 8) { byte_move_8(d, s); n -= 8; d += 8; s += 8; } + if (n >= 4) { byte_move_4(d, s); n -= 4; d += 4; s += 4; } + if (n >= 2) { byte_move_2(d, s); } +} + +/** Same as `memcmp(buf, pat, 2) == 0`. */ static_inline bool byte_match_2(void *buf, const char *pat) { #if !YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS v16_uni u1, u2; @@ -665,6 +678,7 @@ static_inline bool byte_match_2(void *buf, const char *pat) { #endif } +/** Same as `memcmp(buf, pat, 4) == 0`. */ static_inline bool byte_match_4(void *buf, const char *pat) { #if !YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS v32_uni u1, u2; @@ -679,6 +693,7 @@ static_inline bool byte_match_4(void *buf, const char *pat) { #endif } +/** Loads 2 bytes from `src` as a u16 (native-endian). */ static_inline u16 byte_load_2(const void *src) { v16_uni uni; #if !YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS @@ -690,6 +705,7 @@ static_inline u16 byte_load_2(const void *src) { return uni.u; } +/** Loads 3 bytes from `src` as a u32 (native-endian). */ static_inline u32 byte_load_3(const void *src) { v32_uni uni; #if !YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS @@ -705,6 +721,7 @@ static_inline u32 byte_load_3(const void *src) { return uni.u; } +/** Loads 4 bytes from `src` as a u32 (native-endian). */ static_inline u32 byte_load_4(const void *src) { v32_uni uni; #if !YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS @@ -721,2135 +738,477 @@ static_inline u32 byte_load_4(const void *src) { /*============================================================================== - * Number Utils - * These functions are used to detect and convert NaN and Inf numbers. - * The `memcpy` is used to avoid violating the strict aliasing rule. + * MARK: - Character Utils (Private) + * These lookup tables were generated by `misc/make_tables.c`. 
*============================================================================*/ -/** Convert raw binary to double. */ -static_inline f64 f64_from_raw(u64 u) { - f64 f; - memcpy(&f, &u, sizeof(u)); - return f; -} +/* char_table1 */ +#define CHAR_TYPE_ASCII (1 << 0) /* Except: ["\], [0x00-0x1F, 0x80-0xFF] */ +#define CHAR_TYPE_ASCII_SQ (1 << 1) /* Except: ['\], [0x00-0x1F, 0x80-0xFF] */ +#define CHAR_TYPE_SPACE (1 << 2) /* Whitespace: [ \t\n\r] */ +#define CHAR_TYPE_SPACE_EXT (1 << 3) /* Whitespace: [ \t\n\r\v\f], JSON5 */ +#define CHAR_TYPE_NUM (1 << 4) /* Number: [.-+0-9] */ +#define CHAR_TYPE_COMMENT (1 << 5) /* Comment: [/] */ -/** Convert raw binary to float. */ -static_inline f32 f32_from_raw(u32 u) { - f32 f; - memcpy(&f, &u, sizeof(u)); - return f; -} +/* char_table2 */ +#define CHAR_TYPE_EOL (1 << 0) /* End of line: [\r\n] */ +#define CHAR_TYPE_EOL_EXT (1 << 1) /* End of line: [\r\n], JSON5 */ +#define CHAR_TYPE_ID_START (1 << 2) /* ID start: [_$A-Za-z\], U+0080+ */ +#define CHAR_TYPE_ID_NEXT (1 << 3) /* ID next: [_$A-Za-z0-9\], U+0080+ */ +#define CHAR_TYPE_ID_ASCII (1 << 4) /* ID next ASCII: [_$A-Za-z0-9] */ -/** Convert double to raw binary. */ -static_inline u64 f64_to_raw(f64 f) { - u64 u; - memcpy(&u, &f, sizeof(u)); - return u; -} +/* char_table3 */ +#define CHAR_TYPE_SIGN (1 << 0) /* [-+] */ +#define CHAR_TYPE_DIGIT (1 << 1) /* [0-9] */ +#define CHAR_TYPE_NONZERO (1 << 2) /* [1-9] */ +#define CHAR_TYPE_EXP (1 << 3) /* [eE] */ +#define CHAR_TYPE_DOT (1 << 4) /* [.] */ -/** Convert double to raw binary. */ -static_inline u32 f32_to_raw(f32 f) { - u32 u; - memcpy(&u, &f, sizeof(u)); - return u; -} - -/** Get raw 'infinity' with sign. */ -static_inline u64 f64_raw_get_inf(bool sign) { -#if YYJSON_HAS_IEEE_754 - return F64_RAW_INF | ((u64)sign << 63); -#elif defined(INFINITY) - return f64_to_raw(sign ? -INFINITY : INFINITY); -#else - return f64_to_raw(sign ? -HUGE_VAL : HUGE_VAL); -#endif -} - -/** Get raw 'nan' with sign. 
*/ -static_inline u64 f64_raw_get_nan(bool sign) { -#if YYJSON_HAS_IEEE_754 - return F64_RAW_NAN | ((u64)sign << 63); -#elif defined(NAN) - return f64_to_raw(sign ? (f64)-NAN : (f64)NAN); -#else - return f64_to_raw((sign ? -0.0 : 0.0) / 0.0); -#endif -} - -/** Casting double to float, allow overflow. */ -#if yyjson_has_attribute(no_sanitize) -__attribute__((no_sanitize("undefined"))) -#elif yyjson_gcc_available(4, 9, 0) -__attribute__((__no_sanitize_undefined__)) -#endif -static_inline f32 f64_to_f32(f64 val) { - return (f32)val; -} - - - -/*============================================================================== - * Size Utils - * These functions are used for memory allocation. - *============================================================================*/ - -/** Returns whether the size is overflow after increment. */ -static_inline bool size_add_is_overflow(usize size, usize add) { - return size > (size + add); -} - -/** Returns whether the size is power of 2 (size should not be 0). */ -static_inline bool size_is_pow2(usize size) { - return (size & (size - 1)) == 0; -} - -/** Align size upwards (may overflow). */ -static_inline usize size_align_up(usize size, usize align) { - if (size_is_pow2(align)) { - return (size + (align - 1)) & ~(align - 1); - } else { - return size + align - (size + align - 1) % align - 1; - } -} - -/** Align size downwards. */ -static_inline usize size_align_down(usize size, usize align) { - if (size_is_pow2(align)) { - return size & ~(align - 1); - } else { - return size - (size % align); - } -} - -/** Align address upwards (may overflow). */ -static_inline void *mem_align_up(void *mem, usize align) { - usize size; - memcpy(&size, &mem, sizeof(usize)); - size = size_align_up(size, align); - memcpy(&mem, &size, sizeof(usize)); - return mem; -} - - - -/*============================================================================== - * Bits Utils - * These functions are used by the floating-point number reader and writer. 
- *============================================================================*/ - -/** Returns the number of leading 0-bits in value (input should not be 0). */ -static_inline u32 u64_lz_bits(u64 v) { -#if GCC_HAS_CLZLL - return (u32)__builtin_clzll(v); -#elif MSC_HAS_BIT_SCAN_64 - unsigned long r; - _BitScanReverse64(&r, v); - return (u32)63 - (u32)r; -#elif MSC_HAS_BIT_SCAN - unsigned long hi, lo; - bool hi_set = _BitScanReverse(&hi, (u32)(v >> 32)) != 0; - _BitScanReverse(&lo, (u32)v); - hi |= 32; - return (u32)63 - (u32)(hi_set ? hi : lo); -#else - /* branchless, use De Bruijn sequence */ - /* see: https://www.chessprogramming.org/BitScan */ - const u8 table[64] = { - 63, 16, 62, 7, 15, 36, 61, 3, 6, 14, 22, 26, 35, 47, 60, 2, - 9, 5, 28, 11, 13, 21, 42, 19, 25, 31, 34, 40, 46, 52, 59, 1, - 17, 8, 37, 4, 23, 27, 48, 10, 29, 12, 43, 20, 32, 41, 53, 18, - 38, 24, 49, 30, 44, 33, 54, 39, 50, 45, 55, 51, 56, 57, 58, 0 - }; - v |= v >> 1; - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - v |= v >> 32; - return table[(v * U64(0x03F79D71, 0xB4CB0A89)) >> 58]; -#endif -} - -/** Returns the number of trailing 0-bits in value (input should not be 0). */ -static_inline u32 u64_tz_bits(u64 v) { -#if GCC_HAS_CTZLL - return (u32)__builtin_ctzll(v); -#elif MSC_HAS_BIT_SCAN_64 - unsigned long r; - _BitScanForward64(&r, v); - return (u32)r; -#elif MSC_HAS_BIT_SCAN - unsigned long lo, hi; - bool lo_set = _BitScanForward(&lo, (u32)(v)) != 0; - _BitScanForward(&hi, (u32)(v >> 32)); - hi += 32; - return lo_set ? 
lo : hi; -#else - /* branchless, use De Bruijn sequence */ - /* see: https://www.chessprogramming.org/BitScan */ - const u8 table[64] = { - 0, 1, 2, 53, 3, 7, 54, 27, 4, 38, 41, 8, 34, 55, 48, 28, - 62, 5, 39, 46, 44, 42, 22, 9, 24, 35, 59, 56, 49, 18, 29, 11, - 63, 52, 6, 26, 37, 40, 33, 47, 61, 45, 43, 21, 23, 58, 17, 10, - 51, 25, 36, 32, 60, 20, 57, 16, 50, 31, 19, 15, 30, 14, 13, 12 - }; - return table[((v & (~v + 1)) * U64(0x022FDD63, 0xCC95386D)) >> 58]; -#endif -} - - - -/*============================================================================== - * 128-bit Integer Utils - * These functions are used by the floating-point number reader and writer. - *============================================================================*/ - -/** Multiplies two 64-bit unsigned integers (a * b), - returns the 128-bit result as 'hi' and 'lo'. */ -static_inline void u128_mul(u64 a, u64 b, u64 *hi, u64 *lo) { -#if YYJSON_HAS_INT128 - u128 m = (u128)a * b; - *hi = (u64)(m >> 64); - *lo = (u64)(m); -#elif MSC_HAS_UMUL128 - *lo = _umul128(a, b, hi); -#else - u32 a0 = (u32)(a), a1 = (u32)(a >> 32); - u32 b0 = (u32)(b), b1 = (u32)(b >> 32); - u64 p00 = (u64)a0 * b0, p01 = (u64)a0 * b1; - u64 p10 = (u64)a1 * b0, p11 = (u64)a1 * b1; - u64 m0 = p01 + (p00 >> 32); - u32 m00 = (u32)(m0), m01 = (u32)(m0 >> 32); - u64 m1 = p10 + m00; - u32 m10 = (u32)(m1), m11 = (u32)(m1 >> 32); - *hi = p11 + m01 + m11; - *lo = ((u64)m10 << 32) | (u32)p00; -#endif -} - -/** Multiplies two 64-bit unsigned integers and add a value (a * b + c), - returns the 128-bit result as 'hi' and 'lo'. 
*/ -static_inline void u128_mul_add(u64 a, u64 b, u64 c, u64 *hi, u64 *lo) { -#if YYJSON_HAS_INT128 - u128 m = (u128)a * b + c; - *hi = (u64)(m >> 64); - *lo = (u64)(m); -#else - u64 h, l, t; - u128_mul(a, b, &h, &l); - t = l + c; - h += (u64)(((t < l) | (t < c))); - *hi = h; - *lo = t; -#endif -} - - - -/*============================================================================== - * File Utils - * These functions are used to read and write JSON files. - *============================================================================*/ - -#define YYJSON_FOPEN_EXT -#if !defined(_MSC_VER) && defined(__GLIBC__) && defined(__GLIBC_PREREQ) -# if __GLIBC_PREREQ(2, 7) -# undef YYJSON_FOPEN_EXT -# define YYJSON_FOPEN_EXT "e" /* glibc extension to enable O_CLOEXEC */ -# endif -#endif - -static_inline FILE *fopen_safe(const char *path, const char *mode) { -#if YYJSON_MSC_VER >= 1400 - FILE *file = NULL; - if (fopen_s(&file, path, mode) != 0) return NULL; - return file; -#else - return fopen(path, mode); -#endif -} - -static_inline FILE *fopen_readonly(const char *path) { - return fopen_safe(path, "rb" YYJSON_FOPEN_EXT); -} - -static_inline FILE *fopen_writeonly(const char *path) { - return fopen_safe(path, "wb" YYJSON_FOPEN_EXT); -} - -static_inline usize fread_safe(void *buf, usize size, FILE *file) { -#if YYJSON_MSC_VER >= 1400 - return fread_s(buf, size, 1, size, file); -#else - return fread(buf, 1, size, file); -#endif -} - - - -/*============================================================================== - * Default Memory Allocator - * - * This is a simple libc memory allocator wrapper. 
- *============================================================================*/ - -static void *default_malloc(void *ctx, usize size) { - return malloc(size); -} - -static void *default_realloc(void *ctx, void *ptr, usize old_size, usize size) { - return realloc(ptr, size); -} - -static void default_free(void *ctx, void *ptr) { - free(ptr); -} - -static const yyjson_alc YYJSON_DEFAULT_ALC = { - default_malloc, default_realloc, default_free, NULL +static const u8 char_table1[256] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x0C, 0x0C, 0x08, 0x08, 0x0C, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x0F, 0x03, 0x02, 0x03, 0x03, 0x03, 0x03, 0x01, + 0x03, 0x03, 0x03, 0x13, 0x03, 0x13, 0x13, 0x23, + 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, + 0x13, 0x13, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, + 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, + 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, + 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, + 0x03, 0x03, 0x03, 0x03, 0x00, 0x03, 0x03, 0x03, + 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, + 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, + 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, + 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x08, 0x08, 0x08, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, + 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; - - -/*============================================================================== - * Null Memory Allocator - * - * This allocator is just a placeholder to ensure that the internal - * malloc/realloc/free function pointers are not null. - *============================================================================*/ - -static void *null_malloc(void *ctx, usize size) { - return NULL; -} - -static void *null_realloc(void *ctx, void *ptr, usize old_size, usize size) { - return NULL; -} - -static void null_free(void *ctx, void *ptr) { - return; -} - -static const yyjson_alc YYJSON_NULL_ALC = { - null_malloc, null_realloc, null_free, NULL +static const u8 char_table2[256] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x03, 0x00, 0x00, 0x03, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x1C, 0x1C, 0x1C, 0x1C, 0x1C, 0x1C, 0x1C, + 0x1C, 0x1C, 0x1C, 0x1C, 0x1C, 0x1C, 0x1C, 0x1C, + 0x1C, 0x1C, 0x1C, 0x1C, 0x1C, 0x1C, 0x1C, 0x1C, + 0x1C, 0x1C, 0x1C, 0x00, 0x0C, 0x00, 0x00, 0x1C, + 0x00, 0x1C, 0x1C, 0x1C, 0x1C, 0x1C, 0x1C, 0x1C, + 0x1C, 0x1C, 0x1C, 0x1C, 0x1C, 0x1C, 0x1C, 0x1C, + 0x1C, 0x1C, 0x1C, 0x1C, 0x1C, 0x1C, 0x1C, 0x1C, + 0x1C, 0x1C, 0x1C, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, + 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, + 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, + 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, + 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, + 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, + 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, + 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, + 0x0C, 0x0C, 0x0C, 
0x0C, 0x0C, 0x0C, 0x0C, 0x0C, + 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, + 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, + 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, + 0x0C, 0x0C, 0x0E, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, + 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, + 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, + 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C }; +static const u8 char_table3[256] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x10, 0x00, + 0x02, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00 +}; - -/*============================================================================== - * Pool Memory Allocator - * - * This allocator is initialized with a fixed-size buffer. - * The buffer is split into multiple memory chunks for memory allocation. - *============================================================================*/ - -/** memory chunk header */ -typedef struct pool_chunk { - usize size; /* chunk memory size, include chunk header */ - struct pool_chunk *next; /* linked list, nullable */ - /* char mem[]; flexible array member */ -} pool_chunk; - -/** allocator ctx header */ -typedef struct pool_ctx { - usize size; /* total memory size, include ctx header */ - pool_chunk *free_list; /* linked list, nullable */ - /* pool_chunk chunks[]; flexible array member */ -} pool_ctx; - -/** align up the input size to chunk size */ -static_inline void pool_size_align(usize *size) { - *size = size_align_up(*size, sizeof(pool_chunk)) + sizeof(pool_chunk); +/** Match a whitespace: [ \t\n\r]. */ +static_inline bool char_is_space(u8 c) { + return !!(char_table1[c] & CHAR_TYPE_SPACE); } -static void *pool_malloc(void *ctx_ptr, usize size) { - /* assert(size != 0) */ - pool_ctx *ctx = (pool_ctx *)ctx_ptr; - pool_chunk *next, *prev = NULL, *cur = ctx->free_list; - - if (unlikely(size >= ctx->size)) return NULL; - pool_size_align(&size); - - while (cur) { - if (cur->size < size) { - /* not enough space, try next chunk */ - prev = cur; - cur = cur->next; - continue; - } - if (cur->size >= size + sizeof(pool_chunk) * 2) { - /* too much space, split this chunk */ - next = (pool_chunk *)(void *)((u8 *)cur + size); - next->size = cur->size - size; - next->next = cur->next; - cur->size = size; - } else { - /* just enough space, use whole chunk */ - next = cur->next; - } - if (prev) prev->next = next; - else ctx->free_list = next; - return (void *)(cur + 1); - } - return NULL; +/** Match an extended whitespace: [ \t\n\r\\v\\f], JSON5 whitespace. 
*/ +static_inline bool char_is_space_ext(u8 c) { + return !!(char_table1[c] & CHAR_TYPE_SPACE_EXT); } -static void pool_free(void *ctx_ptr, void *ptr) { - /* assert(ptr != NULL) */ - pool_ctx *ctx = (pool_ctx *)ctx_ptr; - pool_chunk *cur = ((pool_chunk *)ptr) - 1; - pool_chunk *prev = NULL, *next = ctx->free_list; - - while (next && next < cur) { - prev = next; - next = next->next; - } - if (prev) prev->next = cur; - else ctx->free_list = cur; - cur->next = next; - - if (next && ((u8 *)cur + cur->size) == (u8 *)next) { - /* merge cur to higher chunk */ - cur->size += next->size; - cur->next = next->next; - } - if (prev && ((u8 *)prev + prev->size) == (u8 *)cur) { - /* merge cur to lower chunk */ - prev->size += cur->size; - prev->next = cur->next; - } +/** Match a JSON number: [.-+0-9]. */ +static_inline bool char_is_num(u8 c) { + return !!(char_table1[c] & CHAR_TYPE_NUM); } -static void *pool_realloc(void *ctx_ptr, void *ptr, - usize old_size, usize size) { - /* assert(ptr != NULL && size != 0 && old_size < size) */ - pool_ctx *ctx = (pool_ctx *)ctx_ptr; - pool_chunk *cur = ((pool_chunk *)ptr) - 1, *prev, *next, *tmp; - - /* check size */ - if (unlikely(size >= ctx->size)) return NULL; - pool_size_align(&old_size); - pool_size_align(&size); - if (unlikely(old_size == size)) return ptr; - - /* find next and prev chunk */ - prev = NULL; - next = ctx->free_list; - while (next && next < cur) { - prev = next; - next = next->next; - } - - if ((u8 *)cur + cur->size == (u8 *)next && cur->size + next->size >= size) { - /* merge to higher chunk if they are contiguous */ - usize free_size = cur->size + next->size - size; - if (free_size > sizeof(pool_chunk) * 2) { - tmp = (pool_chunk *)(void *)((u8 *)cur + size); - if (prev) prev->next = tmp; - else ctx->free_list = tmp; - tmp->next = next->next; - tmp->size = free_size; - cur->size = size; - } else { - if (prev) prev->next = next->next; - else ctx->free_list = next->next; - cur->size += next->size; - } - return ptr; - } 
else { - /* fallback to malloc and memcpy */ - void *new_ptr = pool_malloc(ctx_ptr, size - sizeof(pool_chunk)); - if (new_ptr) { - memcpy(new_ptr, ptr, cur->size - sizeof(pool_chunk)); - pool_free(ctx_ptr, ptr); - } - return new_ptr; - } +/** Match an ASCII character in string: ["\], [0x00-0x1F, 0x80-0xFF]. */ +static_inline bool char_is_ascii_skip(u8 c) { + return !!(char_table1[c] & CHAR_TYPE_ASCII); } -bool yyjson_alc_pool_init(yyjson_alc *alc, void *buf, usize size) { - pool_chunk *chunk; - pool_ctx *ctx; - - if (unlikely(!alc)) return false; - *alc = YYJSON_NULL_ALC; - if (size < sizeof(pool_ctx) * 4) return false; - ctx = (pool_ctx *)mem_align_up(buf, sizeof(pool_ctx)); - if (unlikely(!ctx)) return false; - size -= (usize)((u8 *)ctx - (u8 *)buf); - size = size_align_down(size, sizeof(pool_ctx)); - - chunk = (pool_chunk *)(ctx + 1); - chunk->size = size - sizeof(pool_ctx); - chunk->next = NULL; - ctx->size = size; - ctx->free_list = chunk; - - alc->malloc = pool_malloc; - alc->realloc = pool_realloc; - alc->free = pool_free; - alc->ctx = (void *)ctx; - return true; +/** Match an ASCII character single-quoted: ['\], [0x00-0x1F, 0x80-0xFF]. */ +static_inline bool char_is_ascii_skip_sq(u8 c) { + return !!(char_table1[c] & CHAR_TYPE_ASCII_SQ); } - - -/*============================================================================== - * Dynamic Memory Allocator - * - * This allocator allocates memory on demand and does not immediately release - * unused memory. Instead, it places the unused memory into a freelist for - * potential reuse in the future. It is only when the entire allocator is - * destroyed that all previously allocated memory is released at once. 
- *============================================================================*/ - -/** memory chunk header */ -typedef struct dyn_chunk { - usize size; /* chunk size, include header */ - struct dyn_chunk *next; - /* char mem[]; flexible array member */ -} dyn_chunk; - -/** allocator ctx header */ -typedef struct { - dyn_chunk free_list; /* dummy header, sorted from small to large */ - dyn_chunk used_list; /* dummy header */ -} dyn_ctx; - -/** align up the input size to chunk size */ -static_inline bool dyn_size_align(usize *size) { - usize alc_size = *size + sizeof(dyn_chunk); - alc_size = size_align_up(alc_size, YYJSON_ALC_DYN_MIN_SIZE); - if (unlikely(alc_size < *size)) return false; /* overflow */ - *size = alc_size; - return true; +/** Match a trivia character: extended whitespace or comment. */ +static_inline bool char_is_trivia(u8 c) { + return !!(char_table1[c] & (CHAR_TYPE_SPACE_EXT | CHAR_TYPE_COMMENT)); } -/** remove a chunk from list (the chunk must already be in the list) */ -static_inline void dyn_chunk_list_remove(dyn_chunk *list, dyn_chunk *chunk) { - dyn_chunk *prev = list, *cur; - for (cur = prev->next; cur; cur = cur->next) { - if (cur == chunk) { - prev->next = cur->next; - cur->next = NULL; - return; - } - prev = cur; - } +/** Match a line end character: [\r\n]. */ +static_inline bool char_is_eol(u8 c) { + return !!(char_table2[c] & CHAR_TYPE_EOL); } -/** add a chunk to list header (the chunk must not be in the list) */ -static_inline void dyn_chunk_list_add(dyn_chunk *list, dyn_chunk *chunk) { - chunk->next = list->next; - list->next = chunk; +/** Match an extended line end character: [\r\n], JSON5 line terminator. 
*/ +static_inline bool char_is_eol_ext(u8 c) { + return !!(char_table2[c] & CHAR_TYPE_EOL_EXT); } -static void *dyn_malloc(void *ctx_ptr, usize size) { - /* assert(size != 0) */ - const yyjson_alc def = YYJSON_DEFAULT_ALC; - dyn_ctx *ctx = (dyn_ctx *)ctx_ptr; - dyn_chunk *chunk, *prev; - if (unlikely(!dyn_size_align(&size))) return NULL; - - /* freelist is empty, create new chunk */ - if (!ctx->free_list.next) { - chunk = (dyn_chunk *)def.malloc(def.ctx, size); - if (unlikely(!chunk)) return NULL; - chunk->size = size; - chunk->next = NULL; - dyn_chunk_list_add(&ctx->used_list, chunk); - return (void *)(chunk + 1); - } - - /* find a large enough chunk, or resize the largest chunk */ - prev = &ctx->free_list; - while (true) { - chunk = prev->next; - if (chunk->size >= size) { /* enough size, reuse this chunk */ - prev->next = chunk->next; - dyn_chunk_list_add(&ctx->used_list, chunk); - return (void *)(chunk + 1); - } - if (!chunk->next) { /* resize the largest chunk */ - chunk = (dyn_chunk *)def.realloc(def.ctx, chunk, chunk->size, size); - if (unlikely(!chunk)) return NULL; - prev->next = NULL; - chunk->size = size; - dyn_chunk_list_add(&ctx->used_list, chunk); - return (void *)(chunk + 1); - } - prev = chunk; - } +/** Match an identifier name start: [_$A-Za-z\], U+0080+. 
*/ +static_inline bool char_is_id_start(u8 c) { + return !!(char_table2[c] & CHAR_TYPE_ID_START); } -static void *dyn_realloc(void *ctx_ptr, void *ptr, - usize old_size, usize size) { - /* assert(ptr != NULL && size != 0 && old_size < size) */ - const yyjson_alc def = YYJSON_DEFAULT_ALC; - dyn_ctx *ctx = (dyn_ctx *)ctx_ptr; - dyn_chunk *new_chunk, *chunk = (dyn_chunk *)ptr - 1; - if (unlikely(!dyn_size_align(&size))) return NULL; - if (chunk->size >= size) return ptr; - - dyn_chunk_list_remove(&ctx->used_list, chunk); - new_chunk = (dyn_chunk *)def.realloc(def.ctx, chunk, chunk->size, size); - if (likely(new_chunk)) { - new_chunk->size = size; - chunk = new_chunk; - } - dyn_chunk_list_add(&ctx->used_list, chunk); - return new_chunk ? (void *)(new_chunk + 1) : NULL; +/** Match an identifier name next: [_$A-Za-z0-9\], U+0080+. */ +static_inline bool char_is_id_next(u8 c) { + return !!(char_table2[c] & CHAR_TYPE_ID_NEXT); } -static void dyn_free(void *ctx_ptr, void *ptr) { - /* assert(ptr != NULL) */ - dyn_ctx *ctx = (dyn_ctx *)ctx_ptr; - dyn_chunk *chunk = (dyn_chunk *)ptr - 1, *prev; - - dyn_chunk_list_remove(&ctx->used_list, chunk); - for (prev = &ctx->free_list; prev; prev = prev->next) { - if (!prev->next || prev->next->size >= chunk->size) { - chunk->next = prev->next; - prev->next = chunk; - break; - } - } +/** Match an identifier name ASCII: [_$A-Za-z0-9]. 
*/ +static_inline bool char_is_id_ascii(u8 c) { + return !!(char_table2[c] & CHAR_TYPE_ID_ASCII); } -yyjson_alc *yyjson_alc_dyn_new(void) { - const yyjson_alc def = YYJSON_DEFAULT_ALC; - usize hdr_len = sizeof(yyjson_alc) + sizeof(dyn_ctx); - yyjson_alc *alc = (yyjson_alc *)def.malloc(def.ctx, hdr_len); - dyn_ctx *ctx = (dyn_ctx *)(void *)(alc + 1); - if (unlikely(!alc)) return NULL; - alc->malloc = dyn_malloc; - alc->realloc = dyn_realloc; - alc->free = dyn_free; - alc->ctx = alc + 1; - memset(ctx, 0, sizeof(*ctx)); - return alc; +/** Match a sign: [+-] */ +static_inline bool char_is_sign(u8 d) { + return !!(char_table3[d] & CHAR_TYPE_SIGN); } -void yyjson_alc_dyn_free(yyjson_alc *alc) { - const yyjson_alc def = YYJSON_DEFAULT_ALC; - dyn_ctx *ctx = (dyn_ctx *)(void *)(alc + 1); - dyn_chunk *chunk, *next; - if (unlikely(!alc)) return; - for (chunk = ctx->free_list.next; chunk; chunk = next) { - next = chunk->next; - def.free(def.ctx, chunk); - } - for (chunk = ctx->used_list.next; chunk; chunk = next) { - next = chunk->next; - def.free(def.ctx, chunk); - } - def.free(def.ctx, alc); +/** Match a non-zero digit: [1-9] */ +static_inline bool char_is_nonzero(u8 d) { + return !!(char_table3[d] & CHAR_TYPE_NONZERO); } - - -/*============================================================================== - * JSON document and value - *============================================================================*/ - -static_inline void unsafe_yyjson_str_pool_release(yyjson_str_pool *pool, - yyjson_alc *alc) { - yyjson_str_chunk *chunk = pool->chunks, *next; - while (chunk) { - next = chunk->next; - alc->free(alc->ctx, chunk); - chunk = next; - } +/** Match a digit: [0-9] */ +static_inline bool char_is_digit(u8 d) { + return !!(char_table3[d] & CHAR_TYPE_DIGIT); } -static_inline void unsafe_yyjson_val_pool_release(yyjson_val_pool *pool, - yyjson_alc *alc) { - yyjson_val_chunk *chunk = pool->chunks, *next; - while (chunk) { - next = chunk->next; - alc->free(alc->ctx, chunk); 
- chunk = next; - } +/** Match an exponent sign: [eE]. */ +static_inline bool char_is_exp(u8 d) { + return !!(char_table3[d] & CHAR_TYPE_EXP); } -bool unsafe_yyjson_str_pool_grow(yyjson_str_pool *pool, - const yyjson_alc *alc, usize len) { - yyjson_str_chunk *chunk; - usize size, max_len; - - /* create a new chunk */ - max_len = USIZE_MAX - sizeof(yyjson_str_chunk); - if (unlikely(len > max_len)) return false; - size = len + sizeof(yyjson_str_chunk); - size = yyjson_max(pool->chunk_size, size); - chunk = (yyjson_str_chunk *)alc->malloc(alc->ctx, size); - if (unlikely(!chunk)) return false; - - /* insert the new chunk as the head of the linked list */ - chunk->next = pool->chunks; - chunk->chunk_size = size; - pool->chunks = chunk; - pool->cur = (char *)chunk + sizeof(yyjson_str_chunk); - pool->end = (char *)chunk + size; - - /* the next chunk is twice the size of the current one */ - size = yyjson_min(pool->chunk_size * 2, pool->chunk_size_max); - if (size < pool->chunk_size) size = pool->chunk_size_max; /* overflow */ - pool->chunk_size = size; - return true; +/** Match a floating point indicator: [.eE]. 
*/ +static_inline bool char_is_fp(u8 d) { + return !!(char_table3[d] & (CHAR_TYPE_DOT | CHAR_TYPE_EXP)); } -bool unsafe_yyjson_val_pool_grow(yyjson_val_pool *pool, - const yyjson_alc *alc, usize count) { - yyjson_val_chunk *chunk; - usize size, max_count; - - /* create a new chunk */ - max_count = USIZE_MAX / sizeof(yyjson_mut_val) - 1; - if (unlikely(count > max_count)) return false; - size = (count + 1) * sizeof(yyjson_mut_val); - size = yyjson_max(pool->chunk_size, size); - chunk = (yyjson_val_chunk *)alc->malloc(alc->ctx, size); - if (unlikely(!chunk)) return false; - - /* insert the new chunk as the head of the linked list */ - chunk->next = pool->chunks; - chunk->chunk_size = size; - pool->chunks = chunk; - pool->cur = (yyjson_mut_val *)(void *)((u8 *)chunk) + 1; - pool->end = (yyjson_mut_val *)(void *)((u8 *)chunk + size); - - /* the next chunk is twice the size of the current one */ - size = yyjson_min(pool->chunk_size * 2, pool->chunk_size_max); - if (size < pool->chunk_size) size = pool->chunk_size_max; /* overflow */ - pool->chunk_size = size; - return true; +/** Match a digit or floating point indicator: [0-9.eE]. */ +static_inline bool char_is_digit_or_fp(u8 d) { + return !!(char_table3[d] & (CHAR_TYPE_DIGIT | CHAR_TYPE_DOT | + CHAR_TYPE_EXP)); } -bool yyjson_mut_doc_set_str_pool_size(yyjson_mut_doc *doc, size_t len) { - usize max_size = USIZE_MAX - sizeof(yyjson_str_chunk); - if (!doc || !len || len > max_size) return false; - doc->str_pool.chunk_size = len + sizeof(yyjson_str_chunk); - return true; +/** Match a JSON container: `{` or `[`. 
*/ +static_inline bool char_is_ctn(u8 c) { + return (c & 0xDF) == 0x5B; /* '[': 0x5B, '{': 0x7B */ } -bool yyjson_mut_doc_set_val_pool_size(yyjson_mut_doc *doc, size_t count) { - usize max_count = USIZE_MAX / sizeof(yyjson_mut_val) - 1; - if (!doc || !count || count > max_count) return false; - doc->val_pool.chunk_size = (count + 1) * sizeof(yyjson_mut_val); - return true; +/** Convert ASCII letter to lowercase; valid only for [A-Za-z]. */ +static_inline u8 char_to_lower(u8 c) { + return c | 0x20; } -void yyjson_mut_doc_free(yyjson_mut_doc *doc) { - if (doc) { - yyjson_alc alc = doc->alc; - memset(&doc->alc, 0, sizeof(alc)); - unsafe_yyjson_str_pool_release(&doc->str_pool, &alc); - unsafe_yyjson_val_pool_release(&doc->val_pool, &alc); - alc.free(alc.ctx, doc); - } +/** Match UTF-8 byte order mark. */ +static_inline bool is_utf8_bom(const u8 *cur) { + return byte_load_3(cur) == byte_load_3("\xEF\xBB\xBF"); } -yyjson_mut_doc *yyjson_mut_doc_new(const yyjson_alc *alc) { - yyjson_mut_doc *doc; - if (!alc) alc = &YYJSON_DEFAULT_ALC; - doc = (yyjson_mut_doc *)alc->malloc(alc->ctx, sizeof(yyjson_mut_doc)); - if (!doc) return NULL; - memset(doc, 0, sizeof(yyjson_mut_doc)); - - doc->alc = *alc; - doc->str_pool.chunk_size = YYJSON_MUT_DOC_STR_POOL_INIT_SIZE; - doc->str_pool.chunk_size_max = YYJSON_MUT_DOC_STR_POOL_MAX_SIZE; - doc->val_pool.chunk_size = YYJSON_MUT_DOC_VAL_POOL_INIT_SIZE; - doc->val_pool.chunk_size_max = YYJSON_MUT_DOC_VAL_POOL_MAX_SIZE; - return doc; +/** Match UTF-16 byte order mark. 
*/ +static_inline bool is_utf16_bom(const u8 *cur) { + return byte_load_2(cur) == byte_load_2("\xFE\xFF") || + byte_load_2(cur) == byte_load_2("\xFF\xFE"); } -yyjson_mut_doc *yyjson_doc_mut_copy(yyjson_doc *doc, const yyjson_alc *alc) { - yyjson_mut_doc *m_doc; - yyjson_mut_val *m_val; - - if (!doc || !doc->root) return NULL; - m_doc = yyjson_mut_doc_new(alc); - if (!m_doc) return NULL; - m_val = yyjson_val_mut_copy(m_doc, doc->root); - if (!m_val) { - yyjson_mut_doc_free(m_doc); - return NULL; - } - yyjson_mut_doc_set_root(m_doc, m_val); - return m_doc; +/** Match UTF-32 byte order mark; needs a length check to avoid zero padding. */ +static_inline bool is_utf32_bom(const u8 *cur) { + return byte_load_4(cur) == byte_load_4("\x00\x00\xFE\xFF") || + byte_load_4(cur) == byte_load_4("\xFF\xFE\x00\x00"); } -yyjson_mut_doc *yyjson_mut_doc_mut_copy(yyjson_mut_doc *doc, - const yyjson_alc *alc) { - yyjson_mut_doc *m_doc; - yyjson_mut_val *m_val; - - if (!doc) return NULL; - if (!doc->root) return yyjson_mut_doc_new(alc); - - m_doc = yyjson_mut_doc_new(alc); - if (!m_doc) return NULL; - m_val = yyjson_mut_val_mut_copy(m_doc, doc->root); - if (!m_val) { - yyjson_mut_doc_free(m_doc); - return NULL; - } - yyjson_mut_doc_set_root(m_doc, m_val); - return m_doc; +/** Get the extended line end length. Used with `char_is_eol_ext`. */ +static_inline usize ext_eol_len(const u8 *cur) { + if (cur[0] < 0x80) return 1; + if (cur[1] == 0x80 && (cur[2] == 0xA8 || cur[2] == 0xA9)) return 3; + return 0; } -yyjson_mut_val *yyjson_val_mut_copy(yyjson_mut_doc *m_doc, - yyjson_val *i_vals) { - /* - The immutable object or array stores all sub-values in a contiguous memory, - We copy them to another contiguous memory as mutable values, - then reconnect the mutable values with the original relationship. 
- */ - usize i_vals_len; - yyjson_mut_val *m_vals, *m_val; - yyjson_val *i_val, *i_end; - - if (!m_doc || !i_vals) return NULL; - i_end = unsafe_yyjson_get_next(i_vals); - i_vals_len = (usize)(unsafe_yyjson_get_next(i_vals) - i_vals); - m_vals = unsafe_yyjson_mut_val(m_doc, i_vals_len); - if (!m_vals) return NULL; - i_val = i_vals; - m_val = m_vals; - - for (; i_val < i_end; i_val++, m_val++) { - yyjson_type type = unsafe_yyjson_get_type(i_val); - m_val->tag = i_val->tag; - m_val->uni.u64 = i_val->uni.u64; - if (type == YYJSON_TYPE_STR || type == YYJSON_TYPE_RAW) { - const char *str = i_val->uni.str; - usize str_len = unsafe_yyjson_get_len(i_val); - m_val->uni.str = unsafe_yyjson_mut_strncpy(m_doc, str, str_len); - if (!m_val->uni.str) return NULL; - } else if (type == YYJSON_TYPE_ARR) { - usize len = unsafe_yyjson_get_len(i_val); - if (len > 0) { - yyjson_val *ii_val = i_val + 1, *ii_next; - yyjson_mut_val *mm_val = m_val + 1, *mm_ctn = m_val, *mm_next; - while (len-- > 1) { - ii_next = unsafe_yyjson_get_next(ii_val); - mm_next = mm_val + (ii_next - ii_val); - mm_val->next = mm_next; - ii_val = ii_next; - mm_val = mm_next; - } - mm_val->next = mm_ctn + 1; - mm_ctn->uni.ptr = mm_val; - } - } else if (type == YYJSON_TYPE_OBJ) { - usize len = unsafe_yyjson_get_len(i_val); - if (len > 0) { - yyjson_val *ii_key = i_val + 1, *ii_nextkey; - yyjson_mut_val *mm_key = m_val + 1, *mm_ctn = m_val; - yyjson_mut_val *mm_nextkey; - while (len-- > 1) { - ii_nextkey = unsafe_yyjson_get_next(ii_key + 1); - mm_nextkey = mm_key + (ii_nextkey - ii_key); - mm_key->next = mm_key + 1; - mm_key->next->next = mm_nextkey; - ii_key = ii_nextkey; - mm_key = mm_nextkey; - } - mm_key->next = mm_key + 1; - mm_key->next->next = mm_ctn + 1; - mm_ctn->uni.ptr = mm_key; - } - } - } - return m_vals; -} - -static yyjson_mut_val *unsafe_yyjson_mut_val_mut_copy(yyjson_mut_doc *m_doc, - yyjson_mut_val *m_vals) { - /* - The mutable object or array stores all sub-values in a circular linked - list, so we 
can traverse them in the same loop. The traversal starts from - the last item, continues with the first item in a list, and ends with the - second to last item, which needs to be linked to the last item to close the - circle. - */ - yyjson_mut_val *m_val = unsafe_yyjson_mut_val(m_doc, 1); - if (unlikely(!m_val)) return NULL; - m_val->tag = m_vals->tag; - - switch (unsafe_yyjson_get_type(m_vals)) { - case YYJSON_TYPE_OBJ: - case YYJSON_TYPE_ARR: - if (unsafe_yyjson_get_len(m_vals) > 0) { - yyjson_mut_val *last = (yyjson_mut_val *)m_vals->uni.ptr; - yyjson_mut_val *next = last->next, *prev; - prev = unsafe_yyjson_mut_val_mut_copy(m_doc, last); - if (!prev) return NULL; - m_val->uni.ptr = (void *)prev; - while (next != last) { - prev->next = unsafe_yyjson_mut_val_mut_copy(m_doc, next); - if (!prev->next) return NULL; - prev = prev->next; - next = next->next; - } - prev->next = (yyjson_mut_val *)m_val->uni.ptr; - } - break; - case YYJSON_TYPE_RAW: - case YYJSON_TYPE_STR: { - const char *str = m_vals->uni.str; - usize str_len = unsafe_yyjson_get_len(m_vals); - m_val->uni.str = unsafe_yyjson_mut_strncpy(m_doc, str, str_len); - if (!m_val->uni.str) return NULL; - break; - } - default: - m_val->uni = m_vals->uni; - break; - } - return m_val; -} - -yyjson_mut_val *yyjson_mut_val_mut_copy(yyjson_mut_doc *doc, - yyjson_mut_val *val) { - if (doc && val) return unsafe_yyjson_mut_val_mut_copy(doc, val); - return NULL; -} - -/* Count the number of values and the total length of the strings. 
*/ -static void yyjson_mut_stat(yyjson_mut_val *val, - usize *val_sum, usize *str_sum) { - yyjson_type type = unsafe_yyjson_get_type(val); - *val_sum += 1; - if (type == YYJSON_TYPE_ARR || type == YYJSON_TYPE_OBJ) { - yyjson_mut_val *child = (yyjson_mut_val *)val->uni.ptr; - usize len = unsafe_yyjson_get_len(val), i; - len <<= (u8)(type == YYJSON_TYPE_OBJ); - *val_sum += len; - for (i = 0; i < len; i++) { - yyjson_type stype = unsafe_yyjson_get_type(child); - if (stype == YYJSON_TYPE_STR || stype == YYJSON_TYPE_RAW) { - *str_sum += unsafe_yyjson_get_len(child) + 1; - } else if (stype == YYJSON_TYPE_ARR || stype == YYJSON_TYPE_OBJ) { - yyjson_mut_stat(child, val_sum, str_sum); - *val_sum -= 1; - } - child = child->next; - } - } else if (type == YYJSON_TYPE_STR || type == YYJSON_TYPE_RAW) { - *str_sum += unsafe_yyjson_get_len(val) + 1; - } -} - -/* Copy mutable values to immutable value pool. */ -static usize yyjson_imut_copy(yyjson_val **val_ptr, char **buf_ptr, - yyjson_mut_val *mval) { - yyjson_val *val = *val_ptr; - yyjson_type type = unsafe_yyjson_get_type(mval); - if (type == YYJSON_TYPE_ARR || type == YYJSON_TYPE_OBJ) { - yyjson_mut_val *child = (yyjson_mut_val *)mval->uni.ptr; - usize len = unsafe_yyjson_get_len(mval), i; - usize val_sum = 1; - if (type == YYJSON_TYPE_OBJ) { - if (len) child = child->next->next; - len <<= 1; - } else { - if (len) child = child->next; - } - *val_ptr = val + 1; - for (i = 0; i < len; i++) { - val_sum += yyjson_imut_copy(val_ptr, buf_ptr, child); - child = child->next; - } - val->tag = mval->tag; - val->uni.ofs = val_sum * sizeof(yyjson_val); - return val_sum; - } else if (type == YYJSON_TYPE_STR || type == YYJSON_TYPE_RAW) { - char *buf = *buf_ptr; - usize len = unsafe_yyjson_get_len(mval); - memcpy((void *)buf, (const void *)mval->uni.str, len); - buf[len] = '\0'; - val->tag = mval->tag; - val->uni.str = buf; - *val_ptr = val + 1; - *buf_ptr = buf + len + 1; +/** Get the extended whitespace length. 
Used with `char_is_space_ext`. */ +static_inline usize ext_space_len(const u8 *cur) { + if (cur[0] < 0x80) { return 1; + } else if (byte_load_2(cur) == byte_load_2("\xC2\xA0")) { + return 2; + } else if (byte_load_2(cur) == byte_load_2("\xE2\x80")) { + if (cur[2] >= 0x80 && cur[2] <= 0x8A) return 3; + if (cur[2] == 0xA8 || cur[2] == 0xA9 || cur[2] == 0xAF) return 3; } else { - val->tag = mval->tag; - val->uni = mval->uni; - *val_ptr = val + 1; - return 1; + u32 uni = byte_load_3(cur); + if (uni == byte_load_3("\xE1\x9A\x80") || + uni == byte_load_3("\xE2\x81\x9F") || + uni == byte_load_3("\xE3\x80\x80") || + uni == byte_load_3("\xEF\xBB\xBF")) return 3; } -} - -yyjson_doc *yyjson_mut_doc_imut_copy(yyjson_mut_doc *mdoc, - const yyjson_alc *alc) { - if (!mdoc) return NULL; - return yyjson_mut_val_imut_copy(mdoc->root, alc); -} - -yyjson_doc *yyjson_mut_val_imut_copy(yyjson_mut_val *mval, - const yyjson_alc *alc) { - usize val_num = 0, str_sum = 0, hdr_size, buf_size; - yyjson_doc *doc = NULL; - yyjson_val *val_hdr = NULL; - - /* This value should be NULL here. Setting a non-null value suppresses - warning from the clang analyzer. 
*/ - char *str_hdr = (char *)(void *)&str_sum; - if (!mval) return NULL; - if (!alc) alc = &YYJSON_DEFAULT_ALC; - - /* traverse the input value to get pool size */ - yyjson_mut_stat(mval, &val_num, &str_sum); - - /* create doc and val pool */ - hdr_size = size_align_up(sizeof(yyjson_doc), sizeof(yyjson_val)); - buf_size = hdr_size + val_num * sizeof(yyjson_val); - doc = (yyjson_doc *)alc->malloc(alc->ctx, buf_size); - if (!doc) return NULL; - memset(doc, 0, sizeof(yyjson_doc)); - val_hdr = (yyjson_val *)(void *)((char *)(void *)doc + hdr_size); - doc->root = val_hdr; - doc->alc = *alc; - - /* create str pool */ - if (str_sum > 0) { - str_hdr = (char *)alc->malloc(alc->ctx, str_sum); - doc->str_pool = str_hdr; - if (!str_hdr) { - alc->free(alc->ctx, (void *)doc); - return NULL; - } - } - - /* copy vals and strs */ - doc->val_read = yyjson_imut_copy(&val_hdr, &str_hdr, mval); - doc->dat_read = str_sum + 1; - return doc; -} - -static_inline bool unsafe_yyjson_num_equals(void *lhs, void *rhs) { - yyjson_val_uni *luni = &((yyjson_val *)lhs)->uni; - yyjson_val_uni *runi = &((yyjson_val *)rhs)->uni; - yyjson_subtype lt = unsafe_yyjson_get_subtype(lhs); - yyjson_subtype rt = unsafe_yyjson_get_subtype(rhs); - if (lt == rt) return luni->u64 == runi->u64; - if (lt == YYJSON_SUBTYPE_SINT && rt == YYJSON_SUBTYPE_UINT) { - return luni->i64 >= 0 && luni->u64 == runi->u64; - } - if (lt == YYJSON_SUBTYPE_UINT && rt == YYJSON_SUBTYPE_SINT) { - return runi->i64 >= 0 && luni->u64 == runi->u64; - } - return false; -} - -static_inline bool unsafe_yyjson_str_equals(void *lhs, void *rhs) { - usize len = unsafe_yyjson_get_len(lhs); - if (len != unsafe_yyjson_get_len(rhs)) return false; - return !memcmp(unsafe_yyjson_get_str(lhs), - unsafe_yyjson_get_str(rhs), len); -} - -bool unsafe_yyjson_equals(yyjson_val *lhs, yyjson_val *rhs) { - yyjson_type type = unsafe_yyjson_get_type(lhs); - if (type != unsafe_yyjson_get_type(rhs)) return false; - - switch (type) { - case YYJSON_TYPE_OBJ: { - usize 
len = unsafe_yyjson_get_len(lhs); - if (len != unsafe_yyjson_get_len(rhs)) return false; - if (len > 0) { - yyjson_obj_iter iter; - yyjson_obj_iter_init(rhs, &iter); - lhs = unsafe_yyjson_get_first(lhs); - while (len-- > 0) { - rhs = yyjson_obj_iter_getn(&iter, lhs->uni.str, - unsafe_yyjson_get_len(lhs)); - if (!rhs) return false; - if (!unsafe_yyjson_equals(lhs + 1, rhs)) return false; - lhs = unsafe_yyjson_get_next(lhs + 1); - } - } - /* yyjson allows duplicate keys, so the check may be inaccurate */ - return true; - } - - case YYJSON_TYPE_ARR: { - usize len = unsafe_yyjson_get_len(lhs); - if (len != unsafe_yyjson_get_len(rhs)) return false; - if (len > 0) { - lhs = unsafe_yyjson_get_first(lhs); - rhs = unsafe_yyjson_get_first(rhs); - while (len-- > 0) { - if (!unsafe_yyjson_equals(lhs, rhs)) return false; - lhs = unsafe_yyjson_get_next(lhs); - rhs = unsafe_yyjson_get_next(rhs); - } - } - return true; - } - - case YYJSON_TYPE_NUM: - return unsafe_yyjson_num_equals(lhs, rhs); - - case YYJSON_TYPE_RAW: - case YYJSON_TYPE_STR: - return unsafe_yyjson_str_equals(lhs, rhs); - - case YYJSON_TYPE_NULL: - case YYJSON_TYPE_BOOL: - return lhs->tag == rhs->tag; - - default: - return false; - } -} - -bool unsafe_yyjson_mut_equals(yyjson_mut_val *lhs, yyjson_mut_val *rhs) { - yyjson_type type = unsafe_yyjson_get_type(lhs); - if (type != unsafe_yyjson_get_type(rhs)) return false; - - switch (type) { - case YYJSON_TYPE_OBJ: { - usize len = unsafe_yyjson_get_len(lhs); - if (len != unsafe_yyjson_get_len(rhs)) return false; - if (len > 0) { - yyjson_mut_obj_iter iter; - yyjson_mut_obj_iter_init(rhs, &iter); - lhs = (yyjson_mut_val *)lhs->uni.ptr; - while (len-- > 0) { - rhs = yyjson_mut_obj_iter_getn(&iter, lhs->uni.str, - unsafe_yyjson_get_len(lhs)); - if (!rhs) return false; - if (!unsafe_yyjson_mut_equals(lhs->next, rhs)) return false; - lhs = lhs->next->next; - } - } - /* yyjson allows duplicate keys, so the check may be inaccurate */ - return true; - } - - case 
YYJSON_TYPE_ARR: { - usize len = unsafe_yyjson_get_len(lhs); - if (len != unsafe_yyjson_get_len(rhs)) return false; - if (len > 0) { - lhs = (yyjson_mut_val *)lhs->uni.ptr; - rhs = (yyjson_mut_val *)rhs->uni.ptr; - while (len-- > 0) { - if (!unsafe_yyjson_mut_equals(lhs, rhs)) return false; - lhs = lhs->next; - rhs = rhs->next; - } - } - return true; - } - - case YYJSON_TYPE_NUM: - return unsafe_yyjson_num_equals(lhs, rhs); - - case YYJSON_TYPE_RAW: - case YYJSON_TYPE_STR: - return unsafe_yyjson_str_equals(lhs, rhs); - - case YYJSON_TYPE_NULL: - case YYJSON_TYPE_BOOL: - return lhs->tag == rhs->tag; - - default: - return false; - } -} - -static_inline bool is_utf8_bom(const u8 *hdr) { - return hdr[0] == 0xEF && hdr[1] == 0xBB && hdr[2] == 0xBF; -} - -static_inline bool is_utf16_bom(const u8 *hdr) { - return ((hdr[0] == 0xFE && hdr[1] == 0xFF) || - (hdr[0] == 0xFF && hdr[1] == 0xFE)); -} - -static_inline bool is_utf32_bom(const u8 *hdr) { - /* need check length to avoid zero padding */ - return ((hdr[0] == 0x00 && hdr[1] == 0x00 && - hdr[2] == 0xFE && hdr[3] == 0xFF) || - (hdr[0] == 0xFF && hdr[1] == 0xFE && - hdr[2] == 0x00 && hdr[3] == 0x00)); -} - -bool yyjson_locate_pos(const char *str, size_t len, size_t pos, - size_t *line, size_t *col, size_t *chr) { - usize line_sum = 0, line_pos = 0, chr_sum = 0; - const u8 *cur = (const u8 *)str; - const u8 *end = cur + pos; - - if (!str || pos > len) { - if (line) *line = 0; - if (col) *col = 0; - if (chr) *chr = 0; - return false; - } - - if (pos >= 3 && is_utf8_bom(cur)) cur += 3; /* don't count BOM */ - while (cur < end) { - u8 c = *cur; - chr_sum += 1; - if (likely(c < 0x80)) { /* 0xxxxxxx (0x00-0x7F) ASCII */ - if (c == '\n') { - line_sum += 1; - line_pos = chr_sum; - } - cur += 1; - } - else if (c < 0xC0) cur += 1; /* 10xxxxxx (0x80-0xBF) Invalid */ - else if (c < 0xE0) cur += 2; /* 110xxxxx (0xC0-0xDF) 2-byte UTF-8 */ - else if (c < 0xF0) cur += 3; /* 1110xxxx (0xE0-0xEF) 3-byte UTF-8 */ - else if (c < 0xF8) cur += 
4; /* 11110xxx (0xF0-0xF7) 4-byte UTF-8 */ - else cur += 1; /* 11111xxx (0xF8-0xFF) Invalid */ - } - if (line) *line = line_sum + 1; - if (col) *col = chr_sum - line_pos + 1; - if (chr) *chr = chr_sum; - return true; + return 0; } -#if !YYJSON_DISABLE_UTILS - /*============================================================================== - * JSON Pointer API (RFC 6901) + * MARK: - Hex Character Reader (Private) + * This function is used by JSON reader to read escaped characters. *============================================================================*/ /** - Get a token from JSON pointer string. - @param ptr [in] string that points to current token prefix `/` - [out] string that points to next token prefix `/`, or string end - @param end [in] end of the entire JSON Pointer string - @param len [out] unescaped token length - @param esc [out] number of escaped characters in this token - @return head of the token, or NULL if syntax error + This table is used to convert 4 hex character sequence to a number. + A valid hex character [0-9A-Fa-f] will be mapped to its raw number [0x00, 0x0F], + an invalid hex character will be mapped to [0xF0]. 
+ (generate with misc/make_tables.c) */ -static_inline const char *ptr_next_token(const char **ptr, const char *end, - usize *len, usize *esc) { - const char *hdr = *ptr + 1; - const char *cur = hdr; - /* skip unescaped characters */ - while (cur < end && *cur != '/' && *cur != '~') cur++; - if (likely(cur == end || *cur != '~')) { - /* no escaped characters, return */ - *ptr = cur; - *len = (usize)(cur - hdr); - *esc = 0; - return hdr; - } else { - /* handle escaped characters */ - usize esc_num = 0; - while (cur < end && *cur != '/') { - if (*cur++ == '~') { - if (cur == end || (*cur != '0' && *cur != '1')) { - *ptr = cur - 1; - return NULL; - } - esc_num++; - } - } - *ptr = cur; - *len = (usize)(cur - hdr) - esc_num; - *esc = esc_num; - return hdr; - } +static const u8 hex_conv_table[256] = { + 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, + 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, + 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, + 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, + 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, + 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, + 0xF0, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xF0, + 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, + 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, + 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, + 0xF0, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xF0, + 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, + 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, + 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, + 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, + 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, + 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, + 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, + 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, + 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, + 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, + 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 
0xF0, 0xF0, + 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, + 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, + 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, + 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, + 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, + 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, + 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, + 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0 +}; + +/** Load 4 hex characters to `u16`, return true on valid input. */ +static_inline bool hex_load_4(const u8 *src, u16 *dst) { + u16 c0 = hex_conv_table[src[0]]; + u16 c1 = hex_conv_table[src[1]]; + u16 c2 = hex_conv_table[src[2]]; + u16 c3 = hex_conv_table[src[3]]; + u16 t0 = (u16)((c0 << 8) | c2); + u16 t1 = (u16)((c1 << 8) | c3); + *dst = (u16)((t0 << 4) | t1); + return ((t0 | t1) & (u16)0xF0F0) == 0; } -/** - Convert token string to index. - @param cur [in] token head - @param len [in] token length - @param idx [out] the index number, or USIZE_MAX if token is '-' - @return true if token is a valid array index - */ -static_inline bool ptr_token_to_idx(const char *cur, usize len, usize *idx) { - const char *end = cur + len; - usize num = 0, add; - if (unlikely(len == 0 || len > USIZE_SAFE_DIG)) return false; - if (*cur == '0') { - if (unlikely(len > 1)) return false; - *idx = 0; - return true; - } - if (*cur == '-') { - if (unlikely(len > 1)) return false; - *idx = USIZE_MAX; - return true; - } - for (; cur < end && (add = (usize)((u8)*cur - (u8)'0')) <= 9; cur++) { - num = num * 10 + add; - } - if (unlikely(num == 0 || cur < end)) return false; - *idx = num; - return true; +/** Load 2 hex characters to `u8`, return true on valid input. */ +static_inline bool hex_load_2(const u8 *src, u8 *dst) { + u8 c0 = hex_conv_table[src[0]]; + u8 c1 = hex_conv_table[src[1]]; + *dst = (u8)((c0 << 4) | c1); + return ((c0 | c1) & 0xF0) == 0; } -/** - Compare JSON key with token. 
- @param key a string key (yyjson_val or yyjson_mut_val) - @param token a JSON pointer token - @param len unescaped token length - @param esc number of escaped characters in this token - @return true if `str` is equals to `token` - */ -static_inline bool ptr_token_eq(void *key, - const char *token, usize len, usize esc) { - yyjson_val *val = (yyjson_val *)key; - if (unsafe_yyjson_get_len(val) != len) return false; - if (likely(!esc)) { - return memcmp(val->uni.str, token, len) == 0; - } else { - const char *str = val->uni.str; - for (; len-- > 0; token++, str++) { - if (*token == '~') { - if (*str != (*++token == '0' ? '~' : '/')) return false; - } else { - if (*str != *token) return false; - } - } - return true; - } +/** Match a hexadecimal numeric character: [0-9a-fA-F]. */ +static_inline bool char_is_hex(u8 c) { + return hex_conv_table[c] != 0xF0; } -/** - Get a value from array by token. - @param arr an array, should not be NULL or non-array type - @param token a JSON pointer token - @param len unescaped token length - @param esc number of escaped characters in this token - @return value at index, or NULL if token is not index or index is out of range - */ -static_inline yyjson_val *ptr_arr_get(yyjson_val *arr, const char *token, - usize len, usize esc) { - yyjson_val *val = unsafe_yyjson_get_first(arr); - usize num = unsafe_yyjson_get_len(arr), idx = 0; - if (unlikely(num == 0)) return NULL; - if (unlikely(!ptr_token_to_idx(token, len, &idx))) return NULL; - if (unlikely(idx >= num)) return NULL; - if (unsafe_yyjson_arr_is_flat(arr)) { - return val + idx; - } else { - while (idx-- > 0) val = unsafe_yyjson_get_next(val); - return val; - } -} - -/** - Get a value from object by token. 
- @param obj [in] an object, should not be NULL or non-object type - @param token [in] a JSON pointer token - @param len [in] unescaped token length - @param esc [in] number of escaped characters in this token - @return value associated with the token, or NULL if no value - */ -static_inline yyjson_val *ptr_obj_get(yyjson_val *obj, const char *token, - usize len, usize esc) { - yyjson_val *key = unsafe_yyjson_get_first(obj); - usize num = unsafe_yyjson_get_len(obj); - if (unlikely(num == 0)) return NULL; - for (; num > 0; num--, key = unsafe_yyjson_get_next(key + 1)) { - if (ptr_token_eq(key, token, len, esc)) return key + 1; - } - return NULL; -} - -/** - Get a value from array by token. - @param arr [in] an array, should not be NULL or non-array type - @param token [in] a JSON pointer token - @param len [in] unescaped token length - @param esc [in] number of escaped characters in this token - @param pre [out] previous (sibling) value of the returned value - @param last [out] whether index is last - @return value at index, or NULL if token is not index or index is out of range - */ -static_inline yyjson_mut_val *ptr_mut_arr_get(yyjson_mut_val *arr, - const char *token, - usize len, usize esc, - yyjson_mut_val **pre, - bool *last) { - yyjson_mut_val *val = (yyjson_mut_val *)arr->uni.ptr; /* last (tail) */ - usize num = unsafe_yyjson_get_len(arr), idx; - if (last) *last = false; - if (pre) *pre = NULL; - if (unlikely(num == 0)) { - if (last && len == 1 && (*token == '0' || *token == '-')) *last = true; - return NULL; - } - if (unlikely(!ptr_token_to_idx(token, len, &idx))) return NULL; - if (last) *last = (idx == num || idx == USIZE_MAX); - if (unlikely(idx >= num)) return NULL; - while (idx-- > 0) val = val->next; - if (pre) *pre = val; - return val->next; -} - -/** - Get a value from object by token. 
- @param obj [in] an object, should not be NULL or non-object type - @param token [in] a JSON pointer token - @param len [in] unescaped token length - @param esc [in] number of escaped characters in this token - @param pre [out] previous (sibling) key of the returned value's key - @return value associated with the token, or NULL if no value - */ -static_inline yyjson_mut_val *ptr_mut_obj_get(yyjson_mut_val *obj, - const char *token, - usize len, usize esc, - yyjson_mut_val **pre) { - yyjson_mut_val *pre_key = (yyjson_mut_val *)obj->uni.ptr, *key; - usize num = unsafe_yyjson_get_len(obj); - if (pre) *pre = NULL; - if (unlikely(num == 0)) return NULL; - for (; num > 0; num--, pre_key = key) { - key = pre_key->next->next; - if (ptr_token_eq(key, token, len, esc)) { - if (pre) *pre = pre_key; - return key->next; - } - } - return NULL; -} - -/** - Create a string value with JSON pointer token. - @param token [in] a JSON pointer token - @param len [in] unescaped token length - @param esc [in] number of escaped characters in this token - @param doc [in] used for memory allocation when creating value - @return new string value, or NULL if memory allocation failed - */ -static_inline yyjson_mut_val *ptr_new_key(const char *token, - usize len, usize esc, - yyjson_mut_doc *doc) { - const char *src = token; - if (likely(!esc)) { - return yyjson_mut_strncpy(doc, src, len); - } else { - const char *end = src + len + esc; - char *dst = unsafe_yyjson_mut_str_alc(doc, len + esc); - char *str = dst; - if (unlikely(!dst)) return NULL; - for (; src < end; src++, dst++) { - if (*src != '~') *dst = *src; - else *dst = (*++src == '0' ? 
'~' : '/'); - } - *dst = '\0'; - return yyjson_mut_strn(doc, str, len); - } -} - -/* macros for yyjson_ptr */ -#define return_err(_ret, _code, _pos, _msg) do { \ - if (err) { \ - err->code = YYJSON_PTR_ERR_##_code; \ - err->msg = _msg; \ - err->pos = (usize)(_pos); \ - } \ - return _ret; \ -} while (false) - -#define return_err_resolve(_ret, _pos) \ - return_err(_ret, RESOLVE, _pos, "JSON pointer cannot be resolved") -#define return_err_syntax(_ret, _pos) \ - return_err(_ret, SYNTAX, _pos, "invalid escaped character") -#define return_err_alloc(_ret) \ - return_err(_ret, MEMORY_ALLOCATION, 0, "failed to create value") - -yyjson_val *unsafe_yyjson_ptr_getx(yyjson_val *val, - const char *ptr, size_t ptr_len, - yyjson_ptr_err *err) { - - const char *hdr = ptr, *end = ptr + ptr_len, *token; - usize len, esc; - yyjson_type type; - - while (true) { - token = ptr_next_token(&ptr, end, &len, &esc); - if (unlikely(!token)) return_err_syntax(NULL, ptr - hdr); - type = unsafe_yyjson_get_type(val); - if (type == YYJSON_TYPE_OBJ) { - val = ptr_obj_get(val, token, len, esc); - } else if (type == YYJSON_TYPE_ARR) { - val = ptr_arr_get(val, token, len, esc); - } else { - val = NULL; - } - if (!val) return_err_resolve(NULL, token - hdr); - if (ptr == end) return val; - } -} - -yyjson_mut_val *unsafe_yyjson_mut_ptr_getx( - yyjson_mut_val *val, const char *ptr, size_t ptr_len, - yyjson_ptr_ctx *ctx, yyjson_ptr_err *err) { - - const char *hdr = ptr, *end = ptr + ptr_len, *token; - usize len, esc; - yyjson_mut_val *ctn, *pre = NULL; - yyjson_type type; - bool idx_is_last = false; - - while (true) { - token = ptr_next_token(&ptr, end, &len, &esc); - if (unlikely(!token)) return_err_syntax(NULL, ptr - hdr); - ctn = val; - type = unsafe_yyjson_get_type(val); - if (type == YYJSON_TYPE_OBJ) { - val = ptr_mut_obj_get(val, token, len, esc, &pre); - } else if (type == YYJSON_TYPE_ARR) { - val = ptr_mut_arr_get(val, token, len, esc, &pre, &idx_is_last); - } else { - val = NULL; - } - if (ctx && 
(ptr == end)) { - if (type == YYJSON_TYPE_OBJ || - (type == YYJSON_TYPE_ARR && (val || idx_is_last))) { - ctx->ctn = ctn; - ctx->pre = pre; - } - } - if (!val) return_err_resolve(NULL, token - hdr); - if (ptr == end) return val; - } -} - -bool unsafe_yyjson_mut_ptr_putx( - yyjson_mut_val *val, const char *ptr, size_t ptr_len, - yyjson_mut_val *new_val, yyjson_mut_doc *doc, bool create_parent, - bool insert_new, yyjson_ptr_ctx *ctx, yyjson_ptr_err *err) { - - const char *hdr = ptr, *end = ptr + ptr_len, *token; - usize token_len, esc, ctn_len; - yyjson_mut_val *ctn, *key, *pre = NULL; - yyjson_mut_val *sep_ctn = NULL, *sep_key = NULL, *sep_val = NULL; - yyjson_type ctn_type; - bool idx_is_last = false; - - /* skip exist parent nodes */ - while (true) { - token = ptr_next_token(&ptr, end, &token_len, &esc); - if (unlikely(!token)) return_err_syntax(false, ptr - hdr); - ctn = val; - ctn_type = unsafe_yyjson_get_type(ctn); - if (ctn_type == YYJSON_TYPE_OBJ) { - val = ptr_mut_obj_get(ctn, token, token_len, esc, &pre); - } else if (ctn_type == YYJSON_TYPE_ARR) { - val = ptr_mut_arr_get(ctn, token, token_len, esc, &pre, - &idx_is_last); - } else return_err_resolve(false, token - hdr); - if (!val) break; - if (ptr == end) break; /* is last token */ - } - - /* create parent nodes if not exist */ - if (unlikely(ptr != end)) { /* not last token */ - if (!create_parent) return_err_resolve(false, token - hdr); - - /* add value at last index if container is array */ - if (ctn_type == YYJSON_TYPE_ARR) { - if (!idx_is_last || !insert_new) { - return_err_resolve(false, token - hdr); - } - val = yyjson_mut_obj(doc); - if (!val) return_err_alloc(false); - - /* delay attaching until all operations are completed */ - sep_ctn = ctn; - sep_key = NULL; - sep_val = val; - - /* move to next token */ - ctn = val; - val = NULL; - ctn_type = YYJSON_TYPE_OBJ; - token = ptr_next_token(&ptr, end, &token_len, &esc); - if (unlikely(!token)) return_err_resolve(false, token - hdr); - } - - /* 
container is object, create parent nodes */ - while (ptr != end) { /* not last token */ - key = ptr_new_key(token, token_len, esc, doc); - if (!key) return_err_alloc(false); - val = yyjson_mut_obj(doc); - if (!val) return_err_alloc(false); - - /* delay attaching until all operations are completed */ - if (!sep_ctn) { - sep_ctn = ctn; - sep_key = key; - sep_val = val; - } else { - yyjson_mut_obj_add(ctn, key, val); - } - - /* move to next token */ - ctn = val; - val = NULL; - token = ptr_next_token(&ptr, end, &token_len, &esc); - if (unlikely(!token)) return_err_syntax(false, ptr - hdr); - } - } - - /* JSON pointer is resolved, insert or replace target value */ - ctn_len = unsafe_yyjson_get_len(ctn); - if (ctn_type == YYJSON_TYPE_OBJ) { - if (ctx) ctx->ctn = ctn; - if (!val || insert_new) { - /* insert new key-value pair */ - key = ptr_new_key(token, token_len, esc, doc); - if (unlikely(!key)) return_err_alloc(false); - if (ctx) ctx->pre = ctn_len ? (yyjson_mut_val *)ctn->uni.ptr : key; - unsafe_yyjson_mut_obj_add(ctn, key, new_val, ctn_len); - } else { - /* replace exist value */ - key = pre->next->next; - if (ctx) ctx->pre = pre; - if (ctx) ctx->old = val; - yyjson_mut_obj_put(ctn, key, new_val); - } - } else { - /* array */ - if (ctx && (val || idx_is_last)) ctx->ctn = ctn; - if (insert_new) { - /* append new value */ - if (val) { - pre->next = new_val; - new_val->next = val; - if (ctx) ctx->pre = pre; - unsafe_yyjson_set_len(ctn, ctn_len + 1); - } else if (idx_is_last) { - if (ctx) ctx->pre = ctn_len ? 
- (yyjson_mut_val *)ctn->uni.ptr : new_val; - yyjson_mut_arr_append(ctn, new_val); - } else { - return_err_resolve(false, token - hdr); - } - } else { - /* replace exist value */ - if (!val) return_err_resolve(false, token - hdr); - if (ctn_len > 1) { - new_val->next = val->next; - pre->next = new_val; - if (ctn->uni.ptr == val) ctn->uni.ptr = new_val; - } else { - new_val->next = new_val; - ctn->uni.ptr = new_val; - pre = new_val; - } - if (ctx) ctx->pre = pre; - if (ctx) ctx->old = val; - } - } - - /* all operations are completed, attach the new components to the target */ - if (unlikely(sep_ctn)) { - if (sep_key) yyjson_mut_obj_add(sep_ctn, sep_key, sep_val); - else yyjson_mut_arr_append(sep_ctn, sep_val); - } - return true; -} - -yyjson_mut_val *unsafe_yyjson_mut_ptr_replacex( - yyjson_mut_val *val, const char *ptr, size_t len, yyjson_mut_val *new_val, - yyjson_ptr_ctx *ctx, yyjson_ptr_err *err) { - - yyjson_mut_val *cur_val; - yyjson_ptr_ctx cur_ctx; - memset(&cur_ctx, 0, sizeof(cur_ctx)); - if (!ctx) ctx = &cur_ctx; - cur_val = unsafe_yyjson_mut_ptr_getx(val, ptr, len, ctx, err); - if (!cur_val) return NULL; - - if (yyjson_mut_is_obj(ctx->ctn)) { - yyjson_mut_val *key = ctx->pre->next->next; - yyjson_mut_obj_put(ctx->ctn, key, new_val); - } else { - yyjson_ptr_ctx_replace(ctx, new_val); - } - ctx->old = cur_val; - return cur_val; -} - -yyjson_mut_val *unsafe_yyjson_mut_ptr_removex( - yyjson_mut_val *val, const char *ptr, size_t len, - yyjson_ptr_ctx *ctx, yyjson_ptr_err *err) { - - yyjson_mut_val *cur_val; - yyjson_ptr_ctx cur_ctx; - memset(&cur_ctx, 0, sizeof(cur_ctx)); - if (!ctx) ctx = &cur_ctx; - cur_val = unsafe_yyjson_mut_ptr_getx(val, ptr, len, ctx, err); - if (cur_val) { - if (yyjson_mut_is_obj(ctx->ctn)) { - yyjson_mut_val *key = ctx->pre->next->next; - yyjson_mut_obj_put(ctx->ctn, key, NULL); - } else { - yyjson_ptr_ctx_remove(ctx); - } - ctx->pre = NULL; - ctx->old = cur_val; - } - return cur_val; -} - -/* macros for yyjson_ptr */ -#undef 
return_err -#undef return_err_resolve -#undef return_err_syntax -#undef return_err_alloc - /*============================================================================== - * JSON Patch API (RFC 6902) + * MARK: - UTF8 Validation (Private) + * Each Unicode code point is encoded using 1 to 4 bytes in UTF-8. + * Validation is performed using a 4-byte mask and pattern-based approach, + * which requires the input data to be padded with four zero bytes at the end. *============================================================================*/ -/* JSON Patch operation */ -typedef enum patch_op { - PATCH_OP_ADD, /* path, value */ - PATCH_OP_REMOVE, /* path */ - PATCH_OP_REPLACE, /* path, value */ - PATCH_OP_MOVE, /* from, path */ - PATCH_OP_COPY, /* from, path */ - PATCH_OP_TEST, /* path, value */ - PATCH_OP_NONE /* invalid */ -} patch_op; +/* Macro for concatenating four u8 into a u32 and keeping the byte order. */ +#if YYJSON_ENDIAN == YYJSON_LITTLE_ENDIAN +# define utf8_seq_def(name, a, b, c, d) \ + static const u32 utf8_seq_##name = 0x##d##c##b##a##UL; +# define utf8_seq(name) utf8_seq_##name +#elif YYJSON_ENDIAN == YYJSON_BIG_ENDIAN +# define utf8_seq_def(name, a, b, c, d) \ + static const u32 utf8_seq_##name = 0x##a##b##c##d##UL; +# define utf8_seq(name) utf8_seq_##name +#else +# define utf8_seq_def(name, a, b, c, d) \ + static const v32_uni utf8_uni_##name = {{ 0x##a, 0x##b, 0x##c, 0x##d }}; +# define utf8_seq(name) utf8_uni_##name.u +#endif -static patch_op patch_op_get(yyjson_val *op) { - const char *str = op->uni.str; - switch (unsafe_yyjson_get_len(op)) { - case 3: - if (!memcmp(str, "add", 3)) return PATCH_OP_ADD; - return PATCH_OP_NONE; - case 4: - if (!memcmp(str, "move", 4)) return PATCH_OP_MOVE; - if (!memcmp(str, "copy", 4)) return PATCH_OP_COPY; - if (!memcmp(str, "test", 4)) return PATCH_OP_TEST; - return PATCH_OP_NONE; - case 6: - if (!memcmp(str, "remove", 6)) return PATCH_OP_REMOVE; - return PATCH_OP_NONE; - case 7: - if (!memcmp(str, "replace", 7)) 
return PATCH_OP_REPLACE; - return PATCH_OP_NONE; - default: - return PATCH_OP_NONE; - } -} +/* + 1-byte sequence (U+0000 to U+007F) + bit min [.......0] (U+0000) + bit max [.1111111] (U+007F) + bit mask [x.......] (80) + bit pattern [0.......] (00) + */ +utf8_seq_def(b1_mask, 80, 00, 00, 00) +utf8_seq_def(b1_patt, 00, 00, 00, 00) +#define is_utf8_seq1(uni) ( \ + ((uni & utf8_seq(b1_mask)) == utf8_seq(b1_patt)) ) -/* macros for yyjson_patch */ -#define return_err(_code, _msg) do { \ - if (err->ptr.code == YYJSON_PTR_ERR_MEMORY_ALLOCATION) { \ - err->code = YYJSON_PATCH_ERROR_MEMORY_ALLOCATION; \ - err->msg = _msg; \ - memset(&err->ptr, 0, sizeof(yyjson_ptr_err)); \ - } else { \ - err->code = YYJSON_PATCH_ERROR_##_code; \ - err->msg = _msg; \ - err->idx = iter.idx ? iter.idx - 1 : 0; \ - } \ - return NULL; \ -} while (false) +/* + 2-byte sequence (U+0080 to U+07FF) + bit min [......10 ..000000] (U+0080) + bit max [...11111 ..111111] (U+07FF) + bit mask [xxx..... xx......] (E0 C0) + bit pattern [110..... 10......] (C0 80) + bit require [...xxxx. ........] (1E 00) + */ +utf8_seq_def(b2_mask, E0, C0, 00, 00) +utf8_seq_def(b2_patt, C0, 80, 00, 00) +utf8_seq_def(b2_requ, 1E, 00, 00, 00) +#define is_utf8_seq2(uni) ( \ + ((uni & utf8_seq(b2_mask)) == utf8_seq(b2_patt)) && \ + ((uni & utf8_seq(b2_requ))) ) -#define return_err_copy() \ - return_err(MEMORY_ALLOCATION, "failed to copy value") -#define return_err_key(_key) \ - return_err(MISSING_KEY, "missing key " _key) -#define return_err_val(_key) \ - return_err(INVALID_MEMBER, "invalid member " _key) +/* + 3-byte sequence (U+0800 to U+FFFF) + bit min [........ ..100000 ..000000] (U+0800) + bit max [....1111 ..111111 ..111111] (U+FFFF) + bit mask [xxxx.... xx...... xx......] (F0 C0 C0) + bit pattern [1110.... 10...... 10......] (E0 80 80) + bit require [....xxxx ..x..... ........] 
(0F 20 00) -#define ptr_get(_ptr) yyjson_mut_ptr_getx( \ - root, _ptr->uni.str, _ptr##_len, NULL, &err->ptr) -#define ptr_add(_ptr, _val) yyjson_mut_ptr_addx( \ - root, _ptr->uni.str, _ptr##_len, _val, doc, false, NULL, &err->ptr) -#define ptr_remove(_ptr) yyjson_mut_ptr_removex( \ - root, _ptr->uni.str, _ptr##_len, NULL, &err->ptr) -#define ptr_replace(_ptr, _val)yyjson_mut_ptr_replacex( \ - root, _ptr->uni.str, _ptr##_len, _val, NULL, &err->ptr) + 3-byte invalid sequence, reserved for surrogate halves (U+D800 to U+DFFF) + bit min [....1101 ..100000 ..000000] (U+D800) + bit max [....1101 ..111111 ..111111] (U+DFFF) + bit mask [....xxxx ..x..... ........] (0F 20 00) + bit pattern [....1101 ..1..... ........] (0D 20 00) + */ +utf8_seq_def(b3_mask, F0, C0, C0, 00) +utf8_seq_def(b3_patt, E0, 80, 80, 00) +utf8_seq_def(b3_requ, 0F, 20, 00, 00) +utf8_seq_def(b3_erro, 0D, 20, 00, 00) +#define is_utf8_seq3(uni) ( \ + ((uni & utf8_seq(b3_mask)) == utf8_seq(b3_patt)) && \ + ((tmp = (uni & utf8_seq(b3_requ)))) && \ + ((tmp != utf8_seq(b3_erro))) ) -yyjson_mut_val *yyjson_patch(yyjson_mut_doc *doc, - yyjson_val *orig, - yyjson_val *patch, - yyjson_patch_err *err) { - - yyjson_mut_val *root; - yyjson_val *obj; - yyjson_arr_iter iter; - yyjson_patch_err err_tmp; - if (!err) err = &err_tmp; - memset(err, 0, sizeof(*err)); - memset(&iter, 0, sizeof(iter)); - - if (unlikely(!doc || !orig || !patch)) { - return_err(INVALID_PARAMETER, "input parameter is NULL"); - } - if (unlikely(!yyjson_is_arr(patch))) { - return_err(INVALID_PARAMETER, "input patch is not array"); - } - root = yyjson_val_mut_copy(doc, orig); - if (unlikely(!root)) return_err_copy(); - - /* iterate through the patch array */ - yyjson_arr_iter_init(patch, &iter); - while ((obj = yyjson_arr_iter_next(&iter))) { - patch_op op_enum; - yyjson_val *op, *path, *from = NULL, *value; - yyjson_mut_val *val = NULL, *test; - usize path_len, from_len = 0; - if (unlikely(!unsafe_yyjson_is_obj(obj))) { - 
return_err(INVALID_OPERATION, "JSON patch operation is not object"); - } - - /* get required member: op */ - op = yyjson_obj_get(obj, "op"); - if (unlikely(!op)) return_err_key("`op`"); - if (unlikely(!yyjson_is_str(op))) return_err_val("`op`"); - op_enum = patch_op_get(op); - - /* get required member: path */ - path = yyjson_obj_get(obj, "path"); - if (unlikely(!path)) return_err_key("`path`"); - if (unlikely(!yyjson_is_str(path))) return_err_val("`path`"); - path_len = unsafe_yyjson_get_len(path); - - /* get required member: value, from */ - switch ((int)op_enum) { - case PATCH_OP_ADD: case PATCH_OP_REPLACE: case PATCH_OP_TEST: - value = yyjson_obj_get(obj, "value"); - if (unlikely(!value)) return_err_key("`value`"); - val = yyjson_val_mut_copy(doc, value); - if (unlikely(!val)) return_err_copy(); - break; - case PATCH_OP_MOVE: case PATCH_OP_COPY: - from = yyjson_obj_get(obj, "from"); - if (unlikely(!from)) return_err_key("`from`"); - if (unlikely(!yyjson_is_str(from))) return_err_val("`from`"); - from_len = unsafe_yyjson_get_len(from); - break; - default: - break; - } - - /* perform an operation */ - switch ((int)op_enum) { - case PATCH_OP_ADD: /* add(path, val) */ - if (unlikely(path_len == 0)) { root = val; break; } - if (unlikely(!ptr_add(path, val))) { - return_err(POINTER, "failed to add `path`"); - } - break; - case PATCH_OP_REMOVE: /* remove(path) */ - if (unlikely(!ptr_remove(path))) { - return_err(POINTER, "failed to remove `path`"); - } - break; - case PATCH_OP_REPLACE: /* replace(path, val) */ - if (unlikely(path_len == 0)) { root = val; break; } - if (unlikely(!ptr_replace(path, val))) { - return_err(POINTER, "failed to replace `path`"); - } - break; - case PATCH_OP_MOVE: /* val = remove(from), add(path, val) */ - if (unlikely(from_len == 0 && path_len == 0)) break; - val = ptr_remove(from); - if (unlikely(!val)) { - return_err(POINTER, "failed to remove `from`"); - } - if (unlikely(path_len == 0)) { root = val; break; } - if (unlikely(!ptr_add(path, 
val))) { - return_err(POINTER, "failed to add `path`"); - } - break; - case PATCH_OP_COPY: /* val = get(from).copy, add(path, val) */ - val = ptr_get(from); - if (unlikely(!val)) { - return_err(POINTER, "failed to get `from`"); - } - if (unlikely(path_len == 0)) { root = val; break; } - val = yyjson_mut_val_mut_copy(doc, val); - if (unlikely(!val)) return_err_copy(); - if (unlikely(!ptr_add(path, val))) { - return_err(POINTER, "failed to add `path`"); - } - break; - case PATCH_OP_TEST: /* test = get(path), test.eq(val) */ - test = ptr_get(path); - if (unlikely(!test)) { - return_err(POINTER, "failed to get `path`"); - } - if (unlikely(!yyjson_mut_equals(val, test))) { - return_err(EQUAL, "failed to test equal"); - } - break; - default: - return_err(INVALID_MEMBER, "unsupported `op`"); - } - } - return root; -} - -yyjson_mut_val *yyjson_mut_patch(yyjson_mut_doc *doc, - yyjson_mut_val *orig, - yyjson_mut_val *patch, - yyjson_patch_err *err) { - yyjson_mut_val *root, *obj; - yyjson_mut_arr_iter iter; - yyjson_patch_err err_tmp; - if (!err) err = &err_tmp; - memset(err, 0, sizeof(*err)); - memset(&iter, 0, sizeof(iter)); - - if (unlikely(!doc || !orig || !patch)) { - return_err(INVALID_PARAMETER, "input parameter is NULL"); - } - if (unlikely(!yyjson_mut_is_arr(patch))) { - return_err(INVALID_PARAMETER, "input patch is not array"); - } - root = yyjson_mut_val_mut_copy(doc, orig); - if (unlikely(!root)) return_err_copy(); - - /* iterate through the patch array */ - yyjson_mut_arr_iter_init(patch, &iter); - while ((obj = yyjson_mut_arr_iter_next(&iter))) { - patch_op op_enum; - yyjson_mut_val *op, *path, *from = NULL, *value; - yyjson_mut_val *val = NULL, *test; - usize path_len, from_len = 0; - if (!unsafe_yyjson_is_obj(obj)) { - return_err(INVALID_OPERATION, "JSON patch operation is not object"); - } - - /* get required member: op */ - op = yyjson_mut_obj_get(obj, "op"); - if (unlikely(!op)) return_err_key("`op`"); - if (unlikely(!yyjson_mut_is_str(op))) 
return_err_val("`op`"); - op_enum = patch_op_get((yyjson_val *)(void *)op); - - /* get required member: path */ - path = yyjson_mut_obj_get(obj, "path"); - if (unlikely(!path)) return_err_key("`path`"); - if (unlikely(!yyjson_mut_is_str(path))) return_err_val("`path`"); - path_len = unsafe_yyjson_get_len(path); - - /* get required member: value, from */ - switch ((int)op_enum) { - case PATCH_OP_ADD: case PATCH_OP_REPLACE: case PATCH_OP_TEST: - value = yyjson_mut_obj_get(obj, "value"); - if (unlikely(!value)) return_err_key("`value`"); - val = yyjson_mut_val_mut_copy(doc, value); - if (unlikely(!val)) return_err_copy(); - break; - case PATCH_OP_MOVE: case PATCH_OP_COPY: - from = yyjson_mut_obj_get(obj, "from"); - if (unlikely(!from)) return_err_key("`from`"); - if (unlikely(!yyjson_mut_is_str(from))) { - return_err_val("`from`"); - } - from_len = unsafe_yyjson_get_len(from); - break; - default: - break; - } - - /* perform an operation */ - switch ((int)op_enum) { - case PATCH_OP_ADD: /* add(path, val) */ - if (unlikely(path_len == 0)) { root = val; break; } - if (unlikely(!ptr_add(path, val))) { - return_err(POINTER, "failed to add `path`"); - } - break; - case PATCH_OP_REMOVE: /* remove(path) */ - if (unlikely(!ptr_remove(path))) { - return_err(POINTER, "failed to remove `path`"); - } - break; - case PATCH_OP_REPLACE: /* replace(path, val) */ - if (unlikely(path_len == 0)) { root = val; break; } - if (unlikely(!ptr_replace(path, val))) { - return_err(POINTER, "failed to replace `path`"); - } - break; - case PATCH_OP_MOVE: /* val = remove(from), add(path, val) */ - if (unlikely(from_len == 0 && path_len == 0)) break; - val = ptr_remove(from); - if (unlikely(!val)) { - return_err(POINTER, "failed to remove `from`"); - } - if (unlikely(path_len == 0)) { root = val; break; } - if (unlikely(!ptr_add(path, val))) { - return_err(POINTER, "failed to add `path`"); - } - break; - case PATCH_OP_COPY: /* val = get(from).copy, add(path, val) */ - val = ptr_get(from); - if 
(unlikely(!val)) { - return_err(POINTER, "failed to get `from`"); - } - if (unlikely(path_len == 0)) { root = val; break; } - val = yyjson_mut_val_mut_copy(doc, val); - if (unlikely(!val)) return_err_copy(); - if (unlikely(!ptr_add(path, val))) { - return_err(POINTER, "failed to add `path`"); - } - break; - case PATCH_OP_TEST: /* test = get(path), test.eq(val) */ - test = ptr_get(path); - if (unlikely(!test)) { - return_err(POINTER, "failed to get `path`"); - } - if (unlikely(!yyjson_mut_equals(val, test))) { - return_err(EQUAL, "failed to test equal"); - } - break; - default: - return_err(INVALID_MEMBER, "unsupported `op`"); - } - } - return root; -} - -/* macros for yyjson_patch */ -#undef return_err -#undef return_err_copy -#undef return_err_key -#undef return_err_val -#undef ptr_get -#undef ptr_add -#undef ptr_remove -#undef ptr_replace +/* + 4-byte sequence (U+10000 to U+10FFFF) + bit min [........ ...10000 ..000000 ..000000] (U+10000) + bit max [.....100 ..001111 ..111111 ..111111] (U+10FFFF) + bit mask [xxxxx... xx...... xx...... xx......] (F8 C0 C0 C0) + bit pattern [11110... 10...... 10...... 10......] (F0 80 80 80) + bit require [.....xxx ..xx.... ........ ........] (07 30 00 00) + bit require 1 [.....x.. ........ ........ ........] (04 00 00 00) + bit require 2 [......xx ..xx.... ........ ........] 
(03 30 00 00) + */ +utf8_seq_def(b4_mask, F8, C0, C0, C0) +utf8_seq_def(b4_patt, F0, 80, 80, 80) +utf8_seq_def(b4_requ, 07, 30, 00, 00) +utf8_seq_def(b4_req1, 04, 00, 00, 00) +utf8_seq_def(b4_req2, 03, 30, 00, 00) +#define is_utf8_seq4(uni) ( \ + ((uni & utf8_seq(b4_mask)) == utf8_seq(b4_patt)) && \ + ((tmp = (uni & utf8_seq(b4_requ)))) && \ + ((tmp & utf8_seq(b4_req1)) == 0 || (tmp & utf8_seq(b4_req2)) == 0) ) /*============================================================================== - * JSON Merge-Patch API (RFC 7386) - *============================================================================*/ - -yyjson_mut_val *yyjson_merge_patch(yyjson_mut_doc *doc, - yyjson_val *orig, - yyjson_val *patch) { - usize idx, max; - yyjson_val *key, *orig_val, *patch_val, local_orig; - yyjson_mut_val *builder, *mut_key, *mut_val, *merged_val; - - if (unlikely(!yyjson_is_obj(patch))) { - return yyjson_val_mut_copy(doc, patch); - } - - builder = yyjson_mut_obj(doc); - if (unlikely(!builder)) return NULL; - - memset(&local_orig, 0, sizeof(local_orig)); - if (!yyjson_is_obj(orig)) { - orig = &local_orig; - orig->tag = builder->tag; - orig->uni = builder->uni; - } - - /* If orig is contributing, copy any items not modified by the patch */ - if (orig != &local_orig) { - yyjson_obj_foreach(orig, idx, max, key, orig_val) { - patch_val = yyjson_obj_getn(patch, - unsafe_yyjson_get_str(key), - unsafe_yyjson_get_len(key)); - if (!patch_val) { - mut_key = yyjson_val_mut_copy(doc, key); - mut_val = yyjson_val_mut_copy(doc, orig_val); - if (!yyjson_mut_obj_add(builder, mut_key, mut_val)) return NULL; - } - } - } - - /* Merge items modified by the patch. */ - yyjson_obj_foreach(patch, idx, max, key, patch_val) { - /* null indicates the field is removed. 
*/ - if (unsafe_yyjson_is_null(patch_val)) { - continue; - } - mut_key = yyjson_val_mut_copy(doc, key); - orig_val = yyjson_obj_getn(orig, - unsafe_yyjson_get_str(key), - unsafe_yyjson_get_len(key)); - merged_val = yyjson_merge_patch(doc, orig_val, patch_val); - if (!yyjson_mut_obj_add(builder, mut_key, merged_val)) return NULL; - } - - return builder; -} - -yyjson_mut_val *yyjson_mut_merge_patch(yyjson_mut_doc *doc, - yyjson_mut_val *orig, - yyjson_mut_val *patch) { - usize idx, max; - yyjson_mut_val *key, *orig_val, *patch_val, local_orig; - yyjson_mut_val *builder, *mut_key, *mut_val, *merged_val; - - if (unlikely(!yyjson_mut_is_obj(patch))) { - return yyjson_mut_val_mut_copy(doc, patch); - } - - builder = yyjson_mut_obj(doc); - if (unlikely(!builder)) return NULL; - - memset(&local_orig, 0, sizeof(local_orig)); - if (!yyjson_mut_is_obj(orig)) { - orig = &local_orig; - orig->tag = builder->tag; - orig->uni = builder->uni; - } - - /* If orig is contributing, copy any items not modified by the patch */ - if (orig != &local_orig) { - yyjson_mut_obj_foreach(orig, idx, max, key, orig_val) { - patch_val = yyjson_mut_obj_getn(patch, - unsafe_yyjson_get_str(key), - unsafe_yyjson_get_len(key)); - if (!patch_val) { - mut_key = yyjson_mut_val_mut_copy(doc, key); - mut_val = yyjson_mut_val_mut_copy(doc, orig_val); - if (!yyjson_mut_obj_add(builder, mut_key, mut_val)) return NULL; - } - } - } - - /* Merge items modified by the patch. */ - yyjson_mut_obj_foreach(patch, idx, max, key, patch_val) { - /* null indicates the field is removed. 
*/ - if (unsafe_yyjson_is_null(patch_val)) { - continue; - } - mut_key = yyjson_mut_val_mut_copy(doc, key); - orig_val = yyjson_mut_obj_getn(orig, - unsafe_yyjson_get_str(key), - unsafe_yyjson_get_len(key)); - merged_val = yyjson_mut_merge_patch(doc, orig_val, patch_val); - if (!yyjson_mut_obj_add(builder, mut_key, merged_val)) return NULL; - } - - return builder; -} - -#endif /* YYJSON_DISABLE_UTILS */ - - - -/*============================================================================== - * Power10 Lookup Table + * MARK: - Power10 Lookup Table (Private) * These data are used by the floating-point number reader and writer. *============================================================================*/ -#if (!YYJSON_DISABLE_READER || !YYJSON_DISABLE_WRITER) && \ - (!YYJSON_DISABLE_FAST_FP_CONV) +#if !YYJSON_DISABLE_FAST_FP_CONV + +/** Maximum pow10 exponent that can be represented exactly as a float64. */ +#define F64_POW10_MAX_EXACT_EXP 22 + +/** Cached pow10 table. */ +static const f64 f64_pow10_table[F64_POW10_MAX_EXACT_EXP + 1] = { + 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, + 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22 +}; + +/** Maximum pow10 exponent that can be represented exactly as a uint64. 
*/ +#define U64_POW10_MAX_EXACT_EXP 19 + +/** Table: [ 10^0, ..., 10^19 ] (generate with misc/make_tables.c) */ +static const u64 u64_pow10_table[U64_POW10_MAX_EXACT_EXP + 1] = { + U64(0x00000000, 0x00000001), U64(0x00000000, 0x0000000A), + U64(0x00000000, 0x00000064), U64(0x00000000, 0x000003E8), + U64(0x00000000, 0x00002710), U64(0x00000000, 0x000186A0), + U64(0x00000000, 0x000F4240), U64(0x00000000, 0x00989680), + U64(0x00000000, 0x05F5E100), U64(0x00000000, 0x3B9ACA00), + U64(0x00000002, 0x540BE400), U64(0x00000017, 0x4876E800), + U64(0x000000E8, 0xD4A51000), U64(0x00000918, 0x4E72A000), + U64(0x00005AF3, 0x107A4000), U64(0x00038D7E, 0xA4C68000), + U64(0x002386F2, 0x6FC10000), U64(0x01634578, 0x5D8A0000), + U64(0x0DE0B6B3, 0xA7640000), U64(0x8AC72304, 0x89E80000) +}; /** Minimum decimal exponent in pow10_sig_table. */ #define POW10_SIG_TABLE_MIN_EXP -343 @@ -3564,272 +1923,1184 @@ static_inline void pow10_table_get_exp(i32 exp10, i32 *exp2) { /*============================================================================== - * JSON Character Matcher + * MARK: - Number and Bit Utils (Private) *============================================================================*/ -/** Character type */ -typedef u8 char_type; - -/** Whitespace character: ' ', '\\t', '\\n', '\\r'. */ -static const char_type CHAR_TYPE_SPACE = 1 << 0; - -/** Number character: '-', [0-9]. */ -static const char_type CHAR_TYPE_NUMBER = 1 << 1; - -/** JSON Escaped character: '"', '\', [0x00-0x1F]. */ -static const char_type CHAR_TYPE_ESC_ASCII = 1 << 2; - -/** Non-ASCII character: [0x80-0xFF]. */ -static const char_type CHAR_TYPE_NON_ASCII = 1 << 3; - -/** JSON container character: '{', '['. */ -static const char_type CHAR_TYPE_CONTAINER = 1 << 4; - -/** Comment character: '/'. */ -static const char_type CHAR_TYPE_COMMENT = 1 << 5; - -/** Line end character: '\\n', '\\r', '\0'. */ -static const char_type CHAR_TYPE_LINE_END = 1 << 6; - -/** Hexadecimal numeric character: [0-9a-fA-F]. 
*/ -static const char_type CHAR_TYPE_HEX = 1 << 7; - -/** Character type table (generate with misc/make_tables.c) */ -static const char_type char_table[256] = { - 0x44, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, - 0x04, 0x05, 0x45, 0x04, 0x04, 0x45, 0x04, 0x04, - 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, - 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, - 0x01, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x20, - 0x82, 0x82, 0x82, 0x82, 0x82, 0x82, 0x82, 0x82, - 0x82, 0x82, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x10, 0x04, 0x00, 0x00, 0x00, - 0x00, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, - 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, - 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, - 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, - 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, - 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, - 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, - 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, - 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, - 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, - 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, - 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, - 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, - 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, - 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, - 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, - 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08 -}; - -/** Match a character with specified type. */ -static_inline bool char_is_type(u8 c, char_type type) { - return (char_table[c] & type) != 0; +/** Convert bits to double. 
*/ +static_inline f64 f64_from_bits(u64 u) { + f64 f; + memcpy(&f, &u, sizeof(u)); + return f; } -/** Match a whitespace: ' ', '\\t', '\\n', '\\r'. */ -static_inline bool char_is_space(u8 c) { - return char_is_type(c, (char_type)CHAR_TYPE_SPACE); +/** Convert double to bits. */ +static_inline u64 f64_to_bits(f64 f) { + u64 u; + memcpy(&u, &f, sizeof(u)); + return u; } -/** Match a whitespace or comment: ' ', '\\t', '\\n', '\\r', '/'. */ -static_inline bool char_is_space_or_comment(u8 c) { - return char_is_type(c, (char_type)(CHAR_TYPE_SPACE | CHAR_TYPE_COMMENT)); +/** Convert double to bits. */ +static_inline u32 f32_to_bits(f32 f) { + u32 u; + memcpy(&u, &f, sizeof(u)); + return u; } -/** Match a JSON number: '-', [0-9]. */ -static_inline bool char_is_num(u8 c) { - return char_is_type(c, (char_type)CHAR_TYPE_NUMBER); +/** Get 'infinity' bits with sign. */ +static_inline u64 f64_bits_inf(bool sign) { +#if YYJSON_HAS_IEEE_754 + return F64_BITS_INF | ((u64)sign << 63); +#elif defined(INFINITY) + return f64_to_bits(sign ? -INFINITY : INFINITY); +#else + return f64_to_bits(sign ? -HUGE_VAL : HUGE_VAL); +#endif } -/** Match a JSON container: '{', '['. */ -static_inline bool char_is_container(u8 c) { - return char_is_type(c, (char_type)CHAR_TYPE_CONTAINER); +/** Get 'nan' bits with sign. */ +static_inline u64 f64_bits_nan(bool sign) { +#if YYJSON_HAS_IEEE_754 + return F64_BITS_NAN | ((u64)sign << 63); +#elif defined(NAN) + return f64_to_bits(sign ? (f64)-NAN : (f64)NAN); +#else + return f64_to_bits((sign ? -0.0 : 0.0) / 0.0); +#endif } -/** Match a stop character in ASCII string: '"', '\', [0x00-0x1F,0x80-0xFF]. */ -static_inline bool char_is_ascii_stop(u8 c) { - return char_is_type(c, (char_type)(CHAR_TYPE_ESC_ASCII | - CHAR_TYPE_NON_ASCII)); +/** Casting double to float, allow overflow. 
*/ +#if yyjson_has_attribute(no_sanitize) +__attribute__((no_sanitize("undefined"))) +#elif yyjson_gcc_available(4, 9, 0) +__attribute__((__no_sanitize_undefined__)) +#endif +static_inline f32 f64_to_f32(f64 val) { + return (f32)val; } -/** Match a line end character: '\\n', '\\r', '\0'. */ -static_inline bool char_is_line_end(u8 c) { - return char_is_type(c, (char_type)CHAR_TYPE_LINE_END); +/** Returns the number of leading 0-bits in value (input should not be 0). */ +static_inline u32 u64_lz_bits(u64 v) { +#if GCC_HAS_CLZLL + return (u32)__builtin_clzll(v); +#elif MSC_HAS_BIT_SCAN_64 + unsigned long r; + _BitScanReverse64(&r, v); + return (u32)63 - (u32)r; +#elif MSC_HAS_BIT_SCAN + unsigned long hi, lo; + bool hi_set = _BitScanReverse(&hi, (u32)(v >> 32)) != 0; + _BitScanReverse(&lo, (u32)v); + hi |= 32; + return (u32)63 - (u32)(hi_set ? hi : lo); +#else + /* branchless, use De Bruijn sequence */ + /* see: https://www.chessprogramming.org/BitScan */ + const u8 table[64] = { + 63, 16, 62, 7, 15, 36, 61, 3, 6, 14, 22, 26, 35, 47, 60, 2, + 9, 5, 28, 11, 13, 21, 42, 19, 25, 31, 34, 40, 46, 52, 59, 1, + 17, 8, 37, 4, 23, 27, 48, 10, 29, 12, 43, 20, 32, 41, 53, 18, + 38, 24, 49, 30, 44, 33, 54, 39, 50, 45, 55, 51, 56, 57, 58, 0 + }; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + v |= v >> 32; + return table[(v * U64(0x03F79D71, 0xB4CB0A89)) >> 58]; +#endif } -/** Match a hexadecimal numeric character: [0-9a-fA-F]. */ -static_inline bool char_is_hex(u8 c) { - return char_is_type(c, (char_type)CHAR_TYPE_HEX); +/** Returns the number of trailing 0-bits in value (input should not be 0). */ +static_inline u32 u64_tz_bits(u64 v) { +#if GCC_HAS_CTZLL + return (u32)__builtin_ctzll(v); +#elif MSC_HAS_BIT_SCAN_64 + unsigned long r; + _BitScanForward64(&r, v); + return (u32)r; +#elif MSC_HAS_BIT_SCAN + unsigned long lo, hi; + bool lo_set = _BitScanForward(&lo, (u32)(v)) != 0; + _BitScanForward(&hi, (u32)(v >> 32)); + hi += 32; + return lo_set ? 
lo : hi; +#else + /* branchless, use De Bruijn sequence */ + /* see: https://www.chessprogramming.org/BitScan */ + const u8 table[64] = { + 0, 1, 2, 53, 3, 7, 54, 27, 4, 38, 41, 8, 34, 55, 48, 28, + 62, 5, 39, 46, 44, 42, 22, 9, 24, 35, 59, 56, 49, 18, 29, 11, + 63, 52, 6, 26, 37, 40, 33, 47, 61, 45, 43, 21, 23, 58, 17, 10, + 51, 25, 36, 32, 60, 20, 57, 16, 50, 31, 19, 15, 30, 14, 13, 12 + }; + return table[((v & (~v + 1)) * U64(0x022FDD63, 0xCC95386D)) >> 58]; +#endif +} + +/** Multiplies two 64-bit unsigned integers (a * b), + returns the 128-bit result as 'hi' and 'lo'. */ +static_inline void u128_mul(u64 a, u64 b, u64 *hi, u64 *lo) { +#if YYJSON_HAS_INT128 + u128 m = (u128)a * b; + *hi = (u64)(m >> 64); + *lo = (u64)(m); +#elif MSC_HAS_UMUL128 + *lo = _umul128(a, b, hi); +#else + u32 a0 = (u32)(a), a1 = (u32)(a >> 32); + u32 b0 = (u32)(b), b1 = (u32)(b >> 32); + u64 p00 = (u64)a0 * b0, p01 = (u64)a0 * b1; + u64 p10 = (u64)a1 * b0, p11 = (u64)a1 * b1; + u64 m0 = p01 + (p00 >> 32); + u32 m00 = (u32)(m0), m01 = (u32)(m0 >> 32); + u64 m1 = p10 + m00; + u32 m10 = (u32)(m1), m11 = (u32)(m1 >> 32); + *hi = p11 + m01 + m11; + *lo = ((u64)m10 << 32) | (u32)p00; +#endif +} + +/** Multiplies two 64-bit unsigned integers and add a value (a * b + c), + returns the 128-bit result as 'hi' and 'lo'. */ +static_inline void u128_mul_add(u64 a, u64 b, u64 c, u64 *hi, u64 *lo) { +#if YYJSON_HAS_INT128 + u128 m = (u128)a * b + c; + *hi = (u64)(m >> 64); + *lo = (u64)(m); +#else + u64 h, l, t; + u128_mul(a, b, &h, &l); + t = l + c; + h += (u64)(((t < l) | (t < c))); + *hi = h; + *lo = t; +#endif } /*============================================================================== - * Digit Character Matcher + * MARK: - File Utils (Private) + * These functions are used to read and write JSON files. 
*============================================================================*/ -/** Digit type */ -typedef u8 digi_type; +#define YYJSON_FOPEN_E +#if !defined(_MSC_VER) && defined(__GLIBC__) && defined(__GLIBC_PREREQ) +# if __GLIBC_PREREQ(2, 7) +# undef YYJSON_FOPEN_E +# define YYJSON_FOPEN_E "e" /* glibc extension to enable O_CLOEXEC */ +# endif +#endif -/** Digit: '0'. */ -static const digi_type DIGI_TYPE_ZERO = 1 << 0; - -/** Digit: [1-9]. */ -static const digi_type DIGI_TYPE_NONZERO = 1 << 1; - -/** Plus sign (positive): '+'. */ -static const digi_type DIGI_TYPE_POS = 1 << 2; - -/** Minus sign (negative): '-'. */ -static const digi_type DIGI_TYPE_NEG = 1 << 3; - -/** Decimal point: '.' */ -static const digi_type DIGI_TYPE_DOT = 1 << 4; - -/** Exponent sign: 'e, 'E'. */ -static const digi_type DIGI_TYPE_EXP = 1 << 5; - -/** Digit type table (generate with misc/make_tables.c) */ -static const digi_type digi_table[256] = { - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x04, 0x00, 0x08, 0x10, 0x00, - 0x01, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, - 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -}; - -/** Match a character with specified type. 
*/ -static_inline bool digi_is_type(u8 d, digi_type type) { - return (digi_table[d] & type) != 0; +static_inline FILE *fopen_safe(const char *path, const char *mode) { +#if YYJSON_MSC_VER >= 1400 + FILE *file = NULL; + if (fopen_s(&file, path, mode) != 0) return NULL; + return file; +#else + return fopen(path, mode); +#endif } -/** Match a sign: '+', '-' */ -static_inline bool digi_is_sign(u8 d) { - return digi_is_type(d, (digi_type)(DIGI_TYPE_POS | DIGI_TYPE_NEG)); +static_inline FILE *fopen_readonly(const char *path) { + return fopen_safe(path, "rb" YYJSON_FOPEN_E); } -/** Match a none zero digit: [1-9] */ -static_inline bool digi_is_nonzero(u8 d) { - return digi_is_type(d, (digi_type)DIGI_TYPE_NONZERO); +static_inline FILE *fopen_writeonly(const char *path) { + return fopen_safe(path, "wb" YYJSON_FOPEN_E); } -/** Match a digit: [0-9] */ -static_inline bool digi_is_digit(u8 d) { - return digi_is_type(d, (digi_type)(DIGI_TYPE_ZERO | DIGI_TYPE_NONZERO)); -} - -/** Match an exponent sign: 'e', 'E'. */ -static_inline bool digi_is_exp(u8 d) { - return digi_is_type(d, (digi_type)DIGI_TYPE_EXP); -} - -/** Match a floating point indicator: '.', 'e', 'E'. */ -static_inline bool digi_is_fp(u8 d) { - return digi_is_type(d, (digi_type)(DIGI_TYPE_DOT | DIGI_TYPE_EXP)); -} - -/** Match a digit or floating point indicator: [0-9], '.', 'e', 'E'. */ -static_inline bool digi_is_digit_or_fp(u8 d) { - return digi_is_type(d, (digi_type)(DIGI_TYPE_ZERO | DIGI_TYPE_NONZERO | - DIGI_TYPE_DOT | DIGI_TYPE_EXP)); -} - - - -#if !YYJSON_DISABLE_READER - -/*============================================================================== - * Hex Character Reader - * This function is used by JSON reader to read escaped characters. - *============================================================================*/ - -/** - This table is used to convert 4 hex character sequence to a number. 
- A valid hex character [0-9A-Fa-f] will mapped to it's raw number [0x00, 0x0F], - an invalid hex character will mapped to [0xF0]. - (generate with misc/make_tables.c) - */ -static const u8 hex_conv_table[256] = { - 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, - 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, - 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, - 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, - 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, - 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, - 0x08, 0x09, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, - 0xF0, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xF0, - 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, - 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, - 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, - 0xF0, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xF0, - 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, - 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, - 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, - 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, - 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, - 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, - 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, - 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, - 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, - 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, - 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, - 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, - 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, - 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, - 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, - 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, - 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, - 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, - 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0 -}; - -/** - Scans an escaped character sequence as a UTF-16 code unit (branchless). - e.g. "\\u005C" should pass "005C" as `cur`. - - This requires the string has 4-byte zero padding. 
- */ -static_inline bool read_hex_u16(const u8 *cur, u16 *val) { - u16 c0, c1, c2, c3, t0, t1; - c0 = hex_conv_table[cur[0]]; - c1 = hex_conv_table[cur[1]]; - c2 = hex_conv_table[cur[2]]; - c3 = hex_conv_table[cur[3]]; - t0 = (u16)((c0 << 8) | c2); - t1 = (u16)((c1 << 8) | c3); - *val = (u16)((t0 << 4) | t1); - return ((t0 | t1) & (u16)0xF0F0) == 0; +static_inline usize fread_safe(void *buf, usize size, FILE *file) { +#if YYJSON_MSC_VER >= 1400 + return fread_s(buf, size, 1, size, file); +#else + return fread(buf, 1, size, file); +#endif } /*============================================================================== - * JSON Reader Utils + * MARK: - Size Utils (Private) + * These functions are used for memory allocation. + *============================================================================*/ + +/** Returns whether the size is overflow after increment. */ +static_inline bool size_add_is_overflow(usize size, usize add) { + return size > (size + add); +} + +/** Returns whether the size is power of 2 (size should not be 0). */ +static_inline bool size_is_pow2(usize size) { + return (size & (size - 1)) == 0; +} + +/** Align size upwards (may overflow). */ +static_inline usize size_align_up(usize size, usize align) { + if (size_is_pow2(align)) { + return (size + (align - 1)) & ~(align - 1); + } else { + return size + align - (size + align - 1) % align - 1; + } +} + +/** Align size downwards. */ +static_inline usize size_align_down(usize size, usize align) { + if (size_is_pow2(align)) { + return size & ~(align - 1); + } else { + return size - (size % align); + } +} + +/** Align address upwards (may overflow). 
*/ +static_inline void *mem_align_up(void *mem, usize align) { + usize size; + memcpy(&size, &mem, sizeof(usize)); + size = size_align_up(size, align); + memcpy(&mem, &size, sizeof(usize)); + return mem; +} + + + +/*============================================================================== + * MARK: - Default Memory Allocator (Private) + * This is a simple libc memory allocator wrapper. + *============================================================================*/ + +static void *default_malloc(void *ctx, usize size) { + return malloc(size); +} + +static void *default_realloc(void *ctx, void *ptr, usize old_size, usize size) { + return realloc(ptr, size); +} + +static void default_free(void *ctx, void *ptr) { + free(ptr); +} + +static const yyjson_alc YYJSON_DEFAULT_ALC = { + default_malloc, default_realloc, default_free, NULL +}; + + + +/*============================================================================== + * MARK: - Null Memory Allocator (Private) + * This allocator is just a placeholder to ensure that the internal + * malloc/realloc/free function pointers are not null. + *============================================================================*/ + +static void *null_malloc(void *ctx, usize size) { + return NULL; +} + +static void *null_realloc(void *ctx, void *ptr, usize old_size, usize size) { + return NULL; +} + +static void null_free(void *ctx, void *ptr) { + return; +} + +static const yyjson_alc YYJSON_NULL_ALC = { + null_malloc, null_realloc, null_free, NULL +}; + + + +/*============================================================================== + * MARK: - Pool Memory Allocator (Public) + * This allocator is initialized with a fixed-size buffer. + * The buffer is split into multiple memory chunks for memory allocation. 
+ *============================================================================*/ + +/** memory chunk header */ +typedef struct pool_chunk { + usize size; /* chunk memory size, include chunk header */ + struct pool_chunk *next; /* linked list, nullable */ + /* char mem[]; flexible array member */ +} pool_chunk; + +/** allocator ctx header */ +typedef struct pool_ctx { + usize size; /* total memory size, include ctx header */ + pool_chunk *free_list; /* linked list, nullable */ + /* pool_chunk chunks[]; flexible array member */ +} pool_ctx; + +/** align up the input size to chunk size */ +static_inline void pool_size_align(usize *size) { + *size = size_align_up(*size, sizeof(pool_chunk)) + sizeof(pool_chunk); +} + +static void *pool_malloc(void *ctx_ptr, usize size) { + /* assert(size != 0) */ + pool_ctx *ctx = (pool_ctx *)ctx_ptr; + pool_chunk *next, *prev = NULL, *cur = ctx->free_list; + + if (unlikely(size >= ctx->size)) return NULL; + pool_size_align(&size); + + while (cur) { + if (cur->size < size) { + /* not enough space, try next chunk */ + prev = cur; + cur = cur->next; + continue; + } + if (cur->size >= size + sizeof(pool_chunk) * 2) { + /* too much space, split this chunk */ + next = (pool_chunk *)(void *)((u8 *)cur + size); + next->size = cur->size - size; + next->next = cur->next; + cur->size = size; + } else { + /* just enough space, use whole chunk */ + next = cur->next; + } + if (prev) prev->next = next; + else ctx->free_list = next; + return (void *)(cur + 1); + } + return NULL; +} + +static void pool_free(void *ctx_ptr, void *ptr) { + /* assert(ptr != NULL) */ + pool_ctx *ctx = (pool_ctx *)ctx_ptr; + pool_chunk *cur = ((pool_chunk *)ptr) - 1; + pool_chunk *prev = NULL, *next = ctx->free_list; + + while (next && next < cur) { + prev = next; + next = next->next; + } + if (prev) prev->next = cur; + else ctx->free_list = cur; + cur->next = next; + + if (next && ((u8 *)cur + cur->size) == (u8 *)next) { + /* merge cur to higher chunk */ + cur->size += 
next->size; + cur->next = next->next; + } + if (prev && ((u8 *)prev + prev->size) == (u8 *)cur) { + /* merge cur to lower chunk */ + prev->size += cur->size; + prev->next = cur->next; + } +} + +static void *pool_realloc(void *ctx_ptr, void *ptr, + usize old_size, usize size) { + /* assert(ptr != NULL && size != 0 && old_size < size) */ + pool_ctx *ctx = (pool_ctx *)ctx_ptr; + pool_chunk *cur = ((pool_chunk *)ptr) - 1, *prev, *next, *tmp; + + /* check size */ + if (unlikely(size >= ctx->size)) return NULL; + pool_size_align(&old_size); + pool_size_align(&size); + if (unlikely(old_size == size)) return ptr; + + /* find next and prev chunk */ + prev = NULL; + next = ctx->free_list; + while (next && next < cur) { + prev = next; + next = next->next; + } + + if ((u8 *)cur + cur->size == (u8 *)next && cur->size + next->size >= size) { + /* merge to higher chunk if they are contiguous */ + usize free_size = cur->size + next->size - size; + if (free_size > sizeof(pool_chunk) * 2) { + tmp = (pool_chunk *)(void *)((u8 *)cur + size); + if (prev) prev->next = tmp; + else ctx->free_list = tmp; + tmp->next = next->next; + tmp->size = free_size; + cur->size = size; + } else { + if (prev) prev->next = next->next; + else ctx->free_list = next->next; + cur->size += next->size; + } + return ptr; + } else { + /* fallback to malloc and memcpy */ + void *new_ptr = pool_malloc(ctx_ptr, size - sizeof(pool_chunk)); + if (new_ptr) { + memcpy(new_ptr, ptr, cur->size - sizeof(pool_chunk)); + pool_free(ctx_ptr, ptr); + } + return new_ptr; + } +} + +bool yyjson_alc_pool_init(yyjson_alc *alc, void *buf, usize size) { + pool_chunk *chunk; + pool_ctx *ctx; + + if (unlikely(!alc)) return false; + *alc = YYJSON_NULL_ALC; + if (size < sizeof(pool_ctx) * 4) return false; + ctx = (pool_ctx *)mem_align_up(buf, sizeof(pool_ctx)); + if (unlikely(!ctx)) return false; + size -= (usize)((u8 *)ctx - (u8 *)buf); + size = size_align_down(size, sizeof(pool_ctx)); + + chunk = (pool_chunk *)(ctx + 1); + chunk->size 
= size - sizeof(pool_ctx); + chunk->next = NULL; + ctx->size = size; + ctx->free_list = chunk; + + alc->malloc = pool_malloc; + alc->realloc = pool_realloc; + alc->free = pool_free; + alc->ctx = (void *)ctx; + return true; +} + + + +/*============================================================================== + * MARK: - Dynamic Memory Allocator (Public) + * This allocator allocates memory on demand and does not immediately release + * unused memory. Instead, it places the unused memory into a freelist for + * potential reuse in the future. It is only when the entire allocator is + * destroyed that all previously allocated memory is released at once. + *============================================================================*/ + +/** memory chunk header */ +typedef struct dyn_chunk { + usize size; /* chunk size, include header */ + struct dyn_chunk *next; + /* char mem[]; flexible array member */ +} dyn_chunk; + +/** allocator ctx header */ +typedef struct { + dyn_chunk free_list; /* dummy header, sorted from small to large */ + dyn_chunk used_list; /* dummy header */ +} dyn_ctx; + +/** align up the input size to chunk size */ +static_inline bool dyn_size_align(usize *size) { + usize alc_size = *size + sizeof(dyn_chunk); + alc_size = size_align_up(alc_size, YYJSON_ALC_DYN_MIN_SIZE); + if (unlikely(alc_size < *size)) return false; /* overflow */ + *size = alc_size; + return true; +} + +/** remove a chunk from list (the chunk must already be in the list) */ +static_inline void dyn_chunk_list_remove(dyn_chunk *list, dyn_chunk *chunk) { + dyn_chunk *prev = list, *cur; + for (cur = prev->next; cur; cur = cur->next) { + if (cur == chunk) { + prev->next = cur->next; + cur->next = NULL; + return; + } + prev = cur; + } +} + +/** add a chunk to list header (the chunk must not be in the list) */ +static_inline void dyn_chunk_list_add(dyn_chunk *list, dyn_chunk *chunk) { + chunk->next = list->next; + list->next = chunk; +} + +static void *dyn_malloc(void *ctx_ptr, usize 
size) { + /* assert(size != 0) */ + const yyjson_alc def = YYJSON_DEFAULT_ALC; + dyn_ctx *ctx = (dyn_ctx *)ctx_ptr; + dyn_chunk *chunk, *prev; + if (unlikely(!dyn_size_align(&size))) return NULL; + + /* freelist is empty, create new chunk */ + if (!ctx->free_list.next) { + chunk = (dyn_chunk *)def.malloc(def.ctx, size); + if (unlikely(!chunk)) return NULL; + chunk->size = size; + chunk->next = NULL; + dyn_chunk_list_add(&ctx->used_list, chunk); + return (void *)(chunk + 1); + } + + /* find a large enough chunk, or resize the largest chunk */ + prev = &ctx->free_list; + while (true) { + chunk = prev->next; + if (chunk->size >= size) { /* enough size, reuse this chunk */ + prev->next = chunk->next; + dyn_chunk_list_add(&ctx->used_list, chunk); + return (void *)(chunk + 1); + } + if (!chunk->next) { /* resize the largest chunk */ + chunk = (dyn_chunk *)def.realloc(def.ctx, chunk, chunk->size, size); + if (unlikely(!chunk)) return NULL; + prev->next = NULL; + chunk->size = size; + dyn_chunk_list_add(&ctx->used_list, chunk); + return (void *)(chunk + 1); + } + prev = chunk; + } +} + +static void *dyn_realloc(void *ctx_ptr, void *ptr, + usize old_size, usize size) { + /* assert(ptr != NULL && size != 0 && old_size < size) */ + const yyjson_alc def = YYJSON_DEFAULT_ALC; + dyn_ctx *ctx = (dyn_ctx *)ctx_ptr; + dyn_chunk *new_chunk, *chunk = (dyn_chunk *)ptr - 1; + if (unlikely(!dyn_size_align(&size))) return NULL; + if (chunk->size >= size) return ptr; + + dyn_chunk_list_remove(&ctx->used_list, chunk); + new_chunk = (dyn_chunk *)def.realloc(def.ctx, chunk, chunk->size, size); + if (likely(new_chunk)) { + new_chunk->size = size; + chunk = new_chunk; + } + dyn_chunk_list_add(&ctx->used_list, chunk); + return new_chunk ? 
(void *)(new_chunk + 1) : NULL; +} + +static void dyn_free(void *ctx_ptr, void *ptr) { + /* assert(ptr != NULL) */ + dyn_ctx *ctx = (dyn_ctx *)ctx_ptr; + dyn_chunk *chunk = (dyn_chunk *)ptr - 1, *prev; + + dyn_chunk_list_remove(&ctx->used_list, chunk); + for (prev = &ctx->free_list; prev; prev = prev->next) { + if (!prev->next || prev->next->size >= chunk->size) { + chunk->next = prev->next; + prev->next = chunk; + break; + } + } +} + +yyjson_alc *yyjson_alc_dyn_new(void) { + const yyjson_alc def = YYJSON_DEFAULT_ALC; + usize hdr_len = sizeof(yyjson_alc) + sizeof(dyn_ctx); + yyjson_alc *alc = (yyjson_alc *)def.malloc(def.ctx, hdr_len); + dyn_ctx *ctx = (dyn_ctx *)(void *)(alc + 1); + if (unlikely(!alc)) return NULL; + alc->malloc = dyn_malloc; + alc->realloc = dyn_realloc; + alc->free = dyn_free; + alc->ctx = alc + 1; + memset(ctx, 0, sizeof(*ctx)); + return alc; +} + +void yyjson_alc_dyn_free(yyjson_alc *alc) { + const yyjson_alc def = YYJSON_DEFAULT_ALC; + dyn_ctx *ctx = (dyn_ctx *)(void *)(alc + 1); + dyn_chunk *chunk, *next; + if (unlikely(!alc)) return; + for (chunk = ctx->free_list.next; chunk; chunk = next) { + next = chunk->next; + def.free(def.ctx, chunk); + } + for (chunk = ctx->used_list.next; chunk; chunk = next) { + next = chunk->next; + def.free(def.ctx, chunk); + } + def.free(def.ctx, alc); +} + + + +/*============================================================================== + * MARK: - JSON Struct Utils (Public) + * These functions are used for creating, copying, releasing, and comparing + * JSON documents and values. They are widely used throughout this library. 
+ *============================================================================*/ + +static_inline void unsafe_yyjson_str_pool_release(yyjson_str_pool *pool, + yyjson_alc *alc) { + yyjson_str_chunk *chunk = pool->chunks, *next; + while (chunk) { + next = chunk->next; + alc->free(alc->ctx, chunk); + chunk = next; + } +} + +static_inline void unsafe_yyjson_val_pool_release(yyjson_val_pool *pool, + yyjson_alc *alc) { + yyjson_val_chunk *chunk = pool->chunks, *next; + while (chunk) { + next = chunk->next; + alc->free(alc->ctx, chunk); + chunk = next; + } +} + +bool unsafe_yyjson_str_pool_grow(yyjson_str_pool *pool, + const yyjson_alc *alc, usize len) { + yyjson_str_chunk *chunk; + usize size, max_len; + + /* create a new chunk */ + max_len = USIZE_MAX - sizeof(yyjson_str_chunk); + if (unlikely(len > max_len)) return false; + size = len + sizeof(yyjson_str_chunk); + size = yyjson_max(pool->chunk_size, size); + chunk = (yyjson_str_chunk *)alc->malloc(alc->ctx, size); + if (unlikely(!chunk)) return false; + + /* insert the new chunk as the head of the linked list */ + chunk->next = pool->chunks; + chunk->chunk_size = size; + pool->chunks = chunk; + pool->cur = (char *)chunk + sizeof(yyjson_str_chunk); + pool->end = (char *)chunk + size; + + /* the next chunk is twice the size of the current one */ + size = yyjson_min(pool->chunk_size * 2, pool->chunk_size_max); + if (size < pool->chunk_size) size = pool->chunk_size_max; /* overflow */ + pool->chunk_size = size; + return true; +} + +bool unsafe_yyjson_val_pool_grow(yyjson_val_pool *pool, + const yyjson_alc *alc, usize count) { + yyjson_val_chunk *chunk; + usize size, max_count; + + /* create a new chunk */ + max_count = USIZE_MAX / sizeof(yyjson_mut_val) - 1; + if (unlikely(count > max_count)) return false; + size = (count + 1) * sizeof(yyjson_mut_val); + size = yyjson_max(pool->chunk_size, size); + chunk = (yyjson_val_chunk *)alc->malloc(alc->ctx, size); + if (unlikely(!chunk)) return false; + + /* insert the new chunk as 
the head of the linked list */ + chunk->next = pool->chunks; + chunk->chunk_size = size; + pool->chunks = chunk; + pool->cur = (yyjson_mut_val *)(void *)((u8 *)chunk) + 1; + pool->end = (yyjson_mut_val *)(void *)((u8 *)chunk + size); + + /* the next chunk is twice the size of the current one */ + size = yyjson_min(pool->chunk_size * 2, pool->chunk_size_max); + if (size < pool->chunk_size) size = pool->chunk_size_max; /* overflow */ + pool->chunk_size = size; + return true; +} + +bool yyjson_mut_doc_set_str_pool_size(yyjson_mut_doc *doc, size_t len) { + usize max_size = USIZE_MAX - sizeof(yyjson_str_chunk); + if (!doc || !len || len > max_size) return false; + doc->str_pool.chunk_size = len + sizeof(yyjson_str_chunk); + return true; +} + +bool yyjson_mut_doc_set_val_pool_size(yyjson_mut_doc *doc, size_t count) { + usize max_count = USIZE_MAX / sizeof(yyjson_mut_val) - 1; + if (!doc || !count || count > max_count) return false; + doc->val_pool.chunk_size = (count + 1) * sizeof(yyjson_mut_val); + return true; +} + +void yyjson_mut_doc_free(yyjson_mut_doc *doc) { + if (doc) { + yyjson_alc alc = doc->alc; + memset(&doc->alc, 0, sizeof(alc)); + unsafe_yyjson_str_pool_release(&doc->str_pool, &alc); + unsafe_yyjson_val_pool_release(&doc->val_pool, &alc); + alc.free(alc.ctx, doc); + } +} + +yyjson_mut_doc *yyjson_mut_doc_new(const yyjson_alc *alc) { + yyjson_mut_doc *doc; + if (!alc) alc = &YYJSON_DEFAULT_ALC; + doc = (yyjson_mut_doc *)alc->malloc(alc->ctx, sizeof(yyjson_mut_doc)); + if (!doc) return NULL; + memset(doc, 0, sizeof(yyjson_mut_doc)); + + doc->alc = *alc; + doc->str_pool.chunk_size = YYJSON_MUT_DOC_STR_POOL_INIT_SIZE; + doc->str_pool.chunk_size_max = YYJSON_MUT_DOC_STR_POOL_MAX_SIZE; + doc->val_pool.chunk_size = YYJSON_MUT_DOC_VAL_POOL_INIT_SIZE; + doc->val_pool.chunk_size_max = YYJSON_MUT_DOC_VAL_POOL_MAX_SIZE; + return doc; +} + +yyjson_mut_doc *yyjson_doc_mut_copy(yyjson_doc *doc, const yyjson_alc *alc) { + yyjson_mut_doc *m_doc; + yyjson_mut_val *m_val; + + 
if (!doc || !doc->root) return NULL; + m_doc = yyjson_mut_doc_new(alc); + if (!m_doc) return NULL; + m_val = yyjson_val_mut_copy(m_doc, doc->root); + if (!m_val) { + yyjson_mut_doc_free(m_doc); + return NULL; + } + yyjson_mut_doc_set_root(m_doc, m_val); + return m_doc; +} + +yyjson_mut_doc *yyjson_mut_doc_mut_copy(yyjson_mut_doc *doc, + const yyjson_alc *alc) { + yyjson_mut_doc *m_doc; + yyjson_mut_val *m_val; + + if (!doc) return NULL; + if (!doc->root) return yyjson_mut_doc_new(alc); + + m_doc = yyjson_mut_doc_new(alc); + if (!m_doc) return NULL; + m_val = yyjson_mut_val_mut_copy(m_doc, doc->root); + if (!m_val) { + yyjson_mut_doc_free(m_doc); + return NULL; + } + yyjson_mut_doc_set_root(m_doc, m_val); + return m_doc; +} + +yyjson_mut_val *yyjson_val_mut_copy(yyjson_mut_doc *m_doc, + yyjson_val *i_vals) { + /* + The immutable object or array stores all sub-values in a contiguous memory, + We copy them to another contiguous memory as mutable values, + then reconnect the mutable values with the original relationship. 
+ */ + usize i_vals_len; + yyjson_mut_val *m_vals, *m_val; + yyjson_val *i_val, *i_end; + + if (!m_doc || !i_vals) return NULL; + i_end = unsafe_yyjson_get_next(i_vals); + i_vals_len = (usize)(unsafe_yyjson_get_next(i_vals) - i_vals); + m_vals = unsafe_yyjson_mut_val(m_doc, i_vals_len); + if (!m_vals) return NULL; + i_val = i_vals; + m_val = m_vals; + + for (; i_val < i_end; i_val++, m_val++) { + yyjson_type type = unsafe_yyjson_get_type(i_val); + m_val->tag = i_val->tag; + m_val->uni.u64 = i_val->uni.u64; + if (type == YYJSON_TYPE_STR || type == YYJSON_TYPE_RAW) { + const char *str = i_val->uni.str; + usize str_len = unsafe_yyjson_get_len(i_val); + m_val->uni.str = unsafe_yyjson_mut_strncpy(m_doc, str, str_len); + if (!m_val->uni.str) return NULL; + } else if (type == YYJSON_TYPE_ARR) { + usize len = unsafe_yyjson_get_len(i_val); + if (len > 0) { + yyjson_val *ii_val = i_val + 1, *ii_next; + yyjson_mut_val *mm_val = m_val + 1, *mm_ctn = m_val, *mm_next; + while (len-- > 1) { + ii_next = unsafe_yyjson_get_next(ii_val); + mm_next = mm_val + (ii_next - ii_val); + mm_val->next = mm_next; + ii_val = ii_next; + mm_val = mm_next; + } + mm_val->next = mm_ctn + 1; + mm_ctn->uni.ptr = mm_val; + } + } else if (type == YYJSON_TYPE_OBJ) { + usize len = unsafe_yyjson_get_len(i_val); + if (len > 0) { + yyjson_val *ii_key = i_val + 1, *ii_nextkey; + yyjson_mut_val *mm_key = m_val + 1, *mm_ctn = m_val; + yyjson_mut_val *mm_nextkey; + while (len-- > 1) { + ii_nextkey = unsafe_yyjson_get_next(ii_key + 1); + mm_nextkey = mm_key + (ii_nextkey - ii_key); + mm_key->next = mm_key + 1; + mm_key->next->next = mm_nextkey; + ii_key = ii_nextkey; + mm_key = mm_nextkey; + } + mm_key->next = mm_key + 1; + mm_key->next->next = mm_ctn + 1; + mm_ctn->uni.ptr = mm_key; + } + } + } + return m_vals; +} + +static yyjson_mut_val *unsafe_yyjson_mut_val_mut_copy(yyjson_mut_doc *m_doc, + yyjson_mut_val *m_vals) { + /* + The mutable object or array stores all sub-values in a circular linked + list, so we 
can traverse them in the same loop. The traversal starts from + the last item, continues with the first item in a list, and ends with the + second to last item, which needs to be linked to the last item to close the + circle. + */ + yyjson_mut_val *m_val = unsafe_yyjson_mut_val(m_doc, 1); + if (unlikely(!m_val)) return NULL; + m_val->tag = m_vals->tag; + + switch (unsafe_yyjson_get_type(m_vals)) { + case YYJSON_TYPE_OBJ: + case YYJSON_TYPE_ARR: + if (unsafe_yyjson_get_len(m_vals) > 0) { + yyjson_mut_val *last = (yyjson_mut_val *)m_vals->uni.ptr; + yyjson_mut_val *next = last->next, *prev; + prev = unsafe_yyjson_mut_val_mut_copy(m_doc, last); + if (!prev) return NULL; + m_val->uni.ptr = (void *)prev; + while (next != last) { + prev->next = unsafe_yyjson_mut_val_mut_copy(m_doc, next); + if (!prev->next) return NULL; + prev = prev->next; + next = next->next; + } + prev->next = (yyjson_mut_val *)m_val->uni.ptr; + } + break; + case YYJSON_TYPE_RAW: + case YYJSON_TYPE_STR: { + const char *str = m_vals->uni.str; + usize str_len = unsafe_yyjson_get_len(m_vals); + m_val->uni.str = unsafe_yyjson_mut_strncpy(m_doc, str, str_len); + if (!m_val->uni.str) return NULL; + break; + } + default: + m_val->uni = m_vals->uni; + break; + } + return m_val; +} + +yyjson_mut_val *yyjson_mut_val_mut_copy(yyjson_mut_doc *doc, + yyjson_mut_val *val) { + if (doc && val) return unsafe_yyjson_mut_val_mut_copy(doc, val); + return NULL; +} + +/* Count the number of values and the total length of the strings. 
*/ +static void yyjson_mut_stat(yyjson_mut_val *val, + usize *val_sum, usize *str_sum) { + yyjson_type type = unsafe_yyjson_get_type(val); + *val_sum += 1; + if (type == YYJSON_TYPE_ARR || type == YYJSON_TYPE_OBJ) { + yyjson_mut_val *child = (yyjson_mut_val *)val->uni.ptr; + usize len = unsafe_yyjson_get_len(val), i; + len <<= (u8)(type == YYJSON_TYPE_OBJ); + *val_sum += len; + for (i = 0; i < len; i++) { + yyjson_type stype = unsafe_yyjson_get_type(child); + if (stype == YYJSON_TYPE_STR || stype == YYJSON_TYPE_RAW) { + *str_sum += unsafe_yyjson_get_len(child) + 1; + } else if (stype == YYJSON_TYPE_ARR || stype == YYJSON_TYPE_OBJ) { + yyjson_mut_stat(child, val_sum, str_sum); + *val_sum -= 1; + } + child = child->next; + } + } else if (type == YYJSON_TYPE_STR || type == YYJSON_TYPE_RAW) { + *str_sum += unsafe_yyjson_get_len(val) + 1; + } +} + +/* Copy mutable values to immutable value pool. */ +static usize yyjson_imut_copy(yyjson_val **val_ptr, char **buf_ptr, + yyjson_mut_val *mval) { + yyjson_val *val = *val_ptr; + yyjson_type type = unsafe_yyjson_get_type(mval); + if (type == YYJSON_TYPE_ARR || type == YYJSON_TYPE_OBJ) { + yyjson_mut_val *child = (yyjson_mut_val *)mval->uni.ptr; + usize len = unsafe_yyjson_get_len(mval), i; + usize val_sum = 1; + if (type == YYJSON_TYPE_OBJ) { + if (len) child = child->next->next; + len <<= 1; + } else { + if (len) child = child->next; + } + *val_ptr = val + 1; + for (i = 0; i < len; i++) { + val_sum += yyjson_imut_copy(val_ptr, buf_ptr, child); + child = child->next; + } + val->tag = mval->tag; + val->uni.ofs = val_sum * sizeof(yyjson_val); + return val_sum; + } else if (type == YYJSON_TYPE_STR || type == YYJSON_TYPE_RAW) { + char *buf = *buf_ptr; + usize len = unsafe_yyjson_get_len(mval); + memcpy((void *)buf, (const void *)mval->uni.str, len); + buf[len] = '\0'; + val->tag = mval->tag; + val->uni.str = buf; + *val_ptr = val + 1; + *buf_ptr = buf + len + 1; + return 1; + } else { + val->tag = mval->tag; + val->uni = 
mval->uni; + *val_ptr = val + 1; + return 1; + } +} + +yyjson_doc *yyjson_mut_doc_imut_copy(yyjson_mut_doc *mdoc, + const yyjson_alc *alc) { + if (!mdoc) return NULL; + return yyjson_mut_val_imut_copy(mdoc->root, alc); +} + +yyjson_doc *yyjson_mut_val_imut_copy(yyjson_mut_val *mval, + const yyjson_alc *alc) { + usize val_num = 0, str_sum = 0, hdr_size, buf_size; + yyjson_doc *doc = NULL; + yyjson_val *val_hdr = NULL; + + /* This value should be NULL here. Setting a non-null value suppresses + warning from the clang analyzer. */ + char *str_hdr = (char *)(void *)&str_sum; + if (!mval) return NULL; + if (!alc) alc = &YYJSON_DEFAULT_ALC; + + /* traverse the input value to get pool size */ + yyjson_mut_stat(mval, &val_num, &str_sum); + + /* create doc and val pool */ + hdr_size = size_align_up(sizeof(yyjson_doc), sizeof(yyjson_val)); + buf_size = hdr_size + val_num * sizeof(yyjson_val); + doc = (yyjson_doc *)alc->malloc(alc->ctx, buf_size); + if (!doc) return NULL; + memset(doc, 0, sizeof(yyjson_doc)); + val_hdr = (yyjson_val *)(void *)((char *)(void *)doc + hdr_size); + doc->root = val_hdr; + doc->alc = *alc; + + /* create str pool */ + if (str_sum > 0) { + str_hdr = (char *)alc->malloc(alc->ctx, str_sum); + doc->str_pool = str_hdr; + if (!str_hdr) { + alc->free(alc->ctx, (void *)doc); + return NULL; + } + } + + /* copy vals and strs */ + doc->val_read = yyjson_imut_copy(&val_hdr, &str_hdr, mval); + doc->dat_read = str_sum + 1; + return doc; +} + +static_inline bool unsafe_yyjson_num_equals(void *lhs, void *rhs) { + yyjson_val_uni *luni = &((yyjson_val *)lhs)->uni; + yyjson_val_uni *runi = &((yyjson_val *)rhs)->uni; + yyjson_subtype lt = unsafe_yyjson_get_subtype(lhs); + yyjson_subtype rt = unsafe_yyjson_get_subtype(rhs); + if (lt == rt) return luni->u64 == runi->u64; + if (lt == YYJSON_SUBTYPE_SINT && rt == YYJSON_SUBTYPE_UINT) { + return luni->i64 >= 0 && luni->u64 == runi->u64; + } + if (lt == YYJSON_SUBTYPE_UINT && rt == YYJSON_SUBTYPE_SINT) { + return runi->i64 
>= 0 && luni->u64 == runi->u64; + } + return false; +} + +static_inline bool unsafe_yyjson_str_equals(void *lhs, void *rhs) { + usize len = unsafe_yyjson_get_len(lhs); + if (len != unsafe_yyjson_get_len(rhs)) return false; + return !memcmp(unsafe_yyjson_get_str(lhs), + unsafe_yyjson_get_str(rhs), len); +} + +bool unsafe_yyjson_equals(yyjson_val *lhs, yyjson_val *rhs) { + yyjson_type type = unsafe_yyjson_get_type(lhs); + if (type != unsafe_yyjson_get_type(rhs)) return false; + + switch (type) { + case YYJSON_TYPE_OBJ: { + usize len = unsafe_yyjson_get_len(lhs); + if (len != unsafe_yyjson_get_len(rhs)) return false; + if (len > 0) { + yyjson_obj_iter iter; + yyjson_obj_iter_init(rhs, &iter); + lhs = unsafe_yyjson_get_first(lhs); + while (len-- > 0) { + rhs = yyjson_obj_iter_getn(&iter, lhs->uni.str, + unsafe_yyjson_get_len(lhs)); + if (!rhs) return false; + if (!unsafe_yyjson_equals(lhs + 1, rhs)) return false; + lhs = unsafe_yyjson_get_next(lhs + 1); + } + } + /* yyjson allows duplicate keys, so the check may be inaccurate */ + return true; + } + + case YYJSON_TYPE_ARR: { + usize len = unsafe_yyjson_get_len(lhs); + if (len != unsafe_yyjson_get_len(rhs)) return false; + if (len > 0) { + lhs = unsafe_yyjson_get_first(lhs); + rhs = unsafe_yyjson_get_first(rhs); + while (len-- > 0) { + if (!unsafe_yyjson_equals(lhs, rhs)) return false; + lhs = unsafe_yyjson_get_next(lhs); + rhs = unsafe_yyjson_get_next(rhs); + } + } + return true; + } + + case YYJSON_TYPE_NUM: + return unsafe_yyjson_num_equals(lhs, rhs); + + case YYJSON_TYPE_RAW: + case YYJSON_TYPE_STR: + return unsafe_yyjson_str_equals(lhs, rhs); + + case YYJSON_TYPE_NULL: + case YYJSON_TYPE_BOOL: + return lhs->tag == rhs->tag; + + default: + return false; + } +} + +bool unsafe_yyjson_mut_equals(yyjson_mut_val *lhs, yyjson_mut_val *rhs) { + yyjson_type type = unsafe_yyjson_get_type(lhs); + if (type != unsafe_yyjson_get_type(rhs)) return false; + + switch (type) { + case YYJSON_TYPE_OBJ: { + usize len = 
unsafe_yyjson_get_len(lhs); + if (len != unsafe_yyjson_get_len(rhs)) return false; + if (len > 0) { + yyjson_mut_obj_iter iter; + yyjson_mut_obj_iter_init(rhs, &iter); + lhs = (yyjson_mut_val *)lhs->uni.ptr; + while (len-- > 0) { + rhs = yyjson_mut_obj_iter_getn(&iter, lhs->uni.str, + unsafe_yyjson_get_len(lhs)); + if (!rhs) return false; + if (!unsafe_yyjson_mut_equals(lhs->next, rhs)) return false; + lhs = lhs->next->next; + } + } + /* yyjson allows duplicate keys, so the check may be inaccurate */ + return true; + } + + case YYJSON_TYPE_ARR: { + usize len = unsafe_yyjson_get_len(lhs); + if (len != unsafe_yyjson_get_len(rhs)) return false; + if (len > 0) { + lhs = (yyjson_mut_val *)lhs->uni.ptr; + rhs = (yyjson_mut_val *)rhs->uni.ptr; + while (len-- > 0) { + if (!unsafe_yyjson_mut_equals(lhs, rhs)) return false; + lhs = lhs->next; + rhs = rhs->next; + } + } + return true; + } + + case YYJSON_TYPE_NUM: + return unsafe_yyjson_num_equals(lhs, rhs); + + case YYJSON_TYPE_RAW: + case YYJSON_TYPE_STR: + return unsafe_yyjson_str_equals(lhs, rhs); + + case YYJSON_TYPE_NULL: + case YYJSON_TYPE_BOOL: + return lhs->tag == rhs->tag; + + default: + return false; + } +} + +bool yyjson_locate_pos(const char *str, size_t len, size_t pos, + size_t *line, size_t *col, size_t *chr) { + usize line_sum = 0, line_pos = 0, chr_sum = 0; + const u8 *cur = (const u8 *)str; + const u8 *end = cur + pos; + + if (!str || pos > len) { + if (line) *line = 0; + if (col) *col = 0; + if (chr) *chr = 0; + return false; + } + + if (pos >= 3 && is_utf8_bom(cur)) cur += 3; /* don't count BOM */ + while (cur < end) { + u8 c = *cur; + chr_sum += 1; + if (likely(c < 0x80)) { /* 0xxxxxxx (0x00-0x7F) ASCII */ + if (c == '\n') { + line_sum += 1; + line_pos = chr_sum; + } + cur += 1; + } + else if (c < 0xC0) cur += 1; /* 10xxxxxx (0x80-0xBF) Invalid */ + else if (c < 0xE0) cur += 2; /* 110xxxxx (0xC0-0xDF) 2-byte UTF-8 */ + else if (c < 0xF0) cur += 3; /* 1110xxxx (0xE0-0xEF) 3-byte UTF-8 */ + else if (c < 
0xF8) cur += 4; /* 11110xxx (0xF0-0xF7) 4-byte UTF-8 */ + else cur += 1; /* 11111xxx (0xF8-0xFF) Invalid */ + } + if (line) *line = line_sum + 1; + if (col) *col = chr_sum - line_pos + 1; + if (chr) *chr = chr_sum; + return true; +} + + + +#if !YYJSON_DISABLE_READER /* reader begin */ + +/* Check read flag, avoids `always false` warning when disabled. */ +#define has_flg(_flg) unlikely(has_rflag(flg, YYJSON_READ_##_flg, 0)) +#define has_allow(_flg) unlikely(has_rflag(flg, YYJSON_READ_ALLOW_##_flg, 1)) +#define YYJSON_READ_ALLOW_TRIVIA (YYJSON_READ_ALLOW_COMMENTS | \ + YYJSON_READ_ALLOW_EXT_WHITESPACE) +static_inline bool has_rflag(yyjson_read_flag flg, yyjson_read_flag chk, + bool non_standard) { +#if YYJSON_DISABLE_NON_STANDARD + if (non_standard) return false; +#endif + return (flg & chk) != 0; +} + + + +/*============================================================================== + * MARK: - JSON Reader Utils (Private) * These functions are used by JSON reader to read literals and comments. *============================================================================*/ -/** Read 'true' literal, '*cur' should be 't'. */ +/** Read `true` literal, `*ptr[0]` should be `t`. */ static_inline bool read_true(u8 **ptr, yyjson_val *val) { u8 *cur = *ptr; if (likely(byte_match_4(cur, "true"))) { @@ -3840,7 +3111,7 @@ static_inline bool read_true(u8 **ptr, yyjson_val *val) { return false; } -/** Read 'false' literal, '*cur' should be 'f'. */ +/** Read `false` literal, `*ptr[0]` should be `f`. */ static_inline bool read_false(u8 **ptr, yyjson_val *val) { u8 *cur = *ptr; if (likely(byte_match_4(cur + 1, "alse"))) { @@ -3851,7 +3122,7 @@ static_inline bool read_false(u8 **ptr, yyjson_val *val) { return false; } -/** Read 'null' literal, '*cur' should be 'n'. */ +/** Read `null` literal, `*ptr[0]` should be `n`. 
*/ static_inline bool read_null(u8 **ptr, yyjson_val *val) { u8 *cur = *ptr; if (likely(byte_match_4(cur, "null"))) { @@ -3862,141 +3133,157 @@ static_inline bool read_null(u8 **ptr, yyjson_val *val) { return false; } -/** Read 'Inf' or 'Infinity' literal (ignoring case). */ -static_inline bool read_inf(bool sign, u8 **ptr, u8 **pre, +/** Read `Inf` or `Infinity` literal (ignoring case). */ +static_inline bool read_inf(u8 **ptr, u8 **pre, yyjson_read_flag flg, yyjson_val *val) { - u8 *hdr = *ptr - sign; + u8 *hdr = *ptr; u8 *cur = *ptr; u8 **end = ptr; - if ((cur[0] == 'I' || cur[0] == 'i') && - (cur[1] == 'N' || cur[1] == 'n') && - (cur[2] == 'F' || cur[2] == 'f')) { - if (cur[3] == 'I' || cur[3] == 'i') { - if ((cur[4] == 'N' || cur[4] == 'n') && - (cur[5] == 'I' || cur[5] == 'i') && - (cur[6] == 'T' || cur[6] == 't') && - (cur[7] == 'Y' || cur[7] == 'y')) { + bool sign = (*cur == '-'); + if (*cur == '+' && !has_allow(EXT_NUMBER)) return false; + cur += char_is_sign(*cur); + if (char_to_lower(cur[0]) == 'i' && + char_to_lower(cur[1]) == 'n' && + char_to_lower(cur[2]) == 'f') { + if (char_to_lower(cur[3]) == 'i') { + if (char_to_lower(cur[4]) == 'n' && + char_to_lower(cur[5]) == 'i' && + char_to_lower(cur[6]) == 't' && + char_to_lower(cur[7]) == 'y') { cur += 8; } else { - /* Don't accept INF as a complete value if it's followed by I. - This is to better support incremental parsing. 
*/ return false; } } else { cur += 3; } *end = cur; - if (has_read_flag(NUMBER_AS_RAW)) { - /* add null-terminator for previous raw string */ - if (*pre) **pre = '\0'; - *pre = cur; + if (has_flg(NUMBER_AS_RAW)) { + **pre = '\0'; /* add null-terminator for previous raw string */ + *pre = cur; /* save end position for current raw string */ val->tag = ((u64)(cur - hdr) << YYJSON_TAG_BIT) | YYJSON_TYPE_RAW; val->uni.str = (const char *)hdr; } else { val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL; - val->uni.u64 = f64_raw_get_inf(sign); + val->uni.u64 = f64_bits_inf(sign); } return true; } return false; } -/** Read 'NaN' literal (ignoring case). */ -static_inline bool read_nan(bool sign, u8 **ptr, u8 **pre, +/** Read `NaN` literal (ignoring case). */ +static_inline bool read_nan(u8 **ptr, u8 **pre, yyjson_read_flag flg, yyjson_val *val) { - u8 *hdr = *ptr - sign; + u8 *hdr = *ptr; u8 *cur = *ptr; u8 **end = ptr; - if ((cur[0] == 'N' || cur[0] == 'n') && - (cur[1] == 'A' || cur[1] == 'a') && - (cur[2] == 'N' || cur[2] == 'n')) { + bool sign = (*cur == '-'); + if (*cur == '+' && !has_allow(EXT_NUMBER)) return false; + cur += char_is_sign(*cur); + if (char_to_lower(cur[0]) == 'n' && + char_to_lower(cur[1]) == 'a' && + char_to_lower(cur[2]) == 'n') { cur += 3; *end = cur; - if (has_read_flag(NUMBER_AS_RAW)) { - /* add null-terminator for previous raw string */ - if (*pre) **pre = '\0'; - *pre = cur; + if (has_flg(NUMBER_AS_RAW)) { + **pre = '\0'; /* add null-terminator for previous raw string */ + *pre = cur; /* save end position for current raw string */ val->tag = ((u64)(cur - hdr) << YYJSON_TAG_BIT) | YYJSON_TYPE_RAW; val->uni.str = (const char *)hdr; } else { val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL; - val->uni.u64 = f64_raw_get_nan(sign); + val->uni.u64 = f64_bits_nan(sign); } return true; } return false; } -/** Read 'Inf', 'Infinity' or 'NaN' literal (ignoring case). 
*/ -static_inline bool read_inf_or_nan(bool sign, u8 **ptr, u8 **pre, +/** Read `Inf`, `Infinity` or `NaN` literal (ignoring case). */ +static_inline bool read_inf_or_nan(u8 **ptr, u8 **pre, yyjson_read_flag flg, yyjson_val *val) { - if (read_inf(sign, ptr, pre, flg, val)) return true; - if (read_nan(sign, ptr, pre, flg, val)) return true; + if (read_inf(ptr, pre, flg, val)) return true; + if (read_nan(ptr, pre, flg, val)) return true; return false; } /** Read a JSON number as raw string. */ static_noinline bool read_num_raw(u8 **ptr, u8 **pre, yyjson_read_flag flg, yyjson_val *val, const char **msg) { - #define return_err(_pos, _msg) do { \ - *msg = _msg; \ - *end = _pos; \ - return false; \ + *msg = _msg; *end = _pos; return false; \ } while (false) #define return_raw() do { \ val->tag = ((u64)(cur - hdr) << YYJSON_TAG_BIT) | YYJSON_TYPE_RAW; \ val->uni.str = (const char *)hdr; \ - *pre = cur; *end = cur; return true; \ + **pre = '\0'; *pre = cur; *end = cur; return true; \ } while (false) u8 *hdr = *ptr; u8 *cur = *ptr; u8 **end = ptr; - /* add null-terminator for previous raw string */ - if (*pre) **pre = '\0'; - /* skip sign */ cur += (*cur == '-'); /* read first digit, check leading zero */ - if (unlikely(!digi_is_digit(*cur))) { - if (has_read_flag(ALLOW_INF_AND_NAN)) { - if (read_inf_or_nan(*hdr == '-', &cur, pre, flg, val)) return_raw(); + while (unlikely(!char_is_digit(*cur))) { + if (has_allow(EXT_NUMBER)) { + if (*cur == '+' && cur == hdr) { /* leading `+` sign */ + cur++; + continue; + } + if (*cur == '.' && char_is_digit(cur[1])) { /* e.g. 
'.123' */ + goto read_double; + } } - return_err(cur, "no digit after minus sign"); + if (has_allow(INF_AND_NAN)) { + if (read_inf_or_nan(ptr, pre, flg, val)) return true; + } + return_err(cur, "no digit after sign"); } /* read integral part */ if (*cur == '0') { cur++; - if (unlikely(digi_is_digit(*cur))) { + if (unlikely(char_is_digit(*cur))) { return_err(cur - 1, "number with leading zero is not allowed"); } - if (!digi_is_fp(*cur)) return_raw(); + if (!char_is_fp(*cur)) { + if (has_allow(EXT_NUMBER) && char_to_lower(*cur) == 'x') { /* hex */ + if (!char_is_hex(*++cur)) return_err(cur, "invalid hex number"); + while(char_is_hex(*cur)) cur++; + } + return_raw(); + } } else { - while (digi_is_digit(*cur)) cur++; - if (!digi_is_fp(*cur)) return_raw(); + while (char_is_digit(*cur)) cur++; + if (!char_is_fp(*cur)) return_raw(); } +read_double: /* read fraction part */ if (*cur == '.') { cur++; - if (!digi_is_digit(*cur++)) { - return_err(cur, "no digit after decimal point"); + if (!char_is_digit(*cur)) { + if (has_allow(EXT_NUMBER)) { + if (!char_is_exp(*cur)) return_raw(); + } else { + return_err(cur, "no digit after decimal point"); + } } - while (digi_is_digit(*cur)) cur++; + while (char_is_digit(*cur)) cur++; } /* read exponent part */ - if (digi_is_exp(*cur)) { - cur += 1 + digi_is_sign(cur[1]); - if (!digi_is_digit(*cur++)) { + if (char_is_exp(*cur)) { + cur += 1 + char_is_sign(cur[1]); + if (!char_is_digit(*cur++)) { return_err(cur, "no digit after exponent sign"); } - while (digi_is_digit(*cur)) cur++; + while (char_is_digit(*cur)) cur++; } return_raw(); @@ -4005,67 +3292,169 @@ static_noinline bool read_num_raw(u8 **ptr, u8 **pre, yyjson_read_flag flg, #undef return_raw } -/** - Skips spaces and comments as many as possible. - - It will return false in these cases: - 1. No character is skipped. The 'end' pointer is set as input cursor. - 2. A multiline comment is not closed. The 'end' pointer is set as the head - of this comment block. 
- */ -static_noinline bool skip_spaces_and_comments(u8 **ptr) { +/** Read a hex number. */ +static_noinline bool read_num_hex(u8 **ptr, u8 **pre, yyjson_read_flag flg, + yyjson_val *val, const char **msg) { u8 *hdr = *ptr; u8 *cur = *ptr; u8 **end = ptr; - while (true) { - if (byte_match_2(cur, "/*")) { - hdr = cur; - cur += 2; - while (true) { - if (byte_match_2(cur, "*/")) { - cur += 2; - break; - } - if (*cur == 0) { - *end = hdr; - return false; - } - cur++; - } - continue; - } - if (byte_match_2(cur, "//")) { - cur += 2; - while (!char_is_line_end(*cur)) cur++; - continue; - } - if (char_is_space(*cur)) { - cur += 1; - while (char_is_space(*cur)) cur++; - continue; - } - break; + u64 sig = 0, i = 0; + bool sign; + + /* skip sign and '0x' */ + sign = (*cur == '-'); + cur += (*cur == '-' || *cur == '+') + 2; + + /* read hex */ + for(; i < 16; i++) { + u8 c = hex_conv_table[cur[i]]; + if (c == 0xF0) break; + sig <<= 4; + sig |= c; } - *end = cur; - return hdr != cur; + + /* check error */ + if (unlikely(i == 0)) { + *msg = "invalid hex number"; + return false; + } + + /* check overflow */ + if (unlikely(i == 16)) { + if (char_is_hex(cur[16]) || (sign && sig > ((u64)1 << 63))) { + if (!has_flg(BIGNUM_AS_RAW)) { + *msg = "hex number overflow"; + return false; + } + cur += 16; + while (char_is_hex(*cur)) cur++; + **pre = '\0'; + val->tag = ((u64)(cur - hdr) << YYJSON_TAG_BIT) | YYJSON_TYPE_RAW; + val->uni.str = (const char *)hdr; + *pre = cur; *end = cur; + return true; + } + } + + val->tag = YYJSON_TYPE_NUM | (u64)((u8)sign << 3); + val->uni.u64 = (u64)(sign ? (u64)(~(sig) + 1) : (u64)(sig)); + *end = cur + i; + return true; +} + +/** + Skip trivia (whitespace and comments). + This function should be used only when `char_is_trivia()` returns true. + @param ptr (inout) Input current position, output end position. + @param eof JSON end position. + @param flg JSON read flags. + @return true if at least one character was skipped. 
+ false if no characters were skipped, + or if a multi-line comment is unterminated; + in the latter case, `ptr` will be set to `eof`. + */ +static_noinline bool skip_trivia(u8 **ptr, u8 *eof, yyjson_read_flag flg) { + u8 *hdr = *ptr, *cur = *ptr; + usize len; + + while (cur < eof) { + u8 *loop_begin = cur; + + /* skip standard whitespace */ + while(char_is_space(*cur)) cur++; + + /* skip extended whitespace */ + if (has_allow(EXT_WHITESPACE)) { + while (char_is_space_ext(*cur)) { + cur += (len = ext_space_len(cur)); + if (!len) break; + } + } + + /* skip comment, do not validate encoding */ + if (has_allow(COMMENTS) && cur[0] == '/') { + if (cur[1] == '/') { /* single-line comment */ + cur += 2; + if (has_allow(EXT_WHITESPACE)) { + while (cur < eof) { + if (char_is_eol_ext(*cur)) { + cur += (len = ext_eol_len(cur)); + if (len) break; + } + cur++; + } + } else { + while (cur < eof && !char_is_eol(*cur)) cur++; + } + } else if (cur[1] == '*') { /* multi-line comment */ + cur += 2; + while (!byte_match_2(cur, "*/") && cur < eof) cur++; + if (cur == eof) { + *ptr = eof; + return false; /* unclosed comment */ + } + cur += 2; + } + } + if (cur == loop_begin) break; + } + *ptr = cur; + return cur > hdr; +} + +/** + Check truncated UTF-8 character. + Return true if `cur` starts a valid UTF-8 sequence that is truncated. 
+ */ +static bool is_truncated_utf8(u8 *cur, u8 *eof) { + u8 c0, c1, c2; + usize len = (usize)(eof - cur); + if (cur >= eof || len >= 4) return false; + c0 = cur[0]; c1 = cur[1]; c2 = cur[2]; + /* 1-byte UTF-8, not truncated */ + if (c0 < 0x80) return false; + if (len == 1) { + /* 2-byte UTF-8, truncated */ + if ((c0 & 0xE0) == 0xC0 && (c0 & 0x1E) != 0x00) return true; + /* 3-byte UTF-8, truncated */ + if ((c0 & 0xF0) == 0xE0) return true; + /* 4-byte UTF-8, truncated */ + if ((c0 & 0xF8) == 0xF0 && (c0 & 0x07) <= 0x04) return true; + } else if (len == 2) { + /* 3-byte UTF-8, truncated */ + if ((c0 & 0xF0) == 0xE0 && (c1 & 0xC0) == 0x80) { + u8 t = (u8)(((c0 & 0x0F) << 1) | ((c1 & 0x20) >> 5)); + return 0x01 <= t && t != 0x1B; + } + /* 4-byte UTF-8, truncated */ + if ((c0 & 0xF8) == 0xF0 && (c1 & 0xC0) == 0x80) { + u8 t = (u8)(((c0 & 0x07) << 2) | ((c1 & 0x30) >> 4)); + return 0x01 <= t && t <= 0x10; + } + } else if (len == 3) { + /* 4 bytes UTF-8, truncated */ + if ((c0 & 0xF8) == 0xF0 && (c1 & 0xC0) == 0x80 && (c2 & 0xC0) == 0x80) { + u8 t = (u8)(((c0 & 0x07) << 2) | ((c1 & 0x30) >> 4)); + return 0x01 <= t && t <= 0x10; + } + } + return false; } /** Check truncated string. Returns true if `cur` match `str` but is truncated. + The `str` should be lowercase ASCII letters. 
*/ -static_inline bool is_truncated_str(u8 *cur, u8 *end, - const char *str, - bool case_sensitive) { +static bool is_truncated_str(u8 *cur, u8 *eof, const char *str, + bool case_sensitive) { usize len = strlen(str); - if (cur + len <= end || end <= cur) return false; + if (cur + len <= eof || eof <= cur) return false; if (case_sensitive) { - return memcmp(cur, str, (usize)(end - cur)) == 0; + return memcmp(cur, str, (usize)(eof - cur)) == 0; } - for (; cur < end; cur++, str++) { - if ((*cur != (u8)*str) && (*cur != (u8)*str - 'a' + 'A')) { - return false; - } + for (; cur < eof; cur++, str++) { + if (char_to_lower(*cur) != *(const u8 *)str) return false; } return true; } @@ -4074,45 +3463,45 @@ static_inline bool is_truncated_str(u8 *cur, u8 *end, Check truncated JSON on parsing errors. Returns true if the input is valid but truncated. */ -static_noinline bool is_truncated_end(u8 *hdr, u8 *cur, u8 *end, +static_noinline bool is_truncated_end(u8 *hdr, u8 *cur, u8 *eof, yyjson_read_code code, yyjson_read_flag flg) { - if (cur >= end) return true; + if (cur >= eof) return true; if (code == YYJSON_READ_ERROR_LITERAL) { - if (is_truncated_str(cur, end, "true", true) || - is_truncated_str(cur, end, "false", true) || - is_truncated_str(cur, end, "null", true)) { + if (is_truncated_str(cur, eof, "true", true) || + is_truncated_str(cur, eof, "false", true) || + is_truncated_str(cur, eof, "null", true)) { return true; } } if (code == YYJSON_READ_ERROR_UNEXPECTED_CHARACTER || code == YYJSON_READ_ERROR_INVALID_NUMBER || code == YYJSON_READ_ERROR_LITERAL) { - if (has_read_flag(ALLOW_INF_AND_NAN)) { + if (has_allow(INF_AND_NAN)) { if (*cur == '-') cur++; - if (is_truncated_str(cur, end, "infinity", false) || - is_truncated_str(cur, end, "nan", false)) { + if (is_truncated_str(cur, eof, "infinity", false) || + is_truncated_str(cur, eof, "nan", false)) { return true; } } } if (code == YYJSON_READ_ERROR_UNEXPECTED_CONTENT) { - if (has_read_flag(ALLOW_INF_AND_NAN)) { + if 
(has_allow(INF_AND_NAN)) { if (hdr + 3 <= cur && - is_truncated_str(cur - 3, end, "infinity", false)) { + is_truncated_str(cur - 3, eof, "infinity", false)) { return true; /* e.g. infin would be read as inf + in */ } } } if (code == YYJSON_READ_ERROR_INVALID_STRING) { - usize len = (usize)(end - cur); + usize len = (usize)(eof - cur); /* unicode escape sequence */ if (*cur == '\\') { if (len == 1) return true; if (len <= 5) { if (*++cur != 'u') return false; - for (++cur; cur < end; cur++) { + for (++cur; cur < eof; cur++) { if (!char_is_hex(*cur)) return false; } return true; @@ -4120,77 +3509,46 @@ static_noinline bool is_truncated_end(u8 *hdr, u8 *cur, u8 *end, /* incomplete surrogate pair? */ u16 hi; if (*++cur != 'u') return false; - if (!read_hex_u16(++cur, &hi)) return false; + if (!hex_load_4(++cur, &hi)) return false; if ((hi & 0xF800) != 0xD800) return false; cur += 4; - if (cur >= end) return true; + if (cur >= eof) return true; /* valid low surrogate is DC00...DFFF */ if (*cur != '\\') return false; - if (++cur >= end) return true; + if (++cur >= eof) return true; if (*cur != 'u') return false; - if (++cur >= end) return true; + if (++cur >= eof) return true; if (*cur != 'd' && *cur != 'D') return false; - if (++cur >= end) return true; + if (++cur >= eof) return true; if ((*cur < 'c' || *cur > 'f') && (*cur < 'C' || *cur > 'F')) return false; - if (++cur >= end) return true; + if (++cur >= eof) return true; if (!char_is_hex(*cur)) return false; return true; } return false; } - /* 2 to 4 bytes UTF-8, see `read_str()` for details. 
*/ - if (*cur & 0x80) { - u8 c0 = cur[0], c1 = cur[1], c2 = cur[2]; - if (len == 1) { - /* 2 bytes UTF-8, truncated */ - if ((c0 & 0xE0) == 0xC0 && (c0 & 0x1E) != 0x00) return true; - /* 3 bytes UTF-8, truncated */ - if ((c0 & 0xF0) == 0xE0) return true; - /* 4 bytes UTF-8, truncated */ - if ((c0 & 0xF8) == 0xF0 && (c0 & 0x07) <= 0x04) return true; - } - if (len == 2) { - /* 3 bytes UTF-8, truncated */ - if ((c0 & 0xF0) == 0xE0 && - (c1 & 0xC0) == 0x80) { - u8 pat = (u8)(((c0 & 0x0F) << 1) | ((c1 & 0x20) >> 5)); - return 0x01 <= pat && pat != 0x1B; - } - /* 4 bytes UTF-8, truncated */ - if ((c0 & 0xF8) == 0xF0 && - (c1 & 0xC0) == 0x80) { - u8 pat = (u8)(((c0 & 0x07) << 2) | ((c1 & 0x30) >> 4)); - return 0x01 <= pat && pat <= 0x10; - } - } - if (len == 3) { - /* 4 bytes UTF-8, truncated */ - if ((c0 & 0xF8) == 0xF0 && - (c1 & 0xC0) == 0x80 && - (c2 & 0xC0) == 0x80) { - u8 pat = (u8)(((c0 & 0x07) << 2) | ((c1 & 0x30) >> 4)); - return 0x01 <= pat && pat <= 0x10; - } - } + /* 2 to 4 bytes UTF-8 */ + if (is_truncated_utf8(cur, eof)) { + return true; } } - if (has_read_flag(ALLOW_COMMENTS)) { + if (has_allow(COMMENTS)) { if (code == YYJSON_READ_ERROR_INVALID_COMMENT) { /* unclosed multiline comment */ return true; } if (code == YYJSON_READ_ERROR_UNEXPECTED_CHARACTER && - *cur == '/' && cur + 1 == end) { + *cur == '/' && cur + 1 == eof) { /* truncated beginning of comment */ return true; } } if (code == YYJSON_READ_ERROR_UNEXPECTED_CHARACTER && - has_read_flag(ALLOW_BOM)) { + has_allow(BOM)) { /* truncated UTF-8 BOM */ - usize len = (usize)(end - cur); + usize len = (usize)(eof - cur); if (cur == hdr && len < 3 && !memcmp(hdr, "\xEF\xBB\xBF", len)) { return true; } @@ -4200,40 +3558,20 @@ static_noinline bool is_truncated_end(u8 *hdr, u8 *cur, u8 *end, -#if YYJSON_HAS_IEEE_754 && !YYJSON_DISABLE_FAST_FP_CONV /* FP_READER */ +#if !YYJSON_DISABLE_FAST_FP_CONV /* FP_READER */ /*============================================================================== - * BigInt For 
Floating Point Number Reader + * MARK: - BigInt For Floating Point Number Reader (Private) * * The bigint algorithm is used by floating-point number reader to get correctly * rounded result for numbers with lots of digits. This part of code is rarely * used for common numbers. *============================================================================*/ -/** Maximum exponent of exact pow10 */ -#define U64_POW10_MAX_EXP 19 - -/** Table: [ 10^0, ..., 10^19 ] (generate with misc/make_tables.c) */ -static const u64 u64_pow10_table[U64_POW10_MAX_EXP + 1] = { - U64(0x00000000, 0x00000001), U64(0x00000000, 0x0000000A), - U64(0x00000000, 0x00000064), U64(0x00000000, 0x000003E8), - U64(0x00000000, 0x00002710), U64(0x00000000, 0x000186A0), - U64(0x00000000, 0x000F4240), U64(0x00000000, 0x00989680), - U64(0x00000000, 0x05F5E100), U64(0x00000000, 0x3B9ACA00), - U64(0x00000002, 0x540BE400), U64(0x00000017, 0x4876E800), - U64(0x000000E8, 0xD4A51000), U64(0x00000918, 0x4E72A000), - U64(0x00005AF3, 0x107A4000), U64(0x00038D7E, 0xA4C68000), - U64(0x002386F2, 0x6FC10000), U64(0x01634578, 0x5D8A0000), - U64(0x0DE0B6B3, 0xA7640000), U64(0x8AC72304, 0x89E80000) -}; - -/** Maximum numbers of chunks used by a bigint (58 is enough here). */ -#define BIGINT_MAX_CHUNKS 64 - /** Unsigned arbitrarily large integer */ typedef struct bigint { u32 used; /* used chunks count, should not be 0 */ - u64 bits[BIGINT_MAX_CHUNKS]; /* chunks */ + u64 bits[64]; /* chunks (58 is enough here) */ } bigint; /** @@ -4310,8 +3648,8 @@ static_inline void bigint_mul_pow2(bigint *big, u32 exp) { @param exp An exponent integer (cannot be 0). 
*/ static_inline void bigint_mul_pow10(bigint *big, i32 exp) { - for (; exp >= U64_POW10_MAX_EXP; exp -= U64_POW10_MAX_EXP) { - bigint_mul_u64(big, u64_pow10_table[U64_POW10_MAX_EXP]); + for (; exp >= U64_POW10_MAX_EXACT_EXP; exp -= U64_POW10_MAX_EXACT_EXP) { + bigint_mul_u64(big, u64_pow10_table[U64_POW10_MAX_EXACT_EXP]); } if (exp) { bigint_mul_u64(big, u64_pow10_table[exp]); @@ -4400,7 +3738,7 @@ static_noinline void bigint_set_buf(bigint *big, u64 sig, i32 *exp, /*============================================================================== - * Diy Floating Point + * MARK: - Diy Floating Point (Private) *============================================================================*/ /** "Do It Yourself Floating Point" struct. */ @@ -4446,7 +3784,7 @@ static_inline u64 diy_fp_to_ieee_raw(diy_fp fp) { if (unlikely(exp >= F64_MAX_BIN_EXP)) { /* overflow */ - return F64_RAW_INF; + return F64_BITS_INF; } else if (likely(exp >= F64_MIN_BIN_EXP - 1)) { /* normal */ exp += F64_EXP_BIAS; @@ -4463,20 +3801,9 @@ static_inline u64 diy_fp_to_ieee_raw(diy_fp fp) { /*============================================================================== - * JSON Number Reader (IEEE-754) + * MARK: - Number Reader (Private) *============================================================================*/ -/** Maximum exact pow10 exponent for double value. */ -#define F64_POW10_EXP_MAX_EXACT 22 - -#if YYJSON_DOUBLE_MATH_CORRECT -/** Cached pow10 table. */ -static const f64 f64_pow10_table[] = { - 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, - 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22 -}; -#endif - /** Read a JSON number. 
@@ -4488,7 +3815,6 @@ static const f64 f64_pow10_table[] = { */ static_inline bool read_num(u8 **ptr, u8 **pre, yyjson_read_flag flg, yyjson_val *val, const char **msg) { - #define return_err(_pos, _msg) do { \ *msg = _msg; \ *end = _pos; \ @@ -4520,13 +3846,13 @@ static_inline bool read_num(u8 **ptr, u8 **pre, yyjson_read_flag flg, } while (false) #define return_inf() do { \ - if (has_read_flag(BIGNUM_AS_RAW)) return_raw(); \ - if (has_read_flag(ALLOW_INF_AND_NAN)) return_f64_bin(F64_RAW_INF); \ + if (has_flg(BIGNUM_AS_RAW)) return_raw(); \ + if (has_allow(INF_AND_NAN)) return_f64_bin(F64_BITS_INF); \ else return_err(hdr, "number is infinity when parsed as double"); \ } while (false) #define return_raw() do { \ - if (*pre) **pre = '\0'; /* add null-terminator for previous raw string */ \ + **pre = '\0'; /* add null-terminator for previous raw string */ \ val->tag = ((u64)(cur - hdr) << YYJSON_TAG_BIT) | YYJSON_TYPE_RAW; \ val->uni.str = (const char *)hdr; \ *pre = cur; *end = cur; return true; \ @@ -4551,7 +3877,7 @@ static_inline bool read_num(u8 **ptr, u8 **pre, yyjson_read_flag flg, bool sign; /* read number as raw string if has `YYJSON_READ_NUMBER_AS_RAW` flag */ - if (has_read_flag(NUMBER_AS_RAW)) { + if (has_flg(NUMBER_AS_RAW)) { return read_num_raw(ptr, pre, flg, val, msg); } @@ -4559,40 +3885,59 @@ static_inline bool read_num(u8 **ptr, u8 **pre, yyjson_read_flag flg, cur += sign; /* begin with a leading zero or non-digit */ - if (unlikely(!digi_is_nonzero(*cur))) { /* 0 or non-digit char */ + while (unlikely(!char_is_nonzero(*cur))) { /* 0 or non-digit char */ if (unlikely(*cur != '0')) { /* non-digit char */ - if (has_read_flag(ALLOW_INF_AND_NAN)) { - if (read_inf_or_nan(sign, &cur, pre, flg, val)) { - *end = cur; - return true; + if (has_allow(EXT_NUMBER)) { + if (*cur == '+' && cur == hdr) { /* leading `+` sign */ + cur++; + continue; + } + if (*cur == '.' && char_is_digit(cur[1])) { /* e.g. 
'.123' */ + goto leading_dot; } } - return_err(cur, "no digit after minus sign"); + if (has_allow(INF_AND_NAN)) { + if (read_inf_or_nan(ptr, pre, flg, val)) return true; + } + return_err(cur, "no digit after sign"); } /* begin with 0 */ - if (likely(!digi_is_digit_or_fp(*++cur))) return_0(); + if (likely(!char_is_digit_or_fp(*++cur))) { + if (has_allow(EXT_NUMBER) && char_to_lower(*cur) == 'x') { /* hex */ + return read_num_hex(ptr, pre, flg, val, msg); + } + return_0(); + } if (likely(*cur == '.')) { +leading_dot: dot_pos = cur++; - if (unlikely(!digi_is_digit(*cur))) { + if (unlikely(!char_is_digit(*cur))) { + if (has_allow(EXT_NUMBER)) { + if (char_is_exp(*cur)) { + goto digi_exp_more; + } else { + return_f64_bin(0); + } + } return_err(cur, "no digit after decimal point"); } while (unlikely(*cur == '0')) cur++; - if (likely(digi_is_digit(*cur))) { + if (likely(char_is_digit(*cur))) { /* first non-zero digit after decimal point */ sig = (u64)(*cur - '0'); /* read first digit */ cur--; goto digi_frac_1; /* continue read fraction part */ } } - if (unlikely(digi_is_digit(*cur))) { + if (unlikely(char_is_digit(*cur))) { return_err(cur - 1, "number with leading zero is not allowed"); } - if (unlikely(digi_is_exp(*cur))) { /* 0 with any exponent is still 0 */ - cur += (usize)1 + digi_is_sign(cur[1]); - if (unlikely(!digi_is_digit(*cur))) { + if (unlikely(char_is_exp(*cur))) { /* 0 with any exponent is still 0 */ + cur += (usize)1 + char_is_sign(cur[1]); + if (unlikely(!char_is_digit(*cur))) { return_err(cur, "no digit after exponent sign"); } - while (digi_is_digit(*++cur)); + while (char_is_digit(*++cur)); } return_f64_bin(0); } @@ -4617,10 +3962,10 @@ static_inline bool read_num(u8 **ptr, u8 **pre, yyjson_read_flag flg, cur += 19; /* skip continuous 19 digits */ - if (!digi_is_digit_or_fp(*cur)) { + if (!char_is_digit_or_fp(*cur)) { /* this number is an integer consisting of 19 digits */ if (sign && (sig > ((u64)1 << 63))) { /* overflow */ - if 
(has_read_flag(BIGNUM_AS_RAW)) return_raw(); + if (has_flg(BIGNUM_AS_RAW)) return_raw(); return_f64(unsafe_yyjson_u64_to_f64(sig)); } return_i64(sig); @@ -4631,7 +3976,7 @@ static_inline bool read_num(u8 **ptr, u8 **pre, yyjson_read_flag flg, /* process first non-digit character */ #define expr_sepr(i) \ digi_sepr_##i: \ - if (likely(!digi_is_fp(cur[i]))) { cur += i; return_i64(sig); } \ + if (likely(!char_is_fp(cur[i]))) { cur += i; return_i64(sig); } \ dot_pos = cur + i; \ if (likely(cur[i] == '.')) goto digi_frac_##i; \ cur += i; sig_end = cur; goto digi_exp_more; @@ -4649,7 +3994,7 @@ static_inline bool read_num(u8 **ptr, u8 **pre, yyjson_read_flag flg, #undef expr_frac cur += 20; /* skip 19 digits and 1 decimal point */ - if (!digi_is_digit(*cur)) goto digi_frac_end; /* fraction part end */ + if (!char_is_digit(*cur)) goto digi_frac_end; /* fraction part end */ goto digi_frac_more; /* read more digits in fraction part */ @@ -4664,8 +4009,8 @@ static_inline bool read_num(u8 **ptr, u8 **pre, yyjson_read_flag flg, /* read more digits in integral part */ digi_intg_more: - if (digi_is_digit(*cur)) { - if (!digi_is_digit_or_fp(cur[1])) { + if (char_is_digit(*cur)) { + if (!char_is_digit_or_fp(cur[1])) { /* this number is an integer consisting of 20 digits */ num = (u64)(*cur - '0'); if ((sig < (U64_MAX / 10)) || @@ -4674,7 +4019,7 @@ digi_intg_more: cur++; /* convert to double if overflow */ if (sign) { - if (has_read_flag(BIGNUM_AS_RAW)) return_raw(); + if (has_flg(BIGNUM_AS_RAW)) return_raw(); return_f64(unsafe_yyjson_u64_to_f64(sig)); } return_i64(sig); @@ -4682,14 +4027,17 @@ digi_intg_more: } } - if (digi_is_exp(*cur)) { + if (char_is_exp(*cur)) { dot_pos = cur; goto digi_exp_more; } if (*cur == '.') { dot_pos = cur++; - if (!digi_is_digit(*cur)) { + if (unlikely(!char_is_digit(*cur))) { + if (has_allow(EXT_NUMBER)) { + goto digi_frac_end; + } return_err(cur, "no digit after decimal point"); } } @@ -4699,17 +4047,19 @@ digi_intg_more: digi_frac_more: sig_cut = 
cur; /* too large to fit in u64, excess digits need to be cut */ sig += (*cur >= '5'); /* round */ - while (digi_is_digit(*++cur)); + while (char_is_digit(*++cur)); if (!dot_pos) { - if (!digi_is_fp(*cur) && has_read_flag(BIGNUM_AS_RAW)) { + if (!char_is_fp(*cur) && has_flg(BIGNUM_AS_RAW)) { return_raw(); /* it's a large integer */ } dot_pos = cur; if (*cur == '.') { - if (!digi_is_digit(*++cur)) { - return_err(cur, "no digit after decimal point"); + if (unlikely(!char_is_digit(*++cur))) { + if (!has_allow(EXT_NUMBER)) { + return_err(cur, "no digit after decimal point"); + } } - while (digi_is_digit(*cur)) cur++; + while (char_is_digit(*cur)) cur++; } } exp_sig = (i64)(dot_pos - sig_cut); @@ -4717,25 +4067,27 @@ digi_frac_more: /* ignore trailing zeros */ tmp = cur - 1; - while (*tmp == '0' || *tmp == '.') tmp--; + while ((*tmp == '0' || *tmp == '.') && tmp > hdr) tmp--; if (tmp < sig_cut) { sig_cut = NULL; } else { sig_end = cur; } - if (digi_is_exp(*cur)) goto digi_exp_more; + if (char_is_exp(*cur)) goto digi_exp_more; goto digi_exp_finish; /* fraction part end */ digi_frac_end: if (unlikely(dot_pos + 1 == cur)) { - return_err(cur, "no digit after decimal point"); + if (!has_allow(EXT_NUMBER)) { + return_err(cur, "no digit after decimal point"); + } } sig_end = cur; exp_sig = -(i64)((u64)(cur - dot_pos) - 1); - if (likely(!digi_is_exp(*cur))) { + if (likely(!char_is_exp(*cur))) { if (unlikely(exp_sig < F64_MIN_DEC_EXP - 19)) { return_f64_bin(0); /* underflow */ } @@ -4749,15 +4101,15 @@ digi_frac_end: /* read exponent part */ digi_exp_more: exp_sign = (*++cur == '-'); - cur += digi_is_sign(*cur); - if (unlikely(!digi_is_digit(*cur))) { + cur += char_is_sign(*cur); + if (unlikely(!char_is_digit(*cur))) { return_err(cur, "no digit after exponent sign"); } while (*cur == '0') cur++; /* read exponent literal */ tmp = cur; - while (digi_is_digit(*cur)) { + while (char_is_digit(*cur)) { exp_lit = (i64)((u8)(*cur++ - '0') + (u64)exp_lit * 10); } if (unlikely(cur - tmp 
>= U64_SAFE_DIG)) { @@ -4798,8 +4150,8 @@ digi_finish: */ #if YYJSON_DOUBLE_MATH_CORRECT if (sig < ((u64)1 << 53) && - exp >= -F64_POW10_EXP_MAX_EXACT && - exp <= +F64_POW10_EXP_MAX_EXACT) { + exp >= -F64_POW10_MAX_EXACT_EXP && + exp <= +F64_POW10_MAX_EXACT_EXP) { f64 dbl = (f64)sig; if (exp < 0) { dbl /= f64_pow10_table[-exp]; @@ -5031,7 +4383,7 @@ digi_finish: /* get IEEE double raw value */ raw = diy_fp_to_ieee_raw(fp); - if (unlikely(raw == F64_RAW_INF)) return_inf(); + if (unlikely(raw == F64_BITS_INF)) return_inf(); if (likely(precision_bits <= half_way - fp_err || precision_bits >= half_way + fp_err)) { return_f64_bin(raw); /* number is accurate */ @@ -5073,7 +4425,7 @@ digi_finish: raw += (raw & 1); } - if (unlikely(raw == F64_RAW_INF)) return_inf(); + if (unlikely(raw == F64_BITS_INF)) return_inf(); return_f64_bin(raw); } @@ -5097,7 +4449,6 @@ digi_finish: */ static_inline bool read_num(u8 **ptr, u8 **pre, yyjson_read_flag flg, yyjson_val *val, const char **msg) { - #define return_err(_pos, _msg) do { \ *msg = _msg; \ *end = _pos; \ @@ -5129,16 +4480,15 @@ static_inline bool read_num(u8 **ptr, u8 **pre, yyjson_read_flag flg, } while (false) #define return_inf() do { \ - if (has_read_flag(BIGNUM_AS_RAW)) return_raw(); \ - if (has_read_flag(ALLOW_INF_AND_NAN)) return_f64_bin(F64_RAW_INF); \ + if (has_flg(BIGNUM_AS_RAW)) return_raw(); \ + if (has_allow(INF_AND_NAN)) return_f64_bin(F64_BITS_INF); \ else return_err(hdr, "number is infinity when parsed as double"); \ } while (false) #define return_raw() do { \ - if (*pre) **pre = '\0'; /* add null-terminator for previous raw string */ \ val->tag = ((u64)(cur - hdr) << YYJSON_TAG_BIT) | YYJSON_TYPE_RAW; \ val->uni.str = (const char *)hdr; \ - *pre = cur; *end = cur; return true; \ + **pre = '\0'; *pre = cur; *end = cur; return true; \ } while (false) u64 sig, num; @@ -5150,7 +4500,7 @@ static_inline bool read_num(u8 **ptr, u8 **pre, yyjson_read_flag flg, bool sign; /* read number as raw string if has 
`YYJSON_READ_NUMBER_AS_RAW` flag */ - if (has_read_flag(NUMBER_AS_RAW)) { + if (has_flg(NUMBER_AS_RAW)) { return read_num_raw(ptr, pre, flg, val, msg); } @@ -5159,21 +4509,34 @@ static_inline bool read_num(u8 **ptr, u8 **pre, yyjson_read_flag flg, sig = (u8)(*cur - '0'); /* read first digit, check leading zero */ - if (unlikely(!digi_is_digit(*cur))) { - if (has_read_flag(ALLOW_INF_AND_NAN)) { - if (read_inf_or_nan(sign, &cur, pre, flg, val)) { - *end = cur; - return true; + while (unlikely(!char_is_digit(*cur))) { + if (has_allow(EXT_NUMBER)) { + if (*cur == '+' && cur == hdr) { /* leading `+` sign */ + cur++; + sig = (u8)(*cur - '0'); + continue; + } + if (*cur == '.' && char_is_num(cur[1])) { /* no integer part */ + goto read_double; /* e.g. '.123' */ } } - return_err(cur, "no digit after minus sign"); + if (has_allow(INF_AND_NAN)) { + if (read_inf_or_nan(ptr, pre, flg, val)) return true; + } + return_err(cur, "no digit after sign"); } if (*cur == '0') { cur++; - if (unlikely(digi_is_digit(*cur))) { + if (unlikely(char_is_digit(*cur))) { return_err(cur - 1, "number with leading zero is not allowed"); } - if (!digi_is_fp(*cur)) return_0(); + if (!char_is_fp(*cur)) { + if (has_allow(EXT_NUMBER) && + (*cur == 'x' || *cur == 'X')) { /* hex integer */ + return read_num_hex(ptr, pre, flg, val, msg); + } + return_0(); + } goto read_double; } @@ -5186,7 +4549,7 @@ static_inline bool read_num(u8 **ptr, u8 **pre, yyjson_read_flag flg, /* here are 19 continuous digits, skip them */ cur += 19; - if (digi_is_digit(cur[0]) && !digi_is_digit_or_fp(cur[1])) { + if (char_is_digit(cur[0]) && !char_is_digit_or_fp(cur[1])) { /* this number is an integer consisting of 20 digits */ num = (u8)(*cur - '0'); if ((sig < (U64_MAX / 10)) || @@ -5194,7 +4557,7 @@ static_inline bool read_num(u8 **ptr, u8 **pre, yyjson_read_flag flg, sig = num + sig * 10; cur++; if (sign) { - if (has_read_flag(BIGNUM_AS_RAW)) return_raw(); + if (has_flg(BIGNUM_AS_RAW)) return_raw(); 
return_f64(unsafe_yyjson_u64_to_f64(sig)); } return_i64(sig); @@ -5203,10 +4566,10 @@ static_inline bool read_num(u8 **ptr, u8 **pre, yyjson_read_flag flg, intg_end: /* continuous digits ended */ - if (!digi_is_digit_or_fp(*cur)) { + if (!char_is_digit_or_fp(*cur)) { /* this number is an integer consisting of 1 to 19 digits */ if (sign && (sig > ((u64)1 << 63))) { - if (has_read_flag(BIGNUM_AS_RAW)) return_raw(); + if (has_flg(BIGNUM_AS_RAW)) return_raw(); return_f64(unsafe_yyjson_u64_to_f64(sig)); } return_i64(sig); @@ -5214,28 +4577,33 @@ intg_end: read_double: /* this number should be read as double */ - while (digi_is_digit(*cur)) cur++; - if (!digi_is_fp(*cur) && has_read_flag(BIGNUM_AS_RAW)) { + while (char_is_digit(*cur)) cur++; + if (!char_is_fp(*cur) && has_flg(BIGNUM_AS_RAW)) { return_raw(); /* it's a large integer */ } - if (*cur == '.') { + while (*cur == '.') { /* skip fraction part */ dot = cur; cur++; - if (!digi_is_digit(*cur)) { - return_err(cur, "no digit after decimal point"); + if (!char_is_digit(*cur)) { + if (has_allow(EXT_NUMBER)) { + break; + } else { + return_err(cur, "no digit after decimal point"); + } } cur++; - while (digi_is_digit(*cur)) cur++; + while (char_is_digit(*cur)) cur++; + break; } - if (digi_is_exp(*cur)) { + if (char_is_exp(*cur)) { /* skip exponent part */ - cur += 1 + digi_is_sign(cur[1]); - if (!digi_is_digit(*cur)) { + cur += 1 + char_is_sign(cur[1]); + if (!char_is_digit(*cur)) { return_err(cur, "no digit after exponent sign"); } cur++; - while (digi_is_digit(*cur)) cur++; + while (char_is_digit(*cur)) cur++; } /* @@ -5285,147 +4653,85 @@ read_double: /*============================================================================== - * JSON String Reader + * MARK: - String Reader (Private) *============================================================================*/ +/** Read unicode escape sequence. 
*/ +static_inline bool read_uni_esc(u8 **src_ptr, u8 **dst_ptr, const char **msg) { +#define return_err(_end, _msg) *msg = _msg; *src_ptr = _end; return false + + u8 *src = *src_ptr; + u8 *dst = *dst_ptr; + u16 hi, lo; + u32 uni; + + src += 2; /* skip `\u` */ + if (unlikely(!hex_load_4(src, &hi))) { + return_err(src - 2, "invalid escaped sequence in string"); + } + src += 4; /* skip hex */ + if (likely((hi & 0xF800) != 0xD800)) { + /* a BMP character */ + if (hi >= 0x800) { + *dst++ = (u8)(0xE0 | (hi >> 12)); + *dst++ = (u8)(0x80 | ((hi >> 6) & 0x3F)); + *dst++ = (u8)(0x80 | (hi & 0x3F)); + } else if (hi >= 0x80) { + *dst++ = (u8)(0xC0 | (hi >> 6)); + *dst++ = (u8)(0x80 | (hi & 0x3F)); + } else { + *dst++ = (u8)hi; + } + } else { + /* a non-BMP character, represented as a surrogate pair */ + if (unlikely((hi & 0xFC00) != 0xD800)) { + return_err(src - 6, "invalid high surrogate in string"); + } + if (unlikely(!byte_match_2(src, "\\u"))) { + return_err(src - 6, "no low surrogate in string"); + } + if (unlikely(!hex_load_4(src + 2, &lo))) { + return_err(src - 6, "invalid escape in string"); + } + if (unlikely((lo & 0xFC00) != 0xDC00)) { + return_err(src - 6, "invalid low surrogate in string"); + } + uni = ((((u32)hi - 0xD800) << 10) | + ((u32)lo - 0xDC00)) + 0x10000; + *dst++ = (u8)(0xF0 | (uni >> 18)); + *dst++ = (u8)(0x80 | ((uni >> 12) & 0x3F)); + *dst++ = (u8)(0x80 | ((uni >> 6) & 0x3F)); + *dst++ = (u8)(0x80 | (uni & 0x3F)); + src += 6; + } + *src_ptr = src; + *dst_ptr = dst; + return true; +#undef return_err +} + /** Read a JSON string. - @param ptr The head pointer of string before '"' prefix (inout). - @param lst JSON last position. - @param inv Allow invalid unicode. + @param quo The quote character (single quote or double quote). + @param ptr The head pointer of string before quote (inout). + @param eof JSON end position. + @param flg JSON read flag. @param val The string value to be written. @param msg The error message pointer. 
@param con Continuation for incremental parsing. @return Whether success. */ -static_inline bool read_str(u8 **ptr, u8 *lst, bool inv, yyjson_val *val, - const char **msg, u8 **con) { +static_inline bool read_str_opt(u8 quo, u8 **ptr, u8 *eof, yyjson_read_flag flg, + yyjson_val *val, const char **msg, u8 *con[2]) { /* - Each unicode code point is encoded as 1 to 4 bytes in UTF-8 encoding, - we use 4-byte mask and pattern value to validate UTF-8 byte sequence, - this requires the input data to have 4-byte zero padding. - --------------------------------------------------- - 1 byte - unicode range [U+0000, U+007F] - unicode min [.......0] - unicode max [.1111111] - bit pattern [0.......] - --------------------------------------------------- - 2 byte - unicode range [U+0080, U+07FF] - unicode min [......10 ..000000] - unicode max [...11111 ..111111] - bit require [...xxxx. ........] (1E 00) - bit mask [xxx..... xx......] (E0 C0) - bit pattern [110..... 10......] (C0 80) - --------------------------------------------------- - 3 byte - unicode range [U+0800, U+FFFF] - unicode min [........ ..100000 ..000000] - unicode max [....1111 ..111111 ..111111] - bit require [....xxxx ..x..... ........] (0F 20 00) - bit mask [xxxx.... xx...... xx......] (F0 C0 C0) - bit pattern [1110.... 10...... 10......] (E0 80 80) - --------------------------------------------------- - 3 byte invalid (reserved for surrogate halves) - unicode range [U+D800, U+DFFF] - unicode min [....1101 ..100000 ..000000] - unicode max [....1101 ..111111 ..111111] - bit mask [....xxxx ..x..... ........] (0F 20 00) - bit pattern [....1101 ..1..... ........] (0D 20 00) - --------------------------------------------------- - 4 byte - unicode range [U+10000, U+10FFFF] - unicode min [........ ...10000 ..000000 ..000000] - unicode max [.....100 ..001111 ..111111 ..111111] - bit require [.....xxx ..xx.... ........ ........] (07 30 00 00) - bit mask [xxxxx... xx...... xx...... xx......] 
(F8 C0 C0 C0) - bit pattern [11110... 10...... 10...... 10......] (F0 80 80 80) - --------------------------------------------------- + GCC may sometimes load variables into registers too early, causing + unnecessary instructions and performance degradation. This inline assembly + serves as a hint to GCC: 'This variable will be modified, so avoid loading + it too early.' Other compilers like MSVC, Clang, and ICC can generate the + expected instructions without needing this hint. + + Check out this example: https://godbolt.org/z/YG6a5W5Ec */ -#if YYJSON_ENDIAN == YYJSON_BIG_ENDIAN - const u32 b1_mask = 0x80000000UL; - const u32 b1_patt = 0x00000000UL; - const u32 b2_mask = 0xE0C00000UL; - const u32 b2_patt = 0xC0800000UL; - const u32 b2_requ = 0x1E000000UL; - const u32 b3_mask = 0xF0C0C000UL; - const u32 b3_patt = 0xE0808000UL; - const u32 b3_requ = 0x0F200000UL; - const u32 b3_erro = 0x0D200000UL; - const u32 b4_mask = 0xF8C0C0C0UL; - const u32 b4_patt = 0xF0808080UL; - const u32 b4_requ = 0x07300000UL; - const u32 b4_err0 = 0x04000000UL; - const u32 b4_err1 = 0x03300000UL; -#elif YYJSON_ENDIAN == YYJSON_LITTLE_ENDIAN - const u32 b1_mask = 0x00000080UL; - const u32 b1_patt = 0x00000000UL; - const u32 b2_mask = 0x0000C0E0UL; - const u32 b2_patt = 0x000080C0UL; - const u32 b2_requ = 0x0000001EUL; - const u32 b3_mask = 0x00C0C0F0UL; - const u32 b3_patt = 0x008080E0UL; - const u32 b3_requ = 0x0000200FUL; - const u32 b3_erro = 0x0000200DUL; - const u32 b4_mask = 0xC0C0C0F8UL; - const u32 b4_patt = 0x808080F0UL; - const u32 b4_requ = 0x00003007UL; - const u32 b4_err0 = 0x00000004UL; - const u32 b4_err1 = 0x00003003UL; -#else - /* this should be evaluated at compile-time */ - v32_uni b1_mask_uni = {{ 0x80, 0x00, 0x00, 0x00 }}; - v32_uni b1_patt_uni = {{ 0x00, 0x00, 0x00, 0x00 }}; - v32_uni b2_mask_uni = {{ 0xE0, 0xC0, 0x00, 0x00 }}; - v32_uni b2_patt_uni = {{ 0xC0, 0x80, 0x00, 0x00 }}; - v32_uni b2_requ_uni = {{ 0x1E, 0x00, 0x00, 0x00 }}; - v32_uni b3_mask_uni = {{ 0xF0, 
0xC0, 0xC0, 0x00 }}; - v32_uni b3_patt_uni = {{ 0xE0, 0x80, 0x80, 0x00 }}; - v32_uni b3_requ_uni = {{ 0x0F, 0x20, 0x00, 0x00 }}; - v32_uni b3_erro_uni = {{ 0x0D, 0x20, 0x00, 0x00 }}; - v32_uni b4_mask_uni = {{ 0xF8, 0xC0, 0xC0, 0xC0 }}; - v32_uni b4_patt_uni = {{ 0xF0, 0x80, 0x80, 0x80 }}; - v32_uni b4_requ_uni = {{ 0x07, 0x30, 0x00, 0x00 }}; - v32_uni b4_err0_uni = {{ 0x04, 0x00, 0x00, 0x00 }}; - v32_uni b4_err1_uni = {{ 0x03, 0x30, 0x00, 0x00 }}; - u32 b1_mask = b1_mask_uni.u; - u32 b1_patt = b1_patt_uni.u; - u32 b2_mask = b2_mask_uni.u; - u32 b2_patt = b2_patt_uni.u; - u32 b2_requ = b2_requ_uni.u; - u32 b3_mask = b3_mask_uni.u; - u32 b3_patt = b3_patt_uni.u; - u32 b3_requ = b3_requ_uni.u; - u32 b3_erro = b3_erro_uni.u; - u32 b4_mask = b4_mask_uni.u; - u32 b4_patt = b4_patt_uni.u; - u32 b4_requ = b4_requ_uni.u; - u32 b4_err0 = b4_err0_uni.u; - u32 b4_err1 = b4_err1_uni.u; -#endif - -#define is_valid_seq_1(uni) ( \ - ((uni & b1_mask) == b1_patt) \ -) - -#define is_valid_seq_2(uni) ( \ - ((uni & b2_mask) == b2_patt) && \ - ((uni & b2_requ)) \ -) - -#define is_valid_seq_3(uni) ( \ - ((uni & b3_mask) == b3_patt) && \ - ((tmp = (uni & b3_requ))) && \ - ((tmp != b3_erro)) \ -) - -#define is_valid_seq_4(uni) ( \ - ((uni & b4_mask) == b4_patt) && \ - ((tmp = (uni & b4_requ))) && \ - ((tmp & b4_err0) == 0 || (tmp & b4_err1) == 0) \ -) - #define return_err(_end, _msg) do { \ *msg = _msg; \ *end = _end; \ @@ -5433,36 +4739,36 @@ static_inline bool read_str(u8 **ptr, u8 *lst, bool inv, yyjson_val *val, return false; \ } while (false) - u8 *cur = *ptr; + u8 *hdr = *ptr + 1; u8 **end = ptr; - u8 *src = ++cur, *dst = NULL, *pos; + u8 *src = hdr, *dst = NULL, *pos; u16 hi, lo; u32 uni, tmp; - if (unlikely(con && con[0])) { - /* Resume incremental parsing. */ + /* Resume incremental parsing. */ + if (con && unlikely(con[0])) { src = con[0]; dst = con[1]; if (dst) goto copy_ascii; } skip_ascii: - /* Most strings have no escaped characters, so we can jump them quickly. 
*/ - -skip_ascii_begin: /* + Most strings have no escaped characters, so we can jump them quickly. + We want to make loop unrolling, as shown in the following code. Some compiler may not generate instructions as expected, so we rewrite it with explicit goto statements. We hope the compiler can generate instructions like this: https://godbolt.org/z/8vjsYq - while (true) repeat16({ - if (likely(!(char_is_ascii_stop(*src)))) src++; - else break; - }) + while (true) repeat16({ + if (likely((char_is_ascii_skip(*src)))) src++; + else break; + }) */ + if (quo == '"') { #define expr_jump(i) \ - if (likely(!char_is_ascii_stop(src[i]))) {} \ + if (likely(char_is_ascii_skip(src[i]))) {} \ else goto skip_ascii_stop##i; #define expr_stop(i) \ @@ -5472,29 +4778,36 @@ skip_ascii_begin: repeat16_incr(expr_jump) src += 16; - goto skip_ascii_begin; + goto skip_ascii; repeat16_incr(expr_stop) #undef expr_jump #undef expr_stop + } else { +#define expr_jump(i) \ + if (likely(char_is_ascii_skip_sq(src[i]))) {} \ + else goto skip_ascii_stop_sq##i; + +#define expr_stop(i) \ + skip_ascii_stop_sq##i: \ + src += i; \ + goto skip_ascii_end; + + repeat16_incr(expr_jump) + src += 16; + goto skip_ascii; + repeat16_incr(expr_stop) + +#undef expr_jump +#undef expr_stop + } skip_ascii_end: - - /* - GCC may store src[i] in a register at each line of expr_jump(i) above. - These instructions are useless and will degrade performance. - This inline asm is a hint for gcc: "the memory has been modified, - do not cache it". - - MSVC, Clang, ICC can generate expected instructions without this hint. - */ -#if YYJSON_IS_REAL_GCC - __asm__ volatile("":"=m"(*src)); -#endif - if (likely(*src == '"')) { - val->tag = ((u64)(src - cur) << YYJSON_TAG_BIT) | - (u64)(YYJSON_TYPE_STR | YYJSON_SUBTYPE_NOESC); - val->uni.str = (const char *)cur; + gcc_store_barrier(*src); + if (likely(*src == quo)) { + val->tag = ((u64)(src - hdr) << YYJSON_TAG_BIT) | YYJSON_TYPE_STR | + (quo == '"' ? 
YYJSON_SUBTYPE_NOESC : 0); + val->uni.str = (const char *)hdr; *src = '\0'; *end = src + 1; if (con) con[0] = con[1] = NULL; @@ -5527,23 +4840,23 @@ skip_utf8: }) #else uni = byte_load_4(src); - while (is_valid_seq_3(uni)) { + while (is_utf8_seq3(uni)) { src += 3; uni = byte_load_4(src); } - if (is_valid_seq_1(uni)) goto skip_ascii; - while (is_valid_seq_2(uni)) { + if (is_utf8_seq1(uni)) goto skip_ascii; + while (is_utf8_seq2(uni)) { src += 2; uni = byte_load_4(src); } - while (is_valid_seq_4(uni)) { + while (is_utf8_seq4(uni)) { src += 4; uni = byte_load_4(src); } #endif if (unlikely(pos == src)) { - if (!inv) return_err(src, "invalid UTF-8 encoding in string"); - ++src; + if (has_allow(INVALID_UNICODE)) ++src; + else return_err(src, "invalid UTF-8 encoding in string"); } goto skip_ascii; } @@ -5562,57 +4875,70 @@ copy_escape: case 'r': *dst++ = '\r'; src++; break; case 't': *dst++ = '\t'; src++; break; case 'u': - if (unlikely(!read_hex_u16(++src, &hi))) { - return_err(src - 2, "invalid escaped sequence in string"); - } - src += 4; - if (likely((hi & 0xF800) != 0xD800)) { - /* a BMP character */ - if (hi >= 0x800) { - *dst++ = (u8)(0xE0 | (hi >> 12)); - *dst++ = (u8)(0x80 | ((hi >> 6) & 0x3F)); - *dst++ = (u8)(0x80 | (hi & 0x3F)); - } else if (hi >= 0x80) { - *dst++ = (u8)(0xC0 | (hi >> 6)); - *dst++ = (u8)(0x80 | (hi & 0x3F)); - } else { - *dst++ = (u8)hi; - } - } else { - /* a non-BMP character, represented as a surrogate pair */ - if (unlikely((hi & 0xFC00) != 0xD800)) { - return_err(src - 6, "invalid high surrogate in string"); - } - if (unlikely(!byte_match_2(src, "\\u"))) { - return_err(src - 6, "no low surrogate in string"); - } - if (unlikely(!read_hex_u16(src + 2, &lo))) { - return_err(src - 6, "invalid escape in string"); - } - if (unlikely((lo & 0xFC00) != 0xDC00)) { - return_err(src - 6, "invalid low surrogate in string"); - } - uni = ((((u32)hi - 0xD800) << 10) | - ((u32)lo - 0xDC00)) + 0x10000; - *dst++ = (u8)(0xF0 | (uni >> 18)); - *dst++ = 
(u8)(0x80 | ((uni >> 12) & 0x3F)); - *dst++ = (u8)(0x80 | ((uni >> 6) & 0x3F)); - *dst++ = (u8)(0x80 | (uni & 0x3F)); - src += 6; - } + src--; + if (!read_uni_esc(&src, &dst, msg)) return_err(src, *msg); break; - default: return_err(src - 1, "invalid escaped sequence in string"); + default: { + if (has_allow(EXT_ESCAPE)) { + /* read extended escape (non-standard) */ + switch (*src) { + case '\'': *dst++ = '\''; src++; break; + case 'a': *dst++ = '\a'; src++; break; + case 'v': *dst++ = '\v'; src++; break; + case '?': *dst++ = '\?'; src++; break; + case 'e': *dst++ = 0x1B; src++; break; + case '0': + if (!char_is_digit(src[1])) { + *dst++ = '\0'; src++; break; + } + return_err(src - 1, "octal escape is not allowed"); + case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return_err(src - 1, "invalid number escape"); + case 'x': { + u8 c; + if (hex_load_2(src + 1, &c)) { + src += 3; + if (c <= 0x7F) { /* 1-byte ASCII */ + *dst++ = c; + } else { /* 2-byte UTF-8 */ + *dst++ = (u8)(0xC0 | (c >> 6)); + *dst++ = (u8)(0x80 | (c & 0x3F)); + } + break; + } + return_err(src - 1, "invalid hex escape"); + } + case '\n': src++; break; + case '\r': src++; src += (*src == '\n'); break; + case 0xE2: /* Line terminator: U+2028, U+2029 */ + if ((src[1] == 0x80 && src[2] == 0xA8) || + (src[1] == 0x80 && src[2] == 0xA9)) { + src += 3; + } + break; + default: + break; /* skip */ + } + } else if (quo == '\'' && *src == '\'') { + *dst++ = '\''; src++; break; + } else { + return_err(src - 1, "invalid escaped sequence in string"); + } + } } - } else if (likely(*src == '"')) { - val->tag = ((u64)(dst - cur) << YYJSON_TAG_BIT) | YYJSON_TYPE_STR; - val->uni.str = (const char *)cur; + } else if (likely(*src == quo)) { + val->tag = ((u64)(dst - hdr) << YYJSON_TAG_BIT) | YYJSON_TYPE_STR; + val->uni.str = (const char *)hdr; *dst = '\0'; *end = src + 1; if (con) con[0] = con[1] = NULL; return true; } else { - if (!inv) return_err(src, "unexpected control 
character in string"); - if (src >= lst) return_err(src, "unclosed string"); + if (!has_allow(INVALID_UNICODE)) { + return_err(src, "unexpected control character in string"); + } + if (src >= eof) return_err(src, "unclosed string"); *dst++ = *src++; } @@ -5620,119 +4946,40 @@ copy_ascii: /* Copy continuous ASCII, loop unrolling, same as the following code: - while (true) repeat16({ - if (unlikely(char_is_ascii_stop(*src))) break; - *dst++ = *src++; - }) + while (true) repeat16({ + if (char_is_ascii_skip(*src)) *dst++ = *src++; + else break; + }) */ -#if YYJSON_IS_REAL_GCC -# define expr_jump(i) \ - if (likely(!(char_is_ascii_stop(src[i])))) {} \ - else { __asm__ volatile("":"=m"(src[i])); goto copy_ascii_stop_##i; } -#else -# define expr_jump(i) \ - if (likely(!(char_is_ascii_stop(src[i])))) {} \ - else { goto copy_ascii_stop_##i; } -#endif + if (quo == '"') { +#define expr_jump(i) \ + if (likely((char_is_ascii_skip(src[i])))) {} \ + else { gcc_store_barrier(src[i]); goto copy_ascii_stop_##i; } repeat16_incr(expr_jump) #undef expr_jump + } else { +#define expr_jump(i) \ + if (likely((char_is_ascii_skip_sq(src[i])))) {} \ + else { gcc_store_barrier(src[i]); goto copy_ascii_stop_##i; } + repeat16_incr(expr_jump) +#undef expr_jump + } byte_move_16(dst, src); - src += 16; - dst += 16; + dst += 16; src += 16; goto copy_ascii; /* - The memory will be moved forward by at least 1 byte. So the `byte_move` - can be one byte more than needed to reduce the number of instructions. + The memory is copied forward since `dst < src`. + So it's safe to move one extra byte to reduce instruction count. 
*/ -copy_ascii_stop_0: - goto copy_utf8; -copy_ascii_stop_1: - byte_move_2(dst, src); - src += 1; - dst += 1; - goto copy_utf8; -copy_ascii_stop_2: - byte_move_2(dst, src); - src += 2; - dst += 2; - goto copy_utf8; -copy_ascii_stop_3: - byte_move_4(dst, src); - src += 3; - dst += 3; - goto copy_utf8; -copy_ascii_stop_4: - byte_move_4(dst, src); - src += 4; - dst += 4; - goto copy_utf8; -copy_ascii_stop_5: - byte_move_4(dst, src); - byte_move_2(dst + 4, src + 4); - src += 5; - dst += 5; - goto copy_utf8; -copy_ascii_stop_6: - byte_move_4(dst, src); - byte_move_2(dst + 4, src + 4); - src += 6; - dst += 6; - goto copy_utf8; -copy_ascii_stop_7: - byte_move_8(dst, src); - src += 7; - dst += 7; - goto copy_utf8; -copy_ascii_stop_8: - byte_move_8(dst, src); - src += 8; - dst += 8; - goto copy_utf8; -copy_ascii_stop_9: - byte_move_8(dst, src); - byte_move_2(dst + 8, src + 8); - src += 9; - dst += 9; - goto copy_utf8; -copy_ascii_stop_10: - byte_move_8(dst, src); - byte_move_2(dst + 8, src + 8); - src += 10; - dst += 10; - goto copy_utf8; -copy_ascii_stop_11: - byte_move_8(dst, src); - byte_move_4(dst + 8, src + 8); - src += 11; - dst += 11; - goto copy_utf8; -copy_ascii_stop_12: - byte_move_8(dst, src); - byte_move_4(dst + 8, src + 8); - src += 12; - dst += 12; - goto copy_utf8; -copy_ascii_stop_13: - byte_move_8(dst, src); - byte_move_4(dst + 8, src + 8); - byte_move_2(dst + 12, src + 12); - src += 13; - dst += 13; - goto copy_utf8; -copy_ascii_stop_14: - byte_move_8(dst, src); - byte_move_4(dst + 8, src + 8); - byte_move_2(dst + 12, src + 12); - src += 14; - dst += 14; - goto copy_utf8; -copy_ascii_stop_15: - byte_move_16(dst, src); - src += 15; - dst += 15; +#define expr_jump(i) \ + copy_ascii_stop_##i: \ + byte_move_forward(dst, src, i); \ + dst += i; src += i; \ goto copy_utf8; + repeat16_incr(expr_jump) +#undef expr_jump copy_utf8: if (*src & 0x80) { /* non-ASCII character */ @@ -5740,53 +4987,49 @@ copy_utf8: uni = byte_load_4(src); #if 
YYJSON_DISABLE_UTF8_VALIDATION while (true) repeat4({ - if ((uni & b3_mask) == b3_patt) { + if ((uni & utf8_seq(b3_mask)) == utf8_seq(b3_patt)) { byte_copy_4(dst, &uni); - dst += 3; - src += 3; + dst += 3; src += 3; uni = byte_load_4(src); } else break; }) - if ((uni & b1_mask) == b1_patt) goto copy_ascii; + if ((uni & utf8_seq(b1_mask)) == utf8_seq(b1_patt)) goto copy_ascii; while (true) repeat4({ - if ((uni & b2_mask) == b2_patt) { + if ((uni & utf8_seq(b2_mask)) == utf8_seq(b2_patt)) { byte_copy_2(dst, &uni); - dst += 2; - src += 2; + dst += 2; src += 2; uni = byte_load_4(src); } else break; }) while (true) repeat4({ - if ((uni & b4_mask) == b4_patt) { + if ((uni & utf8_seq(b4_mask)) == utf8_seq(b4_patt)) { byte_copy_4(dst, &uni); - dst += 4; - src += 4; + dst += 4; src += 4; uni = byte_load_4(src); } else break; }) #else - while (is_valid_seq_3(uni)) { + while (is_utf8_seq3(uni)) { byte_copy_4(dst, &uni); - dst += 3; - src += 3; + dst += 3; src += 3; uni = byte_load_4(src); } - if (is_valid_seq_1(uni)) goto copy_ascii; - while (is_valid_seq_2(uni)) { + if (is_utf8_seq1(uni)) goto copy_ascii; + while (is_utf8_seq2(uni)) { byte_copy_2(dst, &uni); - dst += 2; - src += 2; + dst += 2; src += 2; uni = byte_load_4(src); } - while (is_valid_seq_4(uni)) { + while (is_utf8_seq4(uni)) { byte_copy_4(dst, &uni); - dst += 4; - src += 4; + dst += 4; src += 4; uni = byte_load_4(src); } #endif if (unlikely(pos == src)) { - if (!inv) return_err(src, MSG_ERR_UTF8); + if (!has_allow(INVALID_UNICODE)) { + return_err(src, MSG_ERR_UTF8); + } goto copy_ascii_stop_1; } goto copy_ascii; @@ -5794,16 +5037,169 @@ copy_utf8: goto copy_escape; #undef return_err -#undef is_valid_seq_1 -#undef is_valid_seq_2 -#undef is_valid_seq_3 -#undef is_valid_seq_4 +} + +static_inline bool read_str(u8 **ptr, u8 *eof, yyjson_read_flag flg, + yyjson_val *val, const char **msg) { + return read_str_opt('\"', ptr, eof, flg, val, msg, NULL); +} + +static_inline bool read_str_con(u8 **ptr, u8 *eof, 
yyjson_read_flag flg, + yyjson_val *val, const char **msg, u8 **con) { + return read_str_opt('\"', ptr, eof, flg, val, msg, con); +} + +static_noinline bool read_str_sq(u8 **ptr, u8 *eof, yyjson_read_flag flg, + yyjson_val *val, const char **msg) { + return read_str_opt('\'', ptr, eof, flg, val, msg, NULL); +} + +/** Read unquoted key (identifier name). */ +static_noinline bool read_str_id(u8 **ptr, u8 *eof, yyjson_read_flag flg, + u8 **pre, yyjson_val *val, const char **msg) { +#define return_err(_end, _msg) do { \ + *msg = _msg; \ + *end = _end; \ + return false; \ +} while (false) + +#define return_suc(_str_end, _cur_end) do { \ + val->tag = ((u64)(_str_end - hdr) << YYJSON_TAG_BIT) | \ + (u64)(YYJSON_TYPE_STR); \ + val->uni.str = (const char *)hdr; \ + *pre = _str_end; *end = _cur_end; \ + return true; \ +} while (false) + + u8 *hdr = *ptr; + u8 **end = ptr; + u8 *src = hdr, *dst = NULL; + u16 hi, lo; + u32 uni, tmp; + + /* add null-terminator for previous raw string */ + **pre = '\0'; + +skip_ascii: +#define expr_jump(i) \ + if (likely(char_is_id_ascii(src[i]))) {} \ + else goto skip_ascii_stop##i; + +#define expr_stop(i) \ + skip_ascii_stop##i: \ + src += i; \ + goto skip_ascii_end; + + repeat16_incr(expr_jump) + src += 16; + goto skip_ascii; + repeat16_incr(expr_stop) + +#undef expr_jump +#undef expr_stop + +skip_ascii_end: + gcc_store_barrier(*src); + if (likely(!char_is_id_next(*src))) { + return_suc(src, src); + } + +skip_utf8: + while (*src >= 0x80) { + if (has_allow(EXT_WHITESPACE)) { + if (char_is_space_ext(*src) && ext_space_len(src)) { + return_suc(src, src); + } + } + uni = byte_load_4(src); + if (is_utf8_seq2(uni)) { + src += 2; + } else if (is_utf8_seq3(uni)) { + src += 3; + } else if (is_utf8_seq4(uni)) { + src += 4; + } else { +#if !YYJSON_DISABLE_UTF8_VALIDATION + if (!has_allow(INVALID_UNICODE)) return_err(src, MSG_ERR_UTF8); +#endif + src += 1; + } + } + if (char_is_id_ascii(*src)) goto skip_ascii; + + /* The escape character appears, we need 
to copy it. */ + dst = src; +copy_escape: + if (byte_match_2(src, "\\u")) { + if (!read_uni_esc(&src, &dst, msg)) return_err(src, *msg); + } else { + if (!char_is_id_next(*src)) return_suc(dst, src); + return_err(src, "unexpected character in key"); + } + +copy_ascii: + /* + Copy continuous ASCII, loop unrolling, same as the following code: + + while (true) repeat16({ + if (char_is_ascii_skip(*src)) *dst++ = *src++; + else break; + }) + */ +#define expr_jump(i) \ + if (likely((char_is_id_ascii(src[i])))) {} \ + else { gcc_store_barrier(src[i]); goto copy_ascii_stop_##i; } + repeat16_incr(expr_jump) +#undef expr_jump + + byte_move_16(dst, src); + dst += 16; src += 16; + goto copy_ascii; + +#define expr_jump(i) \ + copy_ascii_stop_##i: \ + byte_move_forward(dst, src, i); \ + dst += i; src += i; \ + goto copy_utf8; + repeat16_incr(expr_jump) +#undef expr_jump + +copy_utf8: + while (*src >= 0x80) { /* non-ASCII character */ + if (has_allow(EXT_WHITESPACE)) { + if (char_is_space_ext(*src) && ext_space_len(src)) { + return_suc(dst, src); + } + } + uni = byte_load_4(src); + if (is_utf8_seq2(uni)) { + byte_copy_2(dst, &uni); + dst += 2; src += 2; + } else if (is_utf8_seq3(uni)) { + byte_copy_4(dst, &uni); + dst += 3; src += 3; + } else if (is_utf8_seq4(uni)) { + byte_copy_4(dst, &uni); + dst += 4; src += 4; + } else { +#if !YYJSON_DISABLE_UTF8_VALIDATION + if (!has_allow(INVALID_UNICODE)) return_err(src, MSG_ERR_UTF8); +#endif + *dst = *src; + dst += 1; src += 1; + } + } + if (char_is_id_ascii(*src)) goto copy_ascii; + goto copy_escape; + +#undef return_err +#undef return_suc } /*============================================================================== - * JSON Reader Implementation + * MARK: - JSON Reader Implementation (Private) * * We use goto statements to build the finite state machine (FSM). 
* The FSM's state was held by program counter (PC) and the 'goto' make the @@ -5811,14 +5207,13 @@ copy_utf8: *============================================================================*/ /** Read single value JSON document. */ -static_noinline yyjson_doc *read_root_single(u8 *hdr, u8 *cur, u8 *end, +static_noinline yyjson_doc *read_root_single(u8 *hdr, u8 *cur, u8 *eof, yyjson_alc alc, yyjson_read_flag flg, yyjson_read_err *err) { - #define return_err(_pos, _code, _msg) do { \ - if (is_truncated_end(hdr, _pos, end, YYJSON_READ_ERROR_##_code, flg)) { \ - err->pos = (usize)(end - hdr); \ + if (is_truncated_end(hdr, _pos, eof, YYJSON_READ_ERROR_##_code, flg)) { \ + err->pos = (usize)(eof - hdr); \ err->code = YYJSON_READ_ERROR_UNEXPECTED_END; \ err->msg = MSG_NOT_END; \ } else { \ @@ -5826,7 +5221,7 @@ static_noinline yyjson_doc *read_root_single(u8 *hdr, u8 *cur, u8 *end, err->code = YYJSON_READ_ERROR_##_code; \ err->msg = _msg; \ } \ - if (val_hdr) alc.free(alc.ctx, (void *)val_hdr); \ + if (val_hdr) alc.free(alc.ctx, val_hdr); \ return NULL; \ } while (false) @@ -5837,10 +5232,9 @@ static_noinline yyjson_doc *read_root_single(u8 *hdr, u8 *cur, u8 *end, yyjson_doc *doc; /* the JSON document, equals to val_hdr */ const char *msg; /* error message */ - bool raw; /* read number as raw */ - bool inv; /* allow invalid unicode */ - u8 *raw_end; /* raw end for null-terminator */ - u8 **pre; /* previous raw end pointer */ + u8 raw_end[1]; /* raw end for null-terminator */ + u8 *raw_ptr = raw_end; + u8 **pre = &raw_ptr; /* previous raw end pointer */ hdr_len = sizeof(yyjson_doc) / sizeof(yyjson_val); hdr_len += (sizeof(yyjson_doc) % sizeof(yyjson_val)) > 0; @@ -5849,17 +5243,13 @@ static_noinline yyjson_doc *read_root_single(u8 *hdr, u8 *cur, u8 *end, val_hdr = (yyjson_val *)alc.malloc(alc.ctx, alc_num * sizeof(yyjson_val)); if (unlikely(!val_hdr)) goto fail_alloc; val = val_hdr + hdr_len; - raw = has_read_flag(NUMBER_AS_RAW) || has_read_flag(BIGNUM_AS_RAW); - inv = 
has_read_flag(ALLOW_INVALID_UNICODE) != 0; - raw_end = NULL; - pre = raw ? &raw_end : NULL; if (char_is_num(*cur)) { if (likely(read_num(&cur, pre, flg, val, &msg))) goto doc_end; goto fail_number; } if (*cur == '"') { - if (likely(read_str(&cur, end, inv, val, &msg, NULL))) goto doc_end; + if (likely(read_str(&cur, eof, flg, val, &msg))) goto doc_end; goto fail_string; } if (*cur == 't') { @@ -5872,42 +5262,45 @@ static_noinline yyjson_doc *read_root_single(u8 *hdr, u8 *cur, u8 *end, } if (*cur == 'n') { if (likely(read_null(&cur, val))) goto doc_end; - if (has_read_flag(ALLOW_INF_AND_NAN)) { - if (read_nan(false, &cur, pre, flg, val)) goto doc_end; + if (has_allow(INF_AND_NAN)) { + if (read_nan(&cur, pre, flg, val)) goto doc_end; } goto fail_literal_null; } - if (has_read_flag(ALLOW_INF_AND_NAN)) { - if (read_inf_or_nan(false, &cur, pre, flg, val)) goto doc_end; + if (has_allow(INF_AND_NAN)) { + if (read_inf_or_nan(&cur, pre, flg, val)) goto doc_end; + } + if (has_allow(SINGLE_QUOTED_STR) && *cur == '\'') { + if (likely(read_str_sq(&cur, eof, flg, val, &msg))) goto doc_end; + goto fail_string; } goto fail_character; doc_end: /* check invalid contents after json document */ - if (unlikely(cur < end) && !has_read_flag(STOP_WHEN_DONE)) { - if (has_read_flag(ALLOW_COMMENTS)) { - if (!skip_spaces_and_comments(&cur)) { - if (byte_match_2(cur, "/*")) goto fail_comment; + if (unlikely(cur < eof) && !has_flg(STOP_WHEN_DONE)) { + while (char_is_space(*cur)) cur++; + if (has_allow(TRIVIA) && char_is_trivia(*cur)) { + if (!skip_trivia(&cur, eof, flg) && cur == eof) { + goto fail_comment; } - } else { - while (char_is_space(*cur)) cur++; } - if (unlikely(cur < end)) goto fail_garbage; + if (unlikely(cur < eof)) goto fail_garbage; } - if (pre && *pre) **pre = '\0'; + **pre = '\0'; doc = (yyjson_doc *)val_hdr; doc->root = val_hdr + hdr_len; doc->alc = alc; doc->dat_read = (usize)(cur - hdr); doc->val_read = 1; - doc->str_pool = has_read_flag(INSITU) ? 
NULL : (char *)hdr; + doc->str_pool = has_flg(INSITU) ? NULL : (char *)hdr; return doc; fail_string: return_err(cur, INVALID_STRING, msg); fail_number: return_err(cur, INVALID_NUMBER, msg); fail_alloc: return_err(cur, MEMORY_ALLOCATION, MSG_MALLOC); -fail_literal_true: return_err(cur, LITERAL, MSG_CHAT_T); +fail_literal_true: return_err(cur, LITERAL, MSG_CHAR_T); fail_literal_false: return_err(cur, LITERAL, MSG_CHAR_F); fail_literal_null: return_err(cur, LITERAL, MSG_CHAR_N); fail_character: return_err(cur, UNEXPECTED_CHARACTER, MSG_CHAR); @@ -5918,14 +5311,13 @@ fail_garbage: return_err(cur, UNEXPECTED_CONTENT, MSG_GARBAGE); } /** Read JSON document (accept all style, but optimized for minify). */ -static_inline yyjson_doc *read_root_minify(u8 *hdr, u8 *cur, u8 *end, +static_inline yyjson_doc *read_root_minify(u8 *hdr, u8 *cur, u8 *eof, yyjson_alc alc, yyjson_read_flag flg, yyjson_read_err *err) { - #define return_err(_pos, _code, _msg) do { \ - if (is_truncated_end(hdr, _pos, end, YYJSON_READ_ERROR_##_code, flg)) { \ - err->pos = (usize)(end - hdr); \ + if (is_truncated_end(hdr, _pos, eof, YYJSON_READ_ERROR_##_code, flg)) { \ + err->pos = (usize)(eof - hdr); \ err->code = YYJSON_READ_ERROR_UNEXPECTED_END; \ err->msg = MSG_NOT_END; \ } else { \ @@ -5933,7 +5325,7 @@ static_inline yyjson_doc *read_root_minify(u8 *hdr, u8 *cur, u8 *end, err->code = YYJSON_READ_ERROR_##_code; \ err->msg = _msg; \ } \ - if (val_hdr) alc.free(alc.ctx, (void *)val_hdr); \ + if (val_hdr) alc.free(alc.ctx, val_hdr); \ return NULL; \ } while (false) @@ -5970,12 +5362,11 @@ static_inline yyjson_doc *read_root_minify(u8 *hdr, u8 *cur, u8 *end, yyjson_doc *doc; /* the JSON document, equals to val_hdr */ const char *msg; /* error message */ - bool raw; /* read number as raw */ - bool inv; /* allow invalid unicode */ - u8 *raw_end; /* raw end for null-terminator */ - u8 **pre; /* previous raw end pointer */ + u8 raw_end[1]; /* raw end for null-terminator */ + u8 *raw_ptr = raw_end; + u8 **pre = 
&raw_ptr; /* previous raw end pointer */ - dat_len = has_read_flag(STOP_WHEN_DONE) ? 256 : (usize)(end - cur); + dat_len = has_flg(STOP_WHEN_DONE) ? 256 : (usize)(eof - cur); hdr_len = sizeof(yyjson_doc) / sizeof(yyjson_val); hdr_len += (sizeof(yyjson_doc) % sizeof(yyjson_val)) > 0; alc_max = USIZE_MAX / sizeof(yyjson_val); @@ -5988,10 +5379,6 @@ static_inline yyjson_doc *read_root_minify(u8 *hdr, u8 *cur, u8 *end, val = val_hdr + hdr_len; ctn = val; ctn_len = 0; - raw = has_read_flag(NUMBER_AS_RAW) || has_read_flag(BIGNUM_AS_RAW); - inv = has_read_flag(ALLOW_INVALID_UNICODE) != 0; - raw_end = NULL; - pre = raw ? &raw_end : NULL; if (*cur++ == '{') { ctn->tag = YYJSON_TYPE_OBJ; @@ -6035,7 +5422,7 @@ arr_val_begin: if (*cur == '"') { val_incr(); ctn_len++; - if (likely(read_str(&cur, end, inv, val, &msg, NULL))) goto arr_val_end; + if (likely(read_str(&cur, eof, flg, val, &msg))) goto arr_val_end; goto fail_string; } if (*cur == 't') { @@ -6054,15 +5441,15 @@ arr_val_begin: val_incr(); ctn_len++; if (likely(read_null(&cur, val))) goto arr_val_end; - if (has_read_flag(ALLOW_INF_AND_NAN)) { - if (read_nan(false, &cur, pre, flg, val)) goto arr_val_end; + if (has_allow(INF_AND_NAN)) { + if (read_nan(&cur, pre, flg, val)) goto arr_val_end; } goto fail_literal_null; } if (*cur == ']') { cur++; if (likely(ctn_len == 0)) goto arr_end; - if (has_read_flag(ALLOW_TRAILING_COMMAS)) goto arr_end; + if (has_allow(TRAILING_COMMAS)) goto arr_end; while (*cur != ',') cur--; goto fail_trailing_comma; } @@ -6070,16 +5457,22 @@ arr_val_begin: while (char_is_space(*++cur)); goto arr_val_begin; } - if (has_read_flag(ALLOW_INF_AND_NAN) && + if (has_allow(INF_AND_NAN) && (*cur == 'i' || *cur == 'I' || *cur == 'N')) { val_incr(); ctn_len++; - if (read_inf_or_nan(false, &cur, pre, flg, val)) goto arr_val_end; + if (read_inf_or_nan(&cur, pre, flg, val)) goto arr_val_end; goto fail_character_val; } - if (has_read_flag(ALLOW_COMMENTS)) { - if (skip_spaces_and_comments(&cur)) goto arr_val_begin; 
- if (byte_match_2(cur, "/*")) goto fail_comment; + if (has_allow(SINGLE_QUOTED_STR) && *cur == '\'') { + val_incr(); + ctn_len++; + if (likely(read_str_sq(&cur, eof, flg, val, &msg))) goto arr_val_end; + goto fail_string; + } + if (has_allow(TRIVIA) && char_is_trivia(*cur)) { + if (skip_trivia(&cur, eof, flg)) goto arr_val_begin; + if (cur == eof) goto fail_comment; } goto fail_character_val; @@ -6096,9 +5489,9 @@ arr_val_end: while (char_is_space(*++cur)); goto arr_val_end; } - if (has_read_flag(ALLOW_COMMENTS)) { - if (skip_spaces_and_comments(&cur)) goto arr_val_end; - if (byte_match_2(cur, "/*")) goto fail_comment; + if (has_allow(TRIVIA) && char_is_trivia(*cur)) { + if (skip_trivia(&cur, eof, flg)) goto arr_val_end; + if (cur == eof) goto fail_comment; } goto fail_character_arr_end; @@ -6135,13 +5528,13 @@ obj_key_begin: if (likely(*cur == '"')) { val_incr(); ctn_len++; - if (likely(read_str(&cur, end, inv, val, &msg, NULL))) goto obj_key_end; + if (likely(read_str(&cur, eof, flg, val, &msg))) goto obj_key_end; goto fail_string; } if (likely(*cur == '}')) { cur++; if (likely(ctn_len == 0)) goto obj_end; - if (has_read_flag(ALLOW_TRAILING_COMMAS)) goto obj_end; + if (has_allow(TRAILING_COMMAS)) goto obj_end; while (*cur != ',') cur--; goto fail_trailing_comma; } @@ -6149,9 +5542,21 @@ obj_key_begin: while (char_is_space(*++cur)); goto obj_key_begin; } - if (has_read_flag(ALLOW_COMMENTS)) { - if (skip_spaces_and_comments(&cur)) goto obj_key_begin; - if (byte_match_2(cur, "/*")) goto fail_comment; + if (has_allow(SINGLE_QUOTED_STR) && *cur == '\'') { + val_incr(); + ctn_len++; + if (likely(read_str_sq(&cur, eof, flg, val, &msg))) goto obj_key_end; + goto fail_string; + } + if (has_allow(UNQUOTED_KEY) && char_is_id_start(*cur)) { + val_incr(); + ctn_len++; + if (read_str_id(&cur, eof, flg, pre, val, &msg)) goto obj_key_end; + goto fail_string; + } + if (has_allow(TRIVIA) && char_is_trivia(*cur)) { + if (skip_trivia(&cur, eof, flg)) goto obj_key_begin; + if (cur 
== eof) goto fail_comment; } goto fail_character_obj_key; @@ -6164,9 +5569,9 @@ obj_key_end: while (char_is_space(*++cur)); goto obj_key_end; } - if (has_read_flag(ALLOW_COMMENTS)) { - if (skip_spaces_and_comments(&cur)) goto obj_key_end; - if (byte_match_2(cur, "/*")) goto fail_comment; + if (has_allow(TRIVIA) && char_is_trivia(*cur)) { + if (skip_trivia(&cur, eof, flg)) goto obj_key_end; + if (cur == eof) goto fail_comment; } goto fail_character_obj_sep; @@ -6174,7 +5579,7 @@ obj_val_begin: if (*cur == '"') { val++; ctn_len++; - if (likely(read_str(&cur, end, inv, val, &msg, NULL))) goto obj_val_end; + if (likely(read_str(&cur, eof, flg, val, &msg))) goto obj_val_end; goto fail_string; } if (char_is_num(*cur)) { @@ -6207,8 +5612,8 @@ obj_val_begin: val++; ctn_len++; if (likely(read_null(&cur, val))) goto obj_val_end; - if (has_read_flag(ALLOW_INF_AND_NAN)) { - if (read_nan(false, &cur, pre, flg, val)) goto obj_val_end; + if (has_allow(INF_AND_NAN)) { + if (read_nan(&cur, pre, flg, val)) goto obj_val_end; } goto fail_literal_null; } @@ -6216,16 +5621,22 @@ obj_val_begin: while (char_is_space(*++cur)); goto obj_val_begin; } - if (has_read_flag(ALLOW_INF_AND_NAN) && + if (has_allow(INF_AND_NAN) && (*cur == 'i' || *cur == 'I' || *cur == 'N')) { val++; ctn_len++; - if (read_inf_or_nan(false, &cur, pre, flg, val)) goto obj_val_end; + if (read_inf_or_nan(&cur, pre, flg, val)) goto obj_val_end; goto fail_character_val; } - if (has_read_flag(ALLOW_COMMENTS)) { - if (skip_spaces_and_comments(&cur)) goto obj_val_begin; - if (byte_match_2(cur, "/*")) goto fail_comment; + if (has_allow(SINGLE_QUOTED_STR) && *cur == '\'') { + val++; + ctn_len++; + if (likely(read_str_sq(&cur, eof, flg, val, &msg))) goto obj_val_end; + goto fail_string; + } + if (has_allow(TRIVIA) && char_is_trivia(*cur)) { + if (skip_trivia(&cur, eof, flg)) goto obj_val_begin; + if (cur == eof) goto fail_comment; } goto fail_character_val; @@ -6242,9 +5653,9 @@ obj_val_end: while (char_is_space(*++cur)); goto 
obj_val_end; } - if (has_read_flag(ALLOW_COMMENTS)) { - if (skip_spaces_and_comments(&cur)) goto obj_val_end; - if (byte_match_2(cur, "/*")) goto fail_comment; + if (has_allow(TRIVIA) && char_is_trivia(*cur)) { + if (skip_trivia(&cur, eof, flg)) goto obj_val_end; + if (cur == eof) goto fail_comment; } goto fail_character_obj_end; @@ -6265,30 +5676,30 @@ obj_end: doc_end: /* check invalid contents after json document */ - if (unlikely(cur < end) && !has_read_flag(STOP_WHEN_DONE)) { - if (has_read_flag(ALLOW_COMMENTS)) { - skip_spaces_and_comments(&cur); - if (byte_match_2(cur, "/*")) goto fail_comment; - } else { - while (char_is_space(*cur)) cur++; + if (unlikely(cur < eof) && !has_flg(STOP_WHEN_DONE)) { + while (char_is_space(*cur)) cur++; + if (has_allow(TRIVIA) && char_is_trivia(*cur)) { + if (!skip_trivia(&cur, eof, flg) && cur == eof) { + goto fail_comment; + } } - if (unlikely(cur < end)) goto fail_garbage; + if (unlikely(cur < eof)) goto fail_garbage; } - if (pre && *pre) **pre = '\0'; + **pre = '\0'; doc = (yyjson_doc *)val_hdr; doc->root = val_hdr + hdr_len; doc->alc = alc; doc->dat_read = (usize)(cur - hdr); doc->val_read = (usize)((val - doc->root) + 1); - doc->str_pool = has_read_flag(INSITU) ? NULL : (char *)hdr; + doc->str_pool = has_flg(INSITU) ? NULL : (char *)hdr; return doc; fail_string: return_err(cur, INVALID_STRING, msg); fail_number: return_err(cur, INVALID_NUMBER, msg); fail_alloc: return_err(cur, MEMORY_ALLOCATION, MSG_MALLOC); fail_trailing_comma: return_err(cur, JSON_STRUCTURE, MSG_COMMA); -fail_literal_true: return_err(cur, LITERAL, MSG_CHAT_T); +fail_literal_true: return_err(cur, LITERAL, MSG_CHAR_T); fail_literal_false: return_err(cur, LITERAL, MSG_CHAR_F); fail_literal_null: return_err(cur, LITERAL, MSG_CHAR_N); fail_character_val: return_err(cur, UNEXPECTED_CHARACTER, MSG_CHAR); @@ -6304,14 +5715,13 @@ fail_garbage: return_err(cur, UNEXPECTED_CONTENT, MSG_GARBAGE); } /** Read JSON document (accept all style, but optimized for pretty). 
*/ -static_inline yyjson_doc *read_root_pretty(u8 *hdr, u8 *cur, u8 *end, +static_inline yyjson_doc *read_root_pretty(u8 *hdr, u8 *cur, u8 *eof, yyjson_alc alc, yyjson_read_flag flg, yyjson_read_err *err) { - #define return_err(_pos, _code, _msg) do { \ - if (is_truncated_end(hdr, _pos, end, YYJSON_READ_ERROR_##_code, flg)) { \ - err->pos = (usize)(end - hdr); \ + if (is_truncated_end(hdr, _pos, eof, YYJSON_READ_ERROR_##_code, flg)) { \ + err->pos = (usize)(eof - hdr); \ err->code = YYJSON_READ_ERROR_UNEXPECTED_END; \ err->msg = MSG_NOT_END; \ } else { \ @@ -6319,7 +5729,7 @@ static_inline yyjson_doc *read_root_pretty(u8 *hdr, u8 *cur, u8 *end, err->code = YYJSON_READ_ERROR_##_code; \ err->msg = _msg; \ } \ - if (val_hdr) alc.free(alc.ctx, (void *)val_hdr); \ + if (val_hdr) alc.free(alc.ctx, val_hdr); \ return NULL; \ } while (false) @@ -6356,12 +5766,11 @@ static_inline yyjson_doc *read_root_pretty(u8 *hdr, u8 *cur, u8 *end, yyjson_doc *doc; /* the JSON document, equals to val_hdr */ const char *msg; /* error message */ - bool raw; /* read number as raw */ - bool inv; /* allow invalid unicode */ - u8 *raw_end; /* raw end for null-terminator */ - u8 **pre; /* previous raw end pointer */ + u8 raw_end[1]; /* raw end for null-terminator */ + u8 *raw_ptr = raw_end; + u8 **pre = &raw_ptr; /* previous raw end pointer */ - dat_len = has_read_flag(STOP_WHEN_DONE) ? 256 : (usize)(end - cur); + dat_len = has_flg(STOP_WHEN_DONE) ? 256 : (usize)(eof - cur); hdr_len = sizeof(yyjson_doc) / sizeof(yyjson_val); hdr_len += (sizeof(yyjson_doc) % sizeof(yyjson_val)) > 0; alc_max = USIZE_MAX / sizeof(yyjson_val); @@ -6374,10 +5783,6 @@ static_inline yyjson_doc *read_root_pretty(u8 *hdr, u8 *cur, u8 *end, val = val_hdr + hdr_len; ctn = val; ctn_len = 0; - raw = has_read_flag(NUMBER_AS_RAW) || has_read_flag(BIGNUM_AS_RAW); - inv = has_read_flag(ALLOW_INVALID_UNICODE) != 0; - raw_end = NULL; - pre = raw ? 
&raw_end : NULL; if (*cur++ == '{') { ctn->tag = YYJSON_TYPE_OBJ; @@ -6436,7 +5841,7 @@ arr_val_begin: if (*cur == '"') { val_incr(); ctn_len++; - if (likely(read_str(&cur, end, inv, val, &msg, NULL))) goto arr_val_end; + if (likely(read_str(&cur, eof, flg, val, &msg))) goto arr_val_end; goto fail_string; } if (*cur == 't') { @@ -6455,15 +5860,15 @@ arr_val_begin: val_incr(); ctn_len++; if (likely(read_null(&cur, val))) goto arr_val_end; - if (has_read_flag(ALLOW_INF_AND_NAN)) { - if (read_nan(false, &cur, pre, flg, val)) goto arr_val_end; + if (has_allow(INF_AND_NAN)) { + if (read_nan(&cur, pre, flg, val)) goto arr_val_end; } goto fail_literal_null; } if (*cur == ']') { cur++; if (likely(ctn_len == 0)) goto arr_end; - if (has_read_flag(ALLOW_TRAILING_COMMAS)) goto arr_end; + if (has_allow(TRAILING_COMMAS)) goto arr_end; while (*cur != ',') cur--; goto fail_trailing_comma; } @@ -6471,16 +5876,22 @@ arr_val_begin: while (char_is_space(*++cur)); goto arr_val_begin; } - if (has_read_flag(ALLOW_INF_AND_NAN) && + if (has_allow(INF_AND_NAN) && (*cur == 'i' || *cur == 'I' || *cur == 'N')) { val_incr(); ctn_len++; - if (read_inf_or_nan(false, &cur, pre, flg, val)) goto arr_val_end; + if (read_inf_or_nan(&cur, pre, flg, val)) goto arr_val_end; goto fail_character_val; } - if (has_read_flag(ALLOW_COMMENTS)) { - if (skip_spaces_and_comments(&cur)) goto arr_val_begin; - if (byte_match_2(cur, "/*")) goto fail_comment; + if (has_allow(SINGLE_QUOTED_STR) && *cur == '\'') { + val_incr(); + ctn_len++; + if (likely(read_str_sq(&cur, eof, flg, val, &msg))) goto arr_val_end; + goto fail_string; + } + if (has_allow(TRIVIA) && char_is_trivia(*cur)) { + if (skip_trivia(&cur, eof, flg)) goto arr_val_begin; + if (cur == eof) goto fail_comment; } goto fail_character_val; @@ -6501,9 +5912,9 @@ arr_val_end: while (char_is_space(*++cur)); goto arr_val_end; } - if (has_read_flag(ALLOW_COMMENTS)) { - if (skip_spaces_and_comments(&cur)) goto arr_val_end; - if (byte_match_2(cur, "/*")) goto 
fail_comment; + if (has_allow(TRIVIA) && char_is_trivia(*cur)) { + if (skip_trivia(&cur, eof, flg)) goto arr_val_end; + if (cur == eof) goto fail_comment; } goto fail_character_arr_end; @@ -6553,13 +5964,13 @@ obj_key_begin: if (likely(*cur == '"')) { val_incr(); ctn_len++; - if (likely(read_str(&cur, end, inv, val, &msg, NULL))) goto obj_key_end; + if (likely(read_str(&cur, eof, flg, val, &msg))) goto obj_key_end; goto fail_string; } if (likely(*cur == '}')) { cur++; if (likely(ctn_len == 0)) goto obj_end; - if (has_read_flag(ALLOW_TRAILING_COMMAS)) goto obj_end; + if (has_allow(TRAILING_COMMAS)) goto obj_end; while (*cur != ',') cur--; goto fail_trailing_comma; } @@ -6567,9 +5978,21 @@ obj_key_begin: while (char_is_space(*++cur)); goto obj_key_begin; } - if (has_read_flag(ALLOW_COMMENTS)) { - if (skip_spaces_and_comments(&cur)) goto obj_key_begin; - if (byte_match_2(cur, "/*")) goto fail_comment; + if (has_allow(SINGLE_QUOTED_STR) && *cur == '\'') { + val_incr(); + ctn_len++; + if (likely(read_str_sq(&cur, eof, flg, val, &msg))) goto obj_key_end; + goto fail_string; + } + if (has_allow(UNQUOTED_KEY) && char_is_id_start(*cur)) { + val_incr(); + ctn_len++; + if (read_str_id(&cur, eof, flg, pre, val, &msg)) goto obj_key_end; + goto fail_string; + } + if (has_allow(TRIVIA) && char_is_trivia(*cur)) { + if (skip_trivia(&cur, eof, flg)) goto obj_key_begin; + if (cur == eof) goto fail_comment; } goto fail_character_obj_key; @@ -6586,9 +6009,9 @@ obj_key_end: while (char_is_space(*++cur)); goto obj_key_end; } - if (has_read_flag(ALLOW_COMMENTS)) { - if (skip_spaces_and_comments(&cur)) goto obj_key_end; - if (byte_match_2(cur, "/*")) goto fail_comment; + if (has_allow(TRIVIA) && char_is_trivia(*cur)) { + if (skip_trivia(&cur, eof, flg)) goto obj_key_end; + if (cur == eof) goto fail_comment; } goto fail_character_obj_sep; @@ -6596,7 +6019,7 @@ obj_val_begin: if (*cur == '"') { val++; ctn_len++; - if (likely(read_str(&cur, end, inv, val, &msg, NULL))) goto obj_val_end; + if 
(likely(read_str(&cur, eof, flg, val, &msg))) goto obj_val_end; goto fail_string; } if (char_is_num(*cur)) { @@ -6629,8 +6052,8 @@ obj_val_begin: val++; ctn_len++; if (likely(read_null(&cur, val))) goto obj_val_end; - if (has_read_flag(ALLOW_INF_AND_NAN)) { - if (read_nan(false, &cur, pre, flg, val)) goto obj_val_end; + if (has_allow(INF_AND_NAN)) { + if (read_nan(&cur, pre, flg, val)) goto obj_val_end; } goto fail_literal_null; } @@ -6638,16 +6061,22 @@ obj_val_begin: while (char_is_space(*++cur)); goto obj_val_begin; } - if (has_read_flag(ALLOW_INF_AND_NAN) && + if (has_allow(INF_AND_NAN) && (*cur == 'i' || *cur == 'I' || *cur == 'N')) { val++; ctn_len++; - if (read_inf_or_nan(false, &cur, pre, flg, val)) goto obj_val_end; + if (read_inf_or_nan(&cur, pre, flg, val)) goto obj_val_end; goto fail_character_val; } - if (has_read_flag(ALLOW_COMMENTS)) { - if (skip_spaces_and_comments(&cur)) goto obj_val_begin; - if (byte_match_2(cur, "/*")) goto fail_comment; + if (has_allow(SINGLE_QUOTED_STR) && *cur == '\'') { + val++; + ctn_len++; + if (likely(read_str_sq(&cur, eof, flg, val, &msg))) goto obj_val_end; + goto fail_string; + } + if (has_allow(TRIVIA) && char_is_trivia(*cur)) { + if (skip_trivia(&cur, eof, flg)) goto obj_val_begin; + if (cur == eof) goto fail_comment; } goto fail_character_val; @@ -6668,9 +6097,9 @@ obj_val_end: while (char_is_space(*++cur)); goto obj_val_end; } - if (has_read_flag(ALLOW_COMMENTS)) { - if (skip_spaces_and_comments(&cur)) goto obj_val_end; - if (byte_match_2(cur, "/*")) goto fail_comment; + if (has_allow(TRIVIA) && char_is_trivia(*cur)) { + if (skip_trivia(&cur, eof, flg)) goto obj_val_end; + if (cur == eof) goto fail_comment; } goto fail_character_obj_end; @@ -6692,30 +6121,30 @@ obj_end: doc_end: /* check invalid contents after json document */ - if (unlikely(cur < end) && !has_read_flag(STOP_WHEN_DONE)) { - if (has_read_flag(ALLOW_COMMENTS)) { - skip_spaces_and_comments(&cur); - if (byte_match_2(cur, "/*")) goto fail_comment; - } 
else { - while (char_is_space(*cur)) cur++; + if (unlikely(cur < eof) && !has_flg(STOP_WHEN_DONE)) { + while (char_is_space(*cur)) cur++; + if (has_allow(TRIVIA) && char_is_trivia(*cur)) { + if (!skip_trivia(&cur, eof, flg) && cur == eof) { + goto fail_comment; + } } - if (unlikely(cur < end)) goto fail_garbage; + if (unlikely(cur < eof)) goto fail_garbage; } - if (pre && *pre) **pre = '\0'; + **pre = '\0'; doc = (yyjson_doc *)val_hdr; doc->root = val_hdr + hdr_len; doc->alc = alc; doc->dat_read = (usize)(cur - hdr); doc->val_read = (usize)((val - doc->root) + 1); - doc->str_pool = has_read_flag(INSITU) ? NULL : (char *)hdr; + doc->str_pool = has_flg(INSITU) ? NULL : (char *)hdr; return doc; fail_string: return_err(cur, INVALID_STRING, msg); fail_number: return_err(cur, INVALID_NUMBER, msg); fail_alloc: return_err(cur, MEMORY_ALLOCATION, MSG_MALLOC); fail_trailing_comma: return_err(cur, JSON_STRUCTURE, MSG_COMMA); -fail_literal_true: return_err(cur, LITERAL, MSG_CHAT_T); +fail_literal_true: return_err(cur, LITERAL, MSG_CHAR_T); fail_literal_false: return_err(cur, LITERAL, MSG_CHAR_F); fail_literal_null: return_err(cur, LITERAL, MSG_CHAR_N); fail_character_val: return_err(cur, UNEXPECTED_CHARACTER, MSG_CHAR); @@ -6733,36 +6162,35 @@ fail_garbage: return_err(cur, UNEXPECTED_CONTENT, MSG_GARBAGE); /*============================================================================== - * JSON Reader Entrance + * MARK: - JSON Reader (Public) *============================================================================*/ yyjson_doc *yyjson_read_opts(char *dat, usize len, yyjson_read_flag flg, const yyjson_alc *alc_ptr, yyjson_read_err *err) { - #define return_err(_pos, _code, _msg) do { \ err->pos = (usize)(_pos); \ err->msg = _msg; \ err->code = YYJSON_READ_ERROR_##_code; \ - if (!has_read_flag(INSITU) && hdr) alc.free(alc.ctx, (void *)hdr); \ + if (!has_flg(INSITU) && hdr) alc.free(alc.ctx, (void *)hdr); \ return NULL; \ } while (false) - yyjson_read_err dummy_err; + 
yyjson_read_err tmp_err; yyjson_alc alc = alc_ptr ? *alc_ptr : YYJSON_DEFAULT_ALC; yyjson_doc *doc; - u8 *hdr = NULL, *end, *cur; + u8 *hdr = NULL, *eof, *cur; /* validate input parameters */ - if (!err) err = &dummy_err; + if (!err) err = &tmp_err; if (unlikely(!dat)) return_err(0, INVALID_PARAMETER, "input data is NULL"); if (unlikely(!len)) return_err(0, INVALID_PARAMETER, "input length is 0"); /* add 4-byte zero padding for input data if necessary */ - if (has_read_flag(INSITU)) { + if (has_flg(INSITU)) { hdr = (u8 *)dat; - end = (u8 *)dat + len; + eof = (u8 *)dat + len; cur = (u8 *)dat; } else { if (unlikely(len >= USIZE_MAX - YYJSON_PADDING_SIZE)) { @@ -6772,41 +6200,40 @@ yyjson_doc *yyjson_read_opts(char *dat, usize len, if (unlikely(!hdr)) { return_err(0, MEMORY_ALLOCATION, MSG_MALLOC); } - end = hdr + len; + eof = hdr + len; cur = hdr; memcpy(hdr, dat, len); - memset(end, 0, YYJSON_PADDING_SIZE); } + memset(eof, 0, YYJSON_PADDING_SIZE); - if (has_read_flag(ALLOW_BOM)) { + if (has_allow(BOM)) { if (len >= 3 && is_utf8_bom(cur)) cur += 3; } /* skip empty contents before json document */ - if (unlikely(char_is_space_or_comment(*cur))) { - if (has_read_flag(ALLOW_COMMENTS)) { - if (!skip_spaces_and_comments(&cur)) { - return_err(cur - hdr, INVALID_COMMENT, MSG_COMMENT); - } - } else { - if (likely(char_is_space(*cur))) { - while (char_is_space(*++cur)); + if (unlikely(!char_is_ctn(*cur))) { + while (char_is_space(*cur)) cur++; + if (unlikely(!char_is_ctn(*cur))) { + if (has_allow(TRIVIA) && char_is_trivia(*cur)) { + if (!skip_trivia(&cur, eof, flg) && cur == eof) { + return_err(cur - hdr, INVALID_COMMENT, MSG_COMMENT); + } } } - if (unlikely(cur >= end)) { + if (unlikely(cur >= eof)) { return_err(0, EMPTY_CONTENT, "input data is empty"); } } /* read json document */ - if (likely(char_is_container(*cur))) { + if (likely(char_is_ctn(*cur))) { if (char_is_space(cur[1]) && char_is_space(cur[2])) { - doc = read_root_pretty(hdr, cur, end, alc, flg, err); + doc = 
read_root_pretty(hdr, cur, eof, alc, flg, err); } else { - doc = read_root_minify(hdr, cur, end, alc, flg, err); + doc = read_root_minify(hdr, cur, eof, alc, flg, err); } } else { - doc = read_root_single(hdr, cur, end, alc, flg, err); + doc = read_root_single(hdr, cur, eof, alc, flg, err); } /* check result */ @@ -6819,673 +6246,13 @@ yyjson_doc *yyjson_read_opts(char *dat, usize len, else if (len >= 4 && is_utf32_bom(hdr)) err->msg = MSG_ERR_UTF32; else if (len >= 2 && is_utf16_bom(hdr)) err->msg = MSG_ERR_UTF16; } - if (!has_read_flag(INSITU)) alc.free(alc.ctx, (void *)hdr); + if (!has_flg(INSITU)) alc.free(alc.ctx, hdr); } return doc; #undef return_err } - - -#if !YYJSON_DISABLE_INCR_READER - -/* labels within yyjson_incr_read() to resume incremental parsing */ -#define YYJSON_READ_LABEL_doc_begin 0 -#define YYJSON_READ_LABEL_arr_val_begin 1 -#define YYJSON_READ_LABEL_arr_val_end 2 -#define YYJSON_READ_LABEL_obj_key_begin 3 -#define YYJSON_READ_LABEL_obj_key_end 4 -#define YYJSON_READ_LABEL_obj_val_begin 5 -#define YYJSON_READ_LABEL_obj_val_end 6 -#define YYJSON_READ_LABEL_doc_end 7 - -/** State for incremental JSON reader, opaque in the API. 
*/ -struct yyjson_incr_state { - u32 label; /* current parser goto label */ - const yyjson_alc *alc; /* allocator */ - yyjson_read_flag flg; /* read flags */ - u8 *hdr; /* JSON data */ - u8 *cur; /* current position in JSON data */ - usize len; - usize hdr_len; /* value count used by yyjson_doc */ - usize alc_len; /* value count allocated */ - usize ctn_len; /* the number of elements in current container */ - yyjson_val *val_hdr; /* the head of allocated values */ - yyjson_val *val_end; /* the end of allocated values */ - yyjson_val *val; /* current JSON value */ - yyjson_val *ctn; /* current container */ - u8 *str_con[2]; /* string parser incremental state */ -}; - -yyjson_incr_state *yyjson_incr_new(char *buf, size_t buf_len, - yyjson_read_flag flg, - const yyjson_alc *alc) { - yyjson_incr_state *state = NULL; - if (unlikely(!buf)) goto error; - if (likely(!alc)) alc = &YYJSON_DEFAULT_ALC; - state = (yyjson_incr_state *)alc->malloc(alc->ctx, - sizeof(yyjson_incr_state)); - if (!state) goto error; - memset(state, 0, sizeof(yyjson_incr_state)); - state->alc = alc; - state->flg = flg; - state->len = buf_len; - - /* add 4-byte zero padding for input data if necessary */ - if (has_read_flag(INSITU)) { - state->hdr = (u8 *)buf; - state->cur = (u8 *)buf; - } else { - if (unlikely(buf_len >= USIZE_MAX - YYJSON_PADDING_SIZE)) goto error; - state->hdr = (u8 *)alc->malloc(alc->ctx, buf_len + YYJSON_PADDING_SIZE); - if (unlikely(!state->hdr)) goto error; - state->cur = state->hdr; - memcpy(state->hdr, buf, buf_len); - memset(state->hdr + buf_len, 0, YYJSON_PADDING_SIZE); - } - return state; - -error: - if (state) yyjson_incr_free(state); - return NULL; -} - -void yyjson_incr_free(yyjson_incr_state *state) { - const yyjson_alc *alc = state->alc; - if (state->val_hdr != NULL) { - alc->free(alc->ctx, (void *)state->val_hdr); - } - if (state->hdr != NULL && !(state->flg & YYJSON_READ_INSITU)) { - alc->free(alc->ctx, (void *)state->hdr); - } - alc->free(alc->ctx, (void *)state); 
-} - -yyjson_doc *yyjson_incr_read(yyjson_incr_state *state, size_t len, - yyjson_read_err *err) { - -#define return_err_inv_param(_msg) do { \ - err->pos = 0; \ - err->msg = _msg; \ - err->code = YYJSON_READ_ERROR_INVALID_PARAMETER; \ - return NULL; \ -} while (false) - -#define return_err(_pos, _code, _msg) do { \ - if (is_truncated_end(hdr, _pos, end, YYJSON_READ_ERROR_##_code, flg)) { \ - goto unexpected_end; \ - } else { \ - err->pos = (usize)(_pos - hdr); \ - err->code = YYJSON_READ_ERROR_##_code; \ - err->msg = _msg; \ - } \ - return NULL; \ -} while (false) - -#define val_incr() do { \ - val++; \ - if (unlikely(val >= val_end)) { \ - usize alc_old = alc_len; \ - alc_len += alc_len / 2; \ - if ((sizeof(usize) < 8) && (alc_len >= alc_max)) goto fail_alloc; \ - val_tmp = (yyjson_val *)alc.realloc(alc.ctx, (void *)val_hdr, \ - alc_old * sizeof(yyjson_val), \ - alc_len * sizeof(yyjson_val)); \ - if ((!val_tmp)) goto fail_alloc; \ - val = val_tmp + (usize)(val - val_hdr); \ - ctn = val_tmp + (usize)(ctn - val_hdr); \ - state->val = val_tmp + (usize)(state->val - val_hdr); \ - state->val_hdr = val_hdr = val_tmp; \ - val_end = val_tmp + (alc_len - 2); \ - state->val_end = val_end; \ - } \ -} while (false) - -#define save_incr_state(_label) do { \ - /* save position where it's possible to resume incremental parsing */ \ - state->label = YYJSON_READ_LABEL_##_label; \ - state->cur = cur; \ - state->val = val; \ - state->ctn_len = ctn_len; \ - state->hdr_len = hdr_len; \ - if (unlikely(cur >= end)) goto unexpected_end; \ -} while (false) - -#define check_maybe_truncated_number() do { \ - if (unlikely(cur >= end)) { \ - if (unlikely(cur > state->cur + INCR_NUM_MAX_LEN)) { \ - msg = "number too long"; \ - goto fail_number; \ - } \ - goto unexpected_end; \ - } \ -} while (false) - - u8 *hdr = NULL, *end = NULL, *cur = NULL; - yyjson_read_flag flg; - yyjson_alc alc; - usize dat_len; /* data length in bytes, hint for allocator */ - usize hdr_len; /* value count used by 
yyjson_doc */ - usize alc_len; /* value count allocated */ - usize alc_max; /* maximum value count for allocator */ - usize ctn_len; /* the number of elements in current container */ - yyjson_val *val_hdr; /* the head of allocated values */ - yyjson_val *val_end; /* the end of allocated values */ - yyjson_val *val_tmp; /* temporary pointer for realloc */ - yyjson_val *val; /* current JSON value */ - yyjson_val *ctn; /* current container */ - yyjson_val *ctn_parent; /* parent of current container */ - yyjson_doc *doc; /* the JSON document, equals to val_hdr */ - const char *msg; /* error message */ - - bool raw; /* read number as raw */ - bool inv; /* allow invalid unicode */ - u8 *raw_end; /* raw end for null-terminator */ - u8 **pre; /* previous raw end pointer */ - u8 **con = NULL; /* for incremental string parsing */ - u8 saved_end = '\0'; /* saved end char */ - - /* validate input parameters */ - if (unlikely(!err)) { - return NULL; - } - if (unlikely(!state)) { - return_err_inv_param("input state is NULL"); - } - if (unlikely(!len)) { - return_err_inv_param("input length is 0"); - } - if (unlikely(len > state->len)) { - return_err_inv_param("length is greater than total input length"); - } - - hdr = state->hdr; - end = state->hdr + len; - cur = state->cur; - flg = state->flg; - alc = *state->alc; - ctn_len = state->ctn_len; - hdr_len = state->hdr_len; - alc_len = state->alc_len; - val = state->val; - val_hdr = state->val_hdr; - val_end = state->val_end; - ctn = state->ctn; - con = state->str_con; - - alc_max = USIZE_MAX / sizeof(yyjson_val); - raw = has_read_flag(NUMBER_AS_RAW) || has_read_flag(BIGNUM_AS_RAW); - inv = has_read_flag(ALLOW_INVALID_UNICODE) != 0; - raw_end = NULL; - pre = raw ? 
&raw_end : NULL; - - /* insert null terminator to make us stop at the specified end, even if - the data contains more valid JSON */ - saved_end = *end; - *end = '\0'; - - /* resume parsing from the last save point */ - switch (state->label) { - case YYJSON_READ_LABEL_doc_begin: goto doc_begin; - case YYJSON_READ_LABEL_arr_val_begin: goto arr_val_begin; - case YYJSON_READ_LABEL_arr_val_end: goto arr_val_end; - case YYJSON_READ_LABEL_obj_key_begin: goto obj_key_begin; - case YYJSON_READ_LABEL_obj_key_end: goto obj_key_end; - case YYJSON_READ_LABEL_obj_val_begin: goto obj_val_begin; - case YYJSON_READ_LABEL_obj_val_end: goto obj_val_end; - case YYJSON_READ_LABEL_doc_end: goto doc_end; - default: return_err_inv_param("invalid incremental state"); - } - -doc_begin: - if (cur == hdr && has_read_flag(ALLOW_BOM)) { - if (len >= 3 && is_utf8_bom(cur)) cur += 3; - } - - /* skip empty contents before json document */ - if (unlikely(char_is_space_or_comment(*cur))) { - if (has_read_flag(ALLOW_COMMENTS)) { - if (!skip_spaces_and_comments(&cur)) { - /* unclosed multiline comment */ - goto unexpected_end; - } - } else { - if (likely(char_is_space(*cur))) { - while (char_is_space(*++cur)); - } - } - if (unlikely(cur >= end)) { - /* input data is empty */ - goto unexpected_end; - } - } - - /* allocate memory for document */ - if (!val_hdr) { - hdr_len = sizeof(yyjson_doc) / sizeof(yyjson_val); - hdr_len += (sizeof(yyjson_doc) % sizeof(yyjson_val)) > 0; - if (likely(char_is_container(*cur))) { - dat_len = has_read_flag(STOP_WHEN_DONE) ? 
256 : state->len; - alc_len = hdr_len + - (dat_len / YYJSON_READER_ESTIMATED_MINIFY_RATIO) + 4; - alc_len = yyjson_min(alc_len, alc_max); - } else { - alc_len = hdr_len + 1; /* single value */ - } - val_hdr = (yyjson_val *)alc.malloc(alc.ctx, - alc_len * sizeof(yyjson_val)); - if (unlikely(!val_hdr)) goto fail_alloc; - val_end = val_hdr + (alc_len - 2); /* padding for kv pair reading */ - val = val_hdr + hdr_len; - ctn = val; - ctn_len = 0; - state->val_hdr = val_hdr; - state->val_end = val_end; - save_incr_state(doc_begin); - } - - /* read json document */ - if (*cur == '{') { - cur++; - ctn->tag = YYJSON_TYPE_OBJ; - ctn->uni.ofs = 0; - goto obj_key_begin; - } - if (*cur == '[') { - cur++; - ctn->tag = YYJSON_TYPE_ARR; - ctn->uni.ofs = 0; - goto arr_val_begin; - } - if (char_is_num(*cur)) { - if (likely(read_num(&cur, pre, flg, val, &msg))) goto doc_end; - goto fail_number; - } - if (*cur == '"') { - if (likely(read_str(&cur, end, inv, val, &msg, con))) goto doc_end; - goto fail_string; - } - if (*cur == 't') { - if (likely(read_true(&cur, val))) goto doc_end; - goto fail_literal_true; - } - if (*cur == 'f') { - if (likely(read_false(&cur, val))) goto doc_end; - goto fail_literal_false; - } - if (*cur == 'n') { - if (likely(read_null(&cur, val))) goto doc_end; - if (has_read_flag(ALLOW_INF_AND_NAN)) { - if (read_nan(false, &cur, pre, flg, val)) goto doc_end; - } - goto fail_literal_null; - } - if (has_read_flag(ALLOW_INF_AND_NAN)) { - if (read_inf_or_nan(false, &cur, pre, flg, val)) goto doc_end; - } - - msg = "unexpected character, expected a valid root value"; - if (cur == hdr) { - /* RFC 8259: JSON text MUST be encoded using UTF-8 */ - if (is_utf8_bom(hdr)) msg = MSG_ERR_BOM; - else if (len >= 4 && is_utf32_bom(hdr)) msg = MSG_ERR_UTF32; - else if (len >= 2 && is_utf16_bom(hdr)) msg = MSG_ERR_UTF16; - } - return_err(cur, UNEXPECTED_CHARACTER, msg); - -arr_begin: - /* save current container */ - ctn->tag = (((u64)ctn_len + 1) << YYJSON_TAG_BIT) | - (ctn->tag & 
YYJSON_TAG_MASK); - - /* create a new array value, save parent container offset */ - val_incr(); - val->tag = YYJSON_TYPE_ARR; - val->uni.ofs = (usize)((u8 *)val - (u8 *)ctn); - - /* push the new array value as current container */ - ctn = val; - ctn_len = 0; - -arr_val_begin: - save_incr_state(arr_val_begin); -arr_val_continue: - if (*cur == '{') { - cur++; - goto obj_begin; - } - if (*cur == '[') { - cur++; - goto arr_begin; - } - if (char_is_num(*cur)) { - val_incr(); - ctn_len++; - if (likely(read_num(&cur, pre, flg, val, &msg))) goto arr_val_maybe_end; - goto fail_number; - } - if (*cur == '"') { - val_incr(); - ctn_len++; - if (likely(read_str(&cur, end, inv, val, &msg, con))) goto arr_val_end; - goto fail_string; - } - if (*cur == 't') { - val_incr(); - ctn_len++; - if (likely(read_true(&cur, val))) goto arr_val_end; - goto fail_literal_true; - } - if (*cur == 'f') { - val_incr(); - ctn_len++; - if (likely(read_false(&cur, val))) goto arr_val_end; - goto fail_literal_false; - } - if (*cur == 'n') { - val_incr(); - ctn_len++; - if (likely(read_null(&cur, val))) goto arr_val_end; - if (has_read_flag(ALLOW_INF_AND_NAN)) { - if (read_nan(false, &cur, pre, flg, val)) goto arr_val_end; - } - goto fail_literal_null; - } - if (*cur == ']') { - cur++; - if (likely(ctn_len == 0)) goto arr_end; - if (has_read_flag(ALLOW_TRAILING_COMMAS)) goto arr_end; - while (*cur != ',') cur--; - goto fail_trailing_comma; - } - if (char_is_space(*cur)) { - while (char_is_space(*++cur)); - goto arr_val_continue; - } - if (has_read_flag(ALLOW_INF_AND_NAN) && - (*cur == 'i' || *cur == 'I' || *cur == 'N')) { - val_incr(); - ctn_len++; - if (read_inf_or_nan(false, &cur, pre, flg, val)) goto arr_val_maybe_end; - goto fail_character_val; - } - if (has_read_flag(ALLOW_COMMENTS)) { - if (skip_spaces_and_comments(&cur)) goto arr_val_continue; - if (byte_match_2(cur, "/*")) goto fail_comment; - } - goto fail_character_val; - -arr_val_maybe_end: - /* if incremental parsing stops in the middle of 
a number, it may continue - with more digits, so arr val maybe didn't end yet */ - check_maybe_truncated_number(); - -arr_val_end: - save_incr_state(arr_val_end); - if (*cur == ',') { - cur++; - goto arr_val_begin; - } - if (*cur == ']') { - cur++; - goto arr_end; - } - if (char_is_space(*cur)) { - while (char_is_space(*++cur)); - goto arr_val_end; - } - if (has_read_flag(ALLOW_COMMENTS)) { - if (skip_spaces_and_comments(&cur)) goto arr_val_end; - if (byte_match_2(cur, "/*")) goto fail_comment; - } - goto fail_character_arr_end; - -arr_end: - /* get parent container */ - ctn_parent = (yyjson_val *)(void *)((u8 *)ctn - ctn->uni.ofs); - - /* save the next sibling value offset */ - ctn->uni.ofs = (usize)((u8 *)val - (u8 *)ctn) + sizeof(yyjson_val); - ctn->tag = ((ctn_len) << YYJSON_TAG_BIT) | YYJSON_TYPE_ARR; - if (unlikely(ctn == ctn_parent)) goto doc_end; - - /* pop parent as current container */ - ctn = ctn_parent; - ctn_len = (usize)(ctn->tag >> YYJSON_TAG_BIT); - if ((ctn->tag & YYJSON_TYPE_MASK) == YYJSON_TYPE_OBJ) { - goto obj_val_end; - } else { - goto arr_val_end; - } - -obj_begin: - /* push container */ - ctn->tag = (((u64)ctn_len + 1) << YYJSON_TAG_BIT) | - (ctn->tag & YYJSON_TAG_MASK); - val_incr(); - val->tag = YYJSON_TYPE_OBJ; - /* offset to the parent */ - val->uni.ofs = (usize)((u8 *)val - (u8 *)ctn); - ctn = val; - ctn_len = 0; - -obj_key_begin: - save_incr_state(obj_key_begin); -obj_key_continue: - if (likely(*cur == '"')) { - val_incr(); - ctn_len++; - if (likely(read_str(&cur, end, inv, val, &msg, con))) goto obj_key_end; - goto fail_string; - } - if (likely(*cur == '}')) { - cur++; - if (likely(ctn_len == 0)) goto obj_end; - if (has_read_flag(ALLOW_TRAILING_COMMAS)) goto obj_end; - while (*cur != ',') cur--; - goto fail_trailing_comma; - } - if (char_is_space(*cur)) { - while (char_is_space(*++cur)); - goto obj_key_continue; - } - if (has_read_flag(ALLOW_COMMENTS)) { - if (skip_spaces_and_comments(&cur)) goto obj_key_continue; - if 
(byte_match_2(cur, "/*")) goto fail_comment; - } - goto fail_character_obj_key; - -obj_key_end: - save_incr_state(obj_key_end); - if (*cur == ':') { - cur++; - goto obj_val_begin; - } - if (char_is_space(*cur)) { - while (char_is_space(*++cur)); - goto obj_key_end; - } - if (has_read_flag(ALLOW_COMMENTS)) { - if (skip_spaces_and_comments(&cur)) goto obj_key_end; - if (byte_match_2(cur, "/*")) goto fail_comment; - } - goto fail_character_obj_sep; - -obj_val_begin: - save_incr_state(obj_val_begin); -obj_val_continue: - if (*cur == '"') { - val++; - ctn_len++; - if (likely(read_str(&cur, end, inv, val, &msg, con))) goto obj_val_end; - goto fail_string; - } - if (char_is_num(*cur)) { - val++; - ctn_len++; - if (likely(read_num(&cur, pre, flg, val, &msg))) goto obj_val_maybe_end; - goto fail_number; - } - if (*cur == '{') { - cur++; - goto obj_begin; - } - if (*cur == '[') { - cur++; - goto arr_begin; - } - if (*cur == 't') { - val++; - ctn_len++; - if (likely(read_true(&cur, val))) goto obj_val_end; - goto fail_literal_true; - } - if (*cur == 'f') { - val++; - ctn_len++; - if (likely(read_false(&cur, val))) goto obj_val_end; - goto fail_literal_false; - } - if (*cur == 'n') { - val++; - ctn_len++; - if (likely(read_null(&cur, val))) goto obj_val_end; - if (has_read_flag(ALLOW_INF_AND_NAN)) { - if (read_nan(false, &cur, pre, flg, val)) goto obj_val_end; - } - goto fail_literal_null; - } - if (char_is_space(*cur)) { - while (char_is_space(*++cur)); - goto obj_val_continue; - } - if (has_read_flag(ALLOW_INF_AND_NAN) && - (*cur == 'i' || *cur == 'I' || *cur == 'N')) { - val++; - ctn_len++; - if (read_inf_or_nan(false, &cur, pre, flg, val)) goto obj_val_maybe_end; - goto fail_character_val; - } - if (has_read_flag(ALLOW_COMMENTS)) { - if (skip_spaces_and_comments(&cur)) goto obj_val_continue; - if (byte_match_2(cur, "/*")) goto fail_comment; - } - goto fail_character_val; - -obj_val_maybe_end: - /* if incremental parsing stops in the middle of a number, it may continue - 
with more digits, so obj val maybe didn't end yet */ - check_maybe_truncated_number(); - -obj_val_end: - save_incr_state(obj_val_end); - if (likely(*cur == ',')) { - cur++; - goto obj_key_begin; - } - if (likely(*cur == '}')) { - cur++; - goto obj_end; - } - if (char_is_space(*cur)) { - while (char_is_space(*++cur)); - goto obj_val_end; - } - if (has_read_flag(ALLOW_COMMENTS)) { - if (skip_spaces_and_comments(&cur)) goto obj_val_end; - if (byte_match_2(cur, "/*")) goto fail_comment; - } - goto fail_character_obj_end; - -obj_end: - /* pop container */ - ctn_parent = (yyjson_val *)(void *)((u8 *)ctn - ctn->uni.ofs); - /* point to the next value */ - ctn->uni.ofs = (usize)((u8 *)val - (u8 *)ctn) + sizeof(yyjson_val); - ctn->tag = (ctn_len << (YYJSON_TAG_BIT - 1)) | YYJSON_TYPE_OBJ; - if (unlikely(ctn == ctn_parent)) goto doc_end; - ctn = ctn_parent; - ctn_len = (usize)(ctn->tag >> YYJSON_TAG_BIT); - if ((ctn->tag & YYJSON_TYPE_MASK) == YYJSON_TYPE_OBJ) { - goto obj_val_end; - } else { - goto arr_val_end; - } - -doc_end: - /* check invalid contents after json document */ - if (unlikely(cur < end) && !has_read_flag(STOP_WHEN_DONE)) { - save_incr_state(doc_end); - if (has_read_flag(ALLOW_COMMENTS)) { - skip_spaces_and_comments(&cur); - if (byte_match_2(cur, "/*")) goto fail_comment; - if (*cur == '/' && cur + 1 == end) { - /* truncated beginning of comment */ - goto unexpected_end; - } - } else { - while (char_is_space(*cur)) cur++; - } - if (unlikely(cur < end)) goto fail_garbage; - } - - if (pre && *pre) **pre = '\0'; - doc = (yyjson_doc *)val_hdr; - doc->root = val_hdr + hdr_len; - doc->alc = alc; - doc->dat_read = (usize)(cur - hdr); - doc->val_read = (usize)((val - doc->root) + 1); - doc->str_pool = has_read_flag(INSITU) ? 
NULL : (char *)hdr; - state->hdr = NULL; - state->val_hdr = NULL; - memset(err, 0, sizeof(yyjson_read_err)); - return doc; - -unexpected_end: - err->pos = len; - if (unlikely(len >= state->len)) { - err->code = YYJSON_READ_ERROR_UNEXPECTED_END; - err->msg = MSG_NOT_END; - return NULL; - } - /* save parser state in extended error struct, in addition to what was - * stored in the last save_incr_state */ - err->code = YYJSON_READ_ERROR_MORE; - err->msg = "need more data"; - state->val_end = val_end; - state->ctn = ctn; - state->alc_len = alc_len; - /* restore the end where we've inserted a null terminator */ - *end = saved_end; - return NULL; - -fail_string: return_err(cur, INVALID_STRING, msg); -fail_number: return_err(cur, INVALID_NUMBER, msg); -fail_alloc: return_err(cur, MEMORY_ALLOCATION, MSG_MALLOC); -fail_trailing_comma: return_err(cur, JSON_STRUCTURE, MSG_COMMA); -fail_literal_true: return_err(cur, LITERAL, MSG_CHAT_T); -fail_literal_false: return_err(cur, LITERAL, MSG_CHAR_F); -fail_literal_null: return_err(cur, LITERAL, MSG_CHAR_N); -fail_character_val: return_err(cur, UNEXPECTED_CHARACTER, MSG_CHAR); -fail_character_arr_end: return_err(cur, UNEXPECTED_CHARACTER, MSG_ARR_END); -fail_character_obj_key: return_err(cur, UNEXPECTED_CHARACTER, MSG_OBJ_KEY); -fail_character_obj_sep: return_err(cur, UNEXPECTED_CHARACTER, MSG_OBJ_SEP); -fail_character_obj_end: return_err(cur, UNEXPECTED_CHARACTER, MSG_OBJ_END); -fail_comment: return_err(cur, INVALID_COMMENT, MSG_COMMENT); -fail_garbage: return_err(cur, UNEXPECTED_CONTENT, MSG_GARBAGE); - -#undef val_incr -#undef return_err -#undef return_err_inv_param -#undef save_incr_state -#undef check_maybe_truncated_number -} - -#endif /* YYJSON_DISABLE_INCR_READER */ - - - yyjson_doc *yyjson_read_file(const char *path, yyjson_read_flag flg, const yyjson_alc *alc_ptr, @@ -7497,11 +6264,11 @@ yyjson_doc *yyjson_read_file(const char *path, return NULL; \ } while (false) - yyjson_read_err dummy_err; + yyjson_read_err tmp_err; 
yyjson_doc *doc; FILE *file; - if (!err) err = &dummy_err; + if (!err) err = &tmp_err; if (unlikely(!path)) return_err(INVALID_PARAMETER, "input path is NULL"); file = fopen_readonly(path); @@ -7526,7 +6293,7 @@ yyjson_doc *yyjson_read_fp(FILE *file, return NULL; \ } while (false) - yyjson_read_err dummy_err; + yyjson_read_err tmp_err; yyjson_alc alc = alc_ptr ? *alc_ptr : YYJSON_DEFAULT_ALC; yyjson_doc *doc; @@ -7535,7 +6302,7 @@ yyjson_doc *yyjson_read_fp(FILE *file, usize buf_size = 0; /* validate input parameters */ - if (!err) err = &dummy_err; + if (!err) err = &tmp_err; if (unlikely(!file)) return_err(INVALID_PARAMETER, "input file is NULL"); /* get current position */ @@ -7620,18 +6387,18 @@ const char *yyjson_read_number(const char *dat, } while (false) u8 *hdr = constcast(u8 *)dat, *cur = hdr; - bool raw; /* read number as raw */ - u8 *raw_end; /* raw end for null-terminator */ - u8 **pre; /* previous raw end pointer */ + u8 raw_end[1]; /* raw end for null-terminator */ + u8 *raw_ptr = raw_end; + u8 **pre = &raw_ptr; /* previous raw end pointer */ const char *msg; - yyjson_read_err dummy_err; + yyjson_read_err tmp_err; -#if !YYJSON_HAS_IEEE_754 || YYJSON_DISABLE_FAST_FP_CONV +#if YYJSON_DISABLE_FAST_FP_CONV u8 buf[128]; usize dat_len; #endif - if (!err) err = &dummy_err; + if (!err) err = &tmp_err; if (unlikely(!dat)) { return_err(cur, INVALID_PARAMETER, "input data is NULL"); } @@ -7639,7 +6406,7 @@ const char *yyjson_read_number(const char *dat, return_err(cur, INVALID_PARAMETER, "output value is NULL"); } -#if !YYJSON_HAS_IEEE_754 || YYJSON_DISABLE_FAST_FP_CONV +#if YYJSON_DISABLE_FAST_FP_CONV if (!alc) alc = &YYJSON_DEFAULT_ALC; dat_len = strlen(dat); if (dat_len < sizeof(buf)) { @@ -7657,11 +6424,7 @@ const char *yyjson_read_number(const char *dat, hdr[dat_len] = 0; #endif - raw = (flg & (YYJSON_READ_NUMBER_AS_RAW | YYJSON_READ_BIGNUM_AS_RAW)) != 0; - raw_end = NULL; - pre = raw ? 
&raw_end : NULL; - -#if !YYJSON_HAS_IEEE_754 || YYJSON_DISABLE_FAST_FP_CONV +#if YYJSON_DISABLE_FAST_FP_CONV if (!read_num(&cur, pre, flg, val, &msg)) { if (dat_len >= sizeof(buf)) alc->free(alc->ctx, hdr); return_err(cur, INVALID_NUMBER, msg); @@ -7679,14 +6442,615 @@ const char *yyjson_read_number(const char *dat, #undef return_err } + + +/*============================================================================== + * MARK: - Incremental JSON Reader (Public) + *============================================================================*/ + +#if !YYJSON_DISABLE_INCR_READER + +/* labels within yyjson_incr_read() to resume incremental parsing */ +#define LABEL_doc_begin 0 +#define LABEL_arr_val_begin 1 +#define LABEL_arr_val_end 2 +#define LABEL_obj_key_begin 3 +#define LABEL_obj_key_end 4 +#define LABEL_obj_val_begin 5 +#define LABEL_obj_val_end 6 +#define LABEL_doc_end 7 + +/** State for incremental JSON reader, opaque in the API. */ +struct yyjson_incr_state { + u32 label; /* current parser goto label */ + yyjson_alc alc; /* allocator */ + yyjson_read_flag flg; /* read flags */ + u8 *hdr; /* JSON data header */ + u8 *cur; /* current position in JSON data */ + usize buf_len; /* total buffer length (without padding) */ + usize hdr_len; /* value count used by yyjson_doc */ + usize alc_len; /* value count allocated */ + usize ctn_len; /* the number of elements in current container */ + yyjson_val *val_hdr; /* the head of allocated values */ + yyjson_val *val_end; /* the end of allocated values */ + yyjson_val *val; /* current JSON value */ + yyjson_val *ctn; /* current container */ + u8 *str_con[2]; /* string parser incremental state */ +}; + +yyjson_incr_state *yyjson_incr_new(char *buf, size_t buf_len, + yyjson_read_flag flg, + const yyjson_alc *alc_ptr) { + yyjson_incr_state *state = NULL; + yyjson_alc alc = alc_ptr ? 
*alc_ptr : YYJSON_DEFAULT_ALC; + + /* remove non-standard flags */ + flg &= ~YYJSON_READ_JSON5; + flg &= ~YYJSON_READ_ALLOW_BOM; + flg &= ~YYJSON_READ_ALLOW_INVALID_UNICODE; + + if (unlikely(!buf)) return NULL; + if (unlikely(buf_len >= USIZE_MAX - YYJSON_PADDING_SIZE)) return NULL; + state = (yyjson_incr_state *)alc.malloc(alc.ctx, sizeof(*state)); + if (!state) return NULL; + memset(state, 0, sizeof(yyjson_incr_state)); + state->alc = alc; + state->flg = flg; + state->buf_len = buf_len; + + /* add 4-byte zero padding for input data if necessary */ + if (has_flg(INSITU)) { + state->hdr = (u8 *)buf; + } else { + state->hdr = (u8 *)alc.malloc(alc.ctx, buf_len + YYJSON_PADDING_SIZE); + if (unlikely(!state->hdr)) { + alc.free(alc.ctx, state); + return NULL; + } + memcpy(state->hdr, buf, buf_len); + } + memset(state->hdr + buf_len, 0, YYJSON_PADDING_SIZE); + state->cur = state->hdr; + state->label = LABEL_doc_begin; + return state; +} + +void yyjson_incr_free(yyjson_incr_state *state) { + if (state) { + yyjson_alc alc = state->alc; + memset(&state->alc, 0, sizeof(alc)); + if (state->val_hdr) { + alc.free(alc.ctx, (void *)state->val_hdr); + } + if (state->hdr && !(state->flg & YYJSON_READ_INSITU)) { + alc.free(alc.ctx, state->hdr); + } + alc.free(alc.ctx, state); + } +} + +yyjson_doc *yyjson_incr_read(yyjson_incr_state *state, size_t len, + yyjson_read_err *err) { +#define return_err_inv_param(_msg) do { \ + err->pos = 0; \ + err->msg = _msg; \ + err->code = YYJSON_READ_ERROR_INVALID_PARAMETER; \ + return NULL; \ +} while (false) + +#define return_err(_pos, _code, _msg) do { \ + if (is_truncated_end(hdr, _pos, end, YYJSON_READ_ERROR_##_code, flg)) { \ + goto unexpected_end; \ + } else { \ + err->pos = (usize)(_pos - hdr); \ + err->code = YYJSON_READ_ERROR_##_code; \ + err->msg = _msg; \ + } \ + return NULL; \ +} while (false) + +#define val_incr() do { \ + val++; \ + if (unlikely(val >= val_end)) { \ + usize alc_old = alc_len; \ + alc_len += alc_len / 2; \ + if 
((sizeof(usize) < 8) && (alc_len >= alc_max)) goto fail_alloc; \ + val_tmp = (yyjson_val *)alc.realloc(alc.ctx, (void *)val_hdr, \ + alc_old * sizeof(yyjson_val), \ + alc_len * sizeof(yyjson_val)); \ + if ((!val_tmp)) goto fail_alloc; \ + val = val_tmp + (usize)(val - val_hdr); \ + ctn = val_tmp + (usize)(ctn - val_hdr); \ + state->val = val_tmp + (usize)(state->val - val_hdr); \ + state->val_hdr = val_hdr = val_tmp; \ + val_end = val_tmp + (alc_len - 2); \ + state->val_end = val_end; \ + } \ +} while (false) + + /* save position where it's possible to resume incremental parsing */ +#define save_incr_state(_label) do { \ + state->label = LABEL_##_label; \ + state->cur = cur; \ + state->val = val; \ + state->ctn_len = ctn_len; \ + state->hdr_len = hdr_len; \ + if (unlikely(cur >= end)) goto unexpected_end; \ +} while (false) + +#define check_maybe_truncated_number() do { \ + if (unlikely(cur >= end)) { \ + if (unlikely(cur > state->cur + INCR_NUM_MAX_LEN)) { \ + msg = "number too long"; \ + goto fail_number; \ + } \ + goto unexpected_end; \ + } \ +} while (false) + + u8 *hdr = NULL, *end = NULL, *cur = NULL; + yyjson_read_flag flg; + yyjson_alc alc; + usize dat_len; /* data length in bytes, hint for allocator */ + usize hdr_len; /* value count used by yyjson_doc */ + usize alc_len; /* value count allocated */ + usize alc_max; /* maximum value count for allocator */ + usize ctn_len; /* the number of elements in current container */ + yyjson_val *val_hdr; /* the head of allocated values */ + yyjson_val *val_end; /* the end of allocated values */ + yyjson_val *val_tmp; /* temporary pointer for realloc */ + yyjson_val *val; /* current JSON value */ + yyjson_val *ctn; /* current container */ + yyjson_val *ctn_parent; /* parent of current container */ + yyjson_doc *doc; /* the JSON document, equals to val_hdr */ + const char *msg; /* error message */ + + yyjson_read_err tmp_err; + u8 raw_end[1]; /* raw end for null-terminator */ + u8 *raw_ptr = raw_end; + u8 **pre = 
&raw_ptr; /* previous raw end pointer */ + u8 **con = NULL; /* for incremental string parsing */ + u8 saved_end = '\0'; /* saved end char */ + + /* validate input parameters */ + if (!err) err = &tmp_err; + if (unlikely(!state)) { + return_err_inv_param("input state is NULL"); + } + if (unlikely(!len)) { + return_err_inv_param("input length is 0"); + } + if (unlikely(len > state->buf_len)) { + return_err_inv_param("length is greater than total input length"); + } + + /* restore state saved from the previous call */ + hdr = state->hdr; + end = state->hdr + len; + cur = state->cur; + flg = state->flg; + alc = state->alc; + ctn_len = state->ctn_len; + hdr_len = state->hdr_len; + alc_len = state->alc_len; + val = state->val; + val_hdr = state->val_hdr; + val_end = state->val_end; + ctn = state->ctn; + con = state->str_con; + alc_max = USIZE_MAX / sizeof(yyjson_val); + + /* insert null terminator to make us stop at the specified end, even if + the data contains more valid JSON */ + saved_end = *end; + *end = '\0'; + + /* resume parsing from the last save point */ + switch (state->label) { + case LABEL_doc_begin: goto doc_begin; + case LABEL_arr_val_begin: goto arr_val_begin; + case LABEL_arr_val_end: goto arr_val_end; + case LABEL_obj_key_begin: goto obj_key_begin; + case LABEL_obj_key_end: goto obj_key_end; + case LABEL_obj_val_begin: goto obj_val_begin; + case LABEL_obj_val_end: goto obj_val_end; + case LABEL_doc_end: goto doc_end; + default: return_err_inv_param("invalid incremental state"); + } + +doc_begin: + /* skip empty contents before json document */ + if (unlikely(!char_is_ctn(*cur))) { + while (char_is_space(*cur)) cur++; + if (unlikely(cur >= end)) goto unexpected_end; /* input data is empty */ + } + + /* allocate memory for document */ + if (!val_hdr) { + hdr_len = sizeof(yyjson_doc) / sizeof(yyjson_val); + hdr_len += (sizeof(yyjson_doc) % sizeof(yyjson_val)) > 0; + if (likely(char_is_ctn(*cur))) { + dat_len = has_flg(STOP_WHEN_DONE) ? 
256 : state->buf_len; + alc_len = hdr_len + + (dat_len / YYJSON_READER_ESTIMATED_MINIFY_RATIO) + 4; + alc_len = yyjson_min(alc_len, alc_max); + } else { + alc_len = hdr_len + 1; /* single value */ + } + val_hdr = (yyjson_val *)alc.malloc(alc.ctx, + alc_len * sizeof(yyjson_val)); + if (unlikely(!val_hdr)) goto fail_alloc; + val_end = val_hdr + (alc_len - 2); /* padding for kv pair reading */ + val = val_hdr + hdr_len; + ctn = val; + ctn_len = 0; + state->val_hdr = val_hdr; + state->val_end = val_end; + save_incr_state(doc_begin); + } + + /* read json document */ + if (*cur == '{') { + cur++; + ctn->tag = YYJSON_TYPE_OBJ; + ctn->uni.ofs = 0; + goto obj_key_begin; + } + if (*cur == '[') { + cur++; + ctn->tag = YYJSON_TYPE_ARR; + ctn->uni.ofs = 0; + goto arr_val_begin; + } + if (char_is_num(*cur)) { + if (likely(read_num(&cur, pre, flg, val, &msg))) goto doc_end; + goto fail_number; + } + if (*cur == '"') { + if (likely(read_str_con(&cur, end, flg, val, &msg, con))) goto doc_end; + goto fail_string; + } + if (*cur == 't') { + if (likely(read_true(&cur, val))) goto doc_end; + goto fail_literal_true; + } + if (*cur == 'f') { + if (likely(read_false(&cur, val))) goto doc_end; + goto fail_literal_false; + } + if (*cur == 'n') { + if (likely(read_null(&cur, val))) goto doc_end; + goto fail_literal_null; + } + + msg = "unexpected character, expected a valid root value"; + if (cur == hdr) { + /* RFC 8259: JSON text MUST be encoded using UTF-8 */ + if (is_utf8_bom(hdr)) msg = MSG_ERR_BOM; + else if (len >= 4 && is_utf32_bom(hdr)) msg = MSG_ERR_UTF32; + else if (len >= 2 && is_utf16_bom(hdr)) msg = MSG_ERR_UTF16; + } + return_err(cur, UNEXPECTED_CHARACTER, msg); + +arr_begin: + /* save current container */ + ctn->tag = (((u64)ctn_len + 1) << YYJSON_TAG_BIT) | + (ctn->tag & YYJSON_TAG_MASK); + + /* create a new array value, save parent container offset */ + val_incr(); + val->tag = YYJSON_TYPE_ARR; + val->uni.ofs = (usize)((u8 *)val - (u8 *)ctn); + + /* push the new array value 
as current container */ + ctn = val; + ctn_len = 0; + +arr_val_begin: + save_incr_state(arr_val_begin); +arr_val_continue: + if (*cur == '{') { + cur++; + goto obj_begin; + } + if (*cur == '[') { + cur++; + goto arr_begin; + } + if (char_is_num(*cur)) { + val_incr(); + ctn_len++; + if (likely(read_num(&cur, pre, flg, val, &msg))) goto arr_val_maybe_end; + goto fail_number; + } + if (*cur == '"') { + val_incr(); + ctn_len++; + if (likely(read_str_con(&cur, end, flg, val, &msg, con))) + goto arr_val_end; + goto fail_string; + } + if (*cur == 't') { + val_incr(); + ctn_len++; + if (likely(read_true(&cur, val))) goto arr_val_end; + goto fail_literal_true; + } + if (*cur == 'f') { + val_incr(); + ctn_len++; + if (likely(read_false(&cur, val))) goto arr_val_end; + goto fail_literal_false; + } + if (*cur == 'n') { + val_incr(); + ctn_len++; + if (likely(read_null(&cur, val))) goto arr_val_end; + goto fail_literal_null; + } + if (*cur == ']') { + cur++; + if (likely(ctn_len == 0)) goto arr_end; + while (*cur != ',') cur--; + goto fail_trailing_comma; + } + if (char_is_space(*cur)) { + while (char_is_space(*++cur)); + goto arr_val_continue; + } + goto fail_character_val; + +arr_val_maybe_end: + /* if incremental parsing stops in the middle of a number, it may continue + with more digits, so arr val maybe didn't end yet */ + check_maybe_truncated_number(); + +arr_val_end: + save_incr_state(arr_val_end); + if (*cur == ',') { + cur++; + goto arr_val_begin; + } + if (*cur == ']') { + cur++; + goto arr_end; + } + if (char_is_space(*cur)) { + while (char_is_space(*++cur)); + goto arr_val_end; + } + goto fail_character_arr_end; + +arr_end: + /* get parent container */ + ctn_parent = (yyjson_val *)(void *)((u8 *)ctn - ctn->uni.ofs); + + /* save the next sibling value offset */ + ctn->uni.ofs = (usize)((u8 *)val - (u8 *)ctn) + sizeof(yyjson_val); + ctn->tag = ((ctn_len) << YYJSON_TAG_BIT) | YYJSON_TYPE_ARR; + if (unlikely(ctn == ctn_parent)) goto doc_end; + + /* pop parent as 
current container */ + ctn = ctn_parent; + ctn_len = (usize)(ctn->tag >> YYJSON_TAG_BIT); + if ((ctn->tag & YYJSON_TYPE_MASK) == YYJSON_TYPE_OBJ) { + goto obj_val_end; + } else { + goto arr_val_end; + } + +obj_begin: + /* push container */ + ctn->tag = (((u64)ctn_len + 1) << YYJSON_TAG_BIT) | + (ctn->tag & YYJSON_TAG_MASK); + val_incr(); + val->tag = YYJSON_TYPE_OBJ; + /* offset to the parent */ + val->uni.ofs = (usize)((u8 *)val - (u8 *)ctn); + ctn = val; + ctn_len = 0; + +obj_key_begin: + save_incr_state(obj_key_begin); +obj_key_continue: + if (likely(*cur == '"')) { + val_incr(); + ctn_len++; + if (likely(read_str_con(&cur, end, flg, val, &msg, con))) + goto obj_key_end; + goto fail_string; + } + if (likely(*cur == '}')) { + cur++; + if (likely(ctn_len == 0)) goto obj_end; + while (*cur != ',') cur--; + goto fail_trailing_comma; + } + if (char_is_space(*cur)) { + while (char_is_space(*++cur)); + goto obj_key_continue; + } + goto fail_character_obj_key; + +obj_key_end: + save_incr_state(obj_key_end); + if (*cur == ':') { + cur++; + goto obj_val_begin; + } + if (char_is_space(*cur)) { + while (char_is_space(*++cur)); + goto obj_key_end; + } + goto fail_character_obj_sep; + +obj_val_begin: + save_incr_state(obj_val_begin); +obj_val_continue: + if (*cur == '"') { + val++; + ctn_len++; + if (likely(read_str_con(&cur, end, flg, val, &msg, con))) + goto obj_val_end; + goto fail_string; + } + if (char_is_num(*cur)) { + val++; + ctn_len++; + if (likely(read_num(&cur, pre, flg, val, &msg))) goto obj_val_maybe_end; + goto fail_number; + } + if (*cur == '{') { + cur++; + goto obj_begin; + } + if (*cur == '[') { + cur++; + goto arr_begin; + } + if (*cur == 't') { + val++; + ctn_len++; + if (likely(read_true(&cur, val))) goto obj_val_end; + goto fail_literal_true; + } + if (*cur == 'f') { + val++; + ctn_len++; + if (likely(read_false(&cur, val))) goto obj_val_end; + goto fail_literal_false; + } + if (*cur == 'n') { + val++; + ctn_len++; + if (likely(read_null(&cur, val))) 
goto obj_val_end; + goto fail_literal_null; + } + if (char_is_space(*cur)) { + while (char_is_space(*++cur)); + goto obj_val_continue; + } + goto fail_character_val; + +obj_val_maybe_end: + /* if incremental parsing stops in the middle of a number, it may continue + with more digits, so obj val maybe didn't end yet */ + check_maybe_truncated_number(); + +obj_val_end: + save_incr_state(obj_val_end); + if (likely(*cur == ',')) { + cur++; + goto obj_key_begin; + } + if (likely(*cur == '}')) { + cur++; + goto obj_end; + } + if (char_is_space(*cur)) { + while (char_is_space(*++cur)); + goto obj_val_end; + } + goto fail_character_obj_end; + +obj_end: + /* pop container */ + ctn_parent = (yyjson_val *)(void *)((u8 *)ctn - ctn->uni.ofs); + /* point to the next value */ + ctn->uni.ofs = (usize)((u8 *)val - (u8 *)ctn) + sizeof(yyjson_val); + ctn->tag = (ctn_len << (YYJSON_TAG_BIT - 1)) | YYJSON_TYPE_OBJ; + if (unlikely(ctn == ctn_parent)) goto doc_end; + ctn = ctn_parent; + ctn_len = (usize)(ctn->tag >> YYJSON_TAG_BIT); + if ((ctn->tag & YYJSON_TYPE_MASK) == YYJSON_TYPE_OBJ) { + goto obj_val_end; + } else { + goto arr_val_end; + } + +doc_end: + /* check invalid contents after json document */ + if (unlikely(cur < end) && !has_flg(STOP_WHEN_DONE)) { + save_incr_state(doc_end); + while (char_is_space(*cur)) cur++; + if (unlikely(cur < end)) goto fail_garbage; + } + + **pre = '\0'; + doc = (yyjson_doc *)val_hdr; + doc->root = val_hdr + hdr_len; + doc->alc = alc; + doc->dat_read = (usize)(cur - hdr); + doc->val_read = (usize)((val - doc->root) + 1); + doc->str_pool = has_flg(INSITU) ? 
NULL : (char *)hdr; + state->hdr = NULL; + state->val_hdr = NULL; + memset(err, 0, sizeof(yyjson_read_err)); + return doc; + +unexpected_end: + err->pos = len; + /* if no nore data, stop the incr read */ + if (unlikely(len >= state->buf_len)) { + err->code = YYJSON_READ_ERROR_UNEXPECTED_END; + err->msg = MSG_NOT_END; + return NULL; + } + /* save parser state in extended error struct, in addition to what was + * stored in the last save_incr_state */ + err->code = YYJSON_READ_ERROR_MORE; + err->msg = "need more data"; + state->val_end = val_end; + state->ctn = ctn; + state->alc_len = alc_len; + /* restore the end where we've inserted a null terminator */ + *end = saved_end; + return NULL; + +fail_string: return_err(cur, INVALID_STRING, msg); +fail_number: return_err(cur, INVALID_NUMBER, msg); +fail_alloc: return_err(cur, MEMORY_ALLOCATION, MSG_MALLOC); +fail_trailing_comma: return_err(cur, JSON_STRUCTURE, MSG_COMMA); +fail_literal_true: return_err(cur, LITERAL, MSG_CHAR_T); +fail_literal_false: return_err(cur, LITERAL, MSG_CHAR_F); +fail_literal_null: return_err(cur, LITERAL, MSG_CHAR_N); +fail_character_val: return_err(cur, UNEXPECTED_CHARACTER, MSG_CHAR); +fail_character_arr_end: return_err(cur, UNEXPECTED_CHARACTER, MSG_ARR_END); +fail_character_obj_key: return_err(cur, UNEXPECTED_CHARACTER, MSG_OBJ_KEY); +fail_character_obj_sep: return_err(cur, UNEXPECTED_CHARACTER, MSG_OBJ_SEP); +fail_character_obj_end: return_err(cur, UNEXPECTED_CHARACTER, MSG_OBJ_END); +fail_garbage: return_err(cur, UNEXPECTED_CONTENT, MSG_GARBAGE); + +#undef val_incr +#undef return_err +#undef return_err_inv_param +#undef save_incr_state +#undef check_maybe_truncated_number +} + +#endif /* YYJSON_DISABLE_INCR_READER */ + +#undef has_flg +#undef has_allow #endif /* YYJSON_DISABLE_READER */ -#if !YYJSON_DISABLE_WRITER +#if !YYJSON_DISABLE_WRITER /* writer begin */ + +/* Check write flag, avoids `always false` warning when disabled. 
*/ +#define has_flg(_flg) unlikely(has_wflag(flg, YYJSON_WRITE_##_flg, 0)) +#define has_allow(_flg) unlikely(has_wflag(flg, YYJSON_WRITE_ALLOW_##_flg, 1)) +static_inline bool has_wflag(yyjson_write_flag flg, yyjson_write_flag chk, + bool non_standard) { +#if YYJSON_DISABLE_NON_STANDARD + if (non_standard) return false; +#endif + return (flg & chk) != 0; +} /*============================================================================== - * Integer Writer + * MARK: - Integer Writer (Private) * * The maximum value of uint32_t is 4294967295 (10 digits), * these digits are named as 'aabbccddee' here. @@ -7860,10 +7224,10 @@ static_inline u8 *write_u64(u64 val, u8 *buf) { /*============================================================================== - * Number Writer + * MARK: - Number Writer (Private) *============================================================================*/ -#if YYJSON_HAS_IEEE_754 && !YYJSON_DISABLE_FAST_FP_CONV /* FP_WRITER */ +#if !YYJSON_DISABLE_FAST_FP_CONV /* FP_WRITER */ /** Trailing zero count table for number 0 to 99. (generate with misc/make_tables.c) */ @@ -7880,15 +7244,6 @@ static const u8 dec_trailing_zero_table[] = { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; -static_inline u8 *write_u32_len_1_to_9(u32 val, u8 *buf) { - if (val >= 100000000) { - u32 hi = val / 10000000; - val = val - hi * 10000000; - *buf++ = (u8)(hi + '0'); - } - return write_u32_len_1_to_8((u32)val, buf); -} - static_inline u8 *write_u64_len_1_to_16(u64 val, u8 *buf) { u64 hgh; u32 low; @@ -8364,11 +7719,11 @@ static_inline void f64_bin_to_dec_fast(u64 sig_raw, u32 exp_raw, /** Write inf/nan if allowed. 
*/ static_inline u8 *write_inf_or_nan(u8 *buf, yyjson_write_flag flg, u64 sig_raw, bool sign) { - if (has_write_flag(INF_AND_NAN_AS_NULL)) { + if (has_flg(INF_AND_NAN_AS_NULL)) { byte_copy_4(buf, "null"); return buf + 4; } - if (has_write_flag(ALLOW_INF_AND_NAN)) { + if (has_allow(INF_AND_NAN)) { if (sig_raw == 0) { buf[0] = '-'; buf += sign; @@ -8399,7 +7754,7 @@ static_noinline u8 *write_f32_raw(u8 *buf, u64 raw_f64, bool sign; /* cast double to float */ - raw = f32_to_raw(f64_to_f32(f64_from_raw(raw_f64))); + raw = f32_to_bits(f64_to_f32(f64_from_bits(raw_f64))); /* decode raw bytes from IEEE-754 double format. */ sign = (bool)(raw >> (F32_BITS - 1)); @@ -8493,7 +7848,7 @@ static_noinline u8 *write_f32_raw(u8 *buf, u64 raw_f64, f32_bin_to_dec(sig_raw, exp_raw, sig_bin, exp_bin, &sig_dec, &exp_dec); /* write significand part */ - end = write_u32_len_1_to_9(sig_dec, buf + 1); + end = write_u32_len_1_to_8(sig_dec, buf + 1); buf[0] = buf[1]; buf[1] = '.'; exp_dec += (i32)(end - buf) - 2; @@ -8803,12 +8158,12 @@ static_noinline u8 *write_fp_reformat(u8 *buf, int len, u8 *cur = buf; if (unlikely(len < 1)) return NULL; cur += (*cur == '-'); - if (unlikely(!digi_is_digit(*cur))) { + if (unlikely(!char_is_digit(*cur))) { /* nan, inf, or bad output */ - if (has_write_flag(INF_AND_NAN_AS_NULL)) { + if (has_flg(INF_AND_NAN_AS_NULL)) { byte_copy_4(buf, "null"); return buf + 4; - } else if (has_write_flag(ALLOW_INF_AND_NAN)) { + } else if (has_allow(INF_AND_NAN)) { if (*cur == 'i') { byte_copy_8(cur, "Infinity"); return cur + 8; @@ -8873,7 +8228,7 @@ static_noinline u8 *write_f64_raw(u8 *buf, u64 raw, yyjson_write_flag flg) { #else int dig = F64_DEC_DIG; #endif - f64 val = f64_from_raw(raw); + f64 val = f64_from_bits(raw); int len = snprintf_num(buf, FP_BUF_LEN, "%.*g", dig, val); return write_fp_reformat(buf, len, flg, false); } @@ -8885,7 +8240,7 @@ static_noinline u8 *write_f32_raw(u8 *buf, u64 raw, yyjson_write_flag flg) { #else int dig = F32_DEC_DIG; #endif - f64 val = 
(f64)f64_to_f32(f64_from_raw(raw)); + f64 val = (f64)f64_to_f32(f64_from_bits(raw)); int len = snprintf_num(buf, FP_BUF_LEN, "%.*g", dig, val); return write_fp_reformat(buf, len, flg, false); } @@ -8893,7 +8248,7 @@ static_noinline u8 *write_f32_raw(u8 *buf, u64 raw, yyjson_write_flag flg) { /** Write a double number (requires 40 bytes buffer). */ static_noinline u8 *write_f64_raw_fixed(u8 *buf, u64 raw, yyjson_write_flag flg, u32 prec) { - f64 val = (f64)f64_from_raw(raw); + f64 val = (f64)f64_from_bits(raw); if (-1e21 < val && val < 1e21) { int len = snprintf_num(buf, FP_BUF_LEN, "%.*f", (int)prec, val); return write_fp_reformat(buf, len, flg, true); @@ -8938,10 +8293,50 @@ static_inline u8 *write_num(u8 *cur, yyjson_val *val, yyjson_write_flag flg) { } } +char *yyjson_write_number(const yyjson_val *val, char *buf) { + if (unlikely(!val || !buf)) return NULL; + switch (val->tag & YYJSON_TAG_MASK) { + case YYJSON_TYPE_NUM | YYJSON_SUBTYPE_UINT: { + buf = (char *)write_u64(val->uni.u64, (u8 *)buf); + *buf = '\0'; + return buf; + } + case YYJSON_TYPE_NUM | YYJSON_SUBTYPE_SINT: { + u64 pos = val->uni.u64; + u64 neg = ~pos + 1; + usize sign = ((i64)pos < 0); + *buf = '-'; + buf = (char *)write_u64(sign ? 
neg : pos, (u8 *)buf + sign); + *buf = '\0'; + return buf; + } + case YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL: { + u64 raw = val->uni.u64; + u32 fmt = (u32)(val->tag >> 32); + u32 flg = YYJSON_WRITE_ALLOW_INF_AND_NAN; + if (likely(!(fmt >> (32 - YYJSON_WRITE_FP_FLAG_BITS)))) { + buf = (char *)write_f64_raw((u8 *)buf, raw, flg); + } else if (fmt >> (32 - YYJSON_WRITE_FP_PREC_BITS)) { + u32 prec = fmt >> (32 - YYJSON_WRITE_FP_PREC_BITS); + buf = (char *)write_f64_raw_fixed((u8 *)buf, raw, flg, prec); + } else { + if (fmt & YYJSON_WRITE_FP_TO_FLOAT) { + buf = (char *)write_f32_raw((u8 *)buf, raw, flg); + } else { + buf = (char *)write_f64_raw((u8 *)buf, raw, flg); + } + } + if (buf) *buf = '\0'; + return buf; + } + default: return NULL; + } +} + /*============================================================================== - * String Writer + * MARK: - String Writer (Private) *============================================================================*/ /** Character encode type, if (type > CHAR_ENC_ERR_1) bytes = type / 2; */ @@ -9183,14 +8578,14 @@ static const u8 esc_single_char_table[512] = { /** Returns the encode table with options. */ static_inline const char_enc_type *get_enc_table_with_flag( yyjson_write_flag flg) { - if (has_write_flag(ESCAPE_UNICODE)) { - if (has_write_flag(ESCAPE_SLASHES)) { + if (has_flg(ESCAPE_UNICODE)) { + if (has_flg(ESCAPE_SLASHES)) { return enc_table_esc_slash; } else { return enc_table_esc; } } else { - if (has_write_flag(ESCAPE_SLASHES)) { + if (has_flg(ESCAPE_SLASHES)) { return enc_table_cpy_slash; } else { return enc_table_cpy; @@ -9246,79 +8641,6 @@ static_inline u8 *write_str_noesc(u8 *cur, const u8 *str, usize str_len) { static_inline u8 *write_str(u8 *cur, bool esc, bool inv, const u8 *str, usize str_len, const char_enc_type *enc_table) { - - /* UTF-8 character mask and pattern, see `read_str()` for details. 
*/ -#if YYJSON_ENDIAN == YYJSON_BIG_ENDIAN - const u16 b2_mask = 0xE0C0UL; - const u16 b2_patt = 0xC080UL; - const u16 b2_requ = 0x1E00UL; - const u32 b3_mask = 0xF0C0C000UL; - const u32 b3_patt = 0xE0808000UL; - const u32 b3_requ = 0x0F200000UL; - const u32 b3_erro = 0x0D200000UL; - const u32 b4_mask = 0xF8C0C0C0UL; - const u32 b4_patt = 0xF0808080UL; - const u32 b4_requ = 0x07300000UL; - const u32 b4_err0 = 0x04000000UL; - const u32 b4_err1 = 0x03300000UL; -#elif YYJSON_ENDIAN == YYJSON_LITTLE_ENDIAN - const u16 b2_mask = 0xC0E0UL; - const u16 b2_patt = 0x80C0UL; - const u16 b2_requ = 0x001EUL; - const u32 b3_mask = 0x00C0C0F0UL; - const u32 b3_patt = 0x008080E0UL; - const u32 b3_requ = 0x0000200FUL; - const u32 b3_erro = 0x0000200DUL; - const u32 b4_mask = 0xC0C0C0F8UL; - const u32 b4_patt = 0x808080F0UL; - const u32 b4_requ = 0x00003007UL; - const u32 b4_err0 = 0x00000004UL; - const u32 b4_err1 = 0x00003003UL; -#else - /* this should be evaluated at compile-time */ - v16_uni b2_mask_uni = {{ 0xE0, 0xC0 }}; - v16_uni b2_patt_uni = {{ 0xC0, 0x80 }}; - v16_uni b2_requ_uni = {{ 0x1E, 0x00 }}; - v32_uni b3_mask_uni = {{ 0xF0, 0xC0, 0xC0, 0x00 }}; - v32_uni b3_patt_uni = {{ 0xE0, 0x80, 0x80, 0x00 }}; - v32_uni b3_requ_uni = {{ 0x0F, 0x20, 0x00, 0x00 }}; - v32_uni b3_erro_uni = {{ 0x0D, 0x20, 0x00, 0x00 }}; - v32_uni b4_mask_uni = {{ 0xF8, 0xC0, 0xC0, 0xC0 }}; - v32_uni b4_patt_uni = {{ 0xF0, 0x80, 0x80, 0x80 }}; - v32_uni b4_requ_uni = {{ 0x07, 0x30, 0x00, 0x00 }}; - v32_uni b4_err0_uni = {{ 0x04, 0x00, 0x00, 0x00 }}; - v32_uni b4_err1_uni = {{ 0x03, 0x30, 0x00, 0x00 }}; - u16 b2_mask = b2_mask_uni.u; - u16 b2_patt = b2_patt_uni.u; - u16 b2_requ = b2_requ_uni.u; - u32 b3_mask = b3_mask_uni.u; - u32 b3_patt = b3_patt_uni.u; - u32 b3_requ = b3_requ_uni.u; - u32 b3_erro = b3_erro_uni.u; - u32 b4_mask = b4_mask_uni.u; - u32 b4_patt = b4_patt_uni.u; - u32 b4_requ = b4_requ_uni.u; - u32 b4_err0 = b4_err0_uni.u; - u32 b4_err1 = b4_err1_uni.u; -#endif - -#define 
is_valid_seq_2(uni) ( \ - ((uni & b2_mask) == b2_patt) && \ - ((uni & b2_requ)) \ -) - -#define is_valid_seq_3(uni) ( \ - ((uni & b3_mask) == b3_patt) && \ - ((tmp = (uni & b3_requ))) && \ - ((tmp != b3_erro)) \ -) - -#define is_valid_seq_4(uni) ( \ - ((uni & b4_mask) == b4_patt) && \ - ((tmp = (uni & b4_requ))) && \ - ((tmp & b4_err0) == 0 || (tmp & b4_err1) == 0) \ -) - /* The replacement character U+FFFD, used to indicate invalid character. */ const v32 rep = {{ 'F', 'F', 'F', 'D' }}; const v32 pre = {{ '\\', 'u', '0', '0' }}; @@ -9380,20 +8702,19 @@ copy_utf8: goto copy_ascii; } case CHAR_ENC_CPY_2: { - u16 v; #if YYJSON_DISABLE_UTF8_VALIDATION byte_copy_2(cur, src); #else - v = byte_load_2(src); - if (unlikely(!is_valid_seq_2(v))) goto err_cpy; - byte_copy_2(cur, src); + u32 uni = 0; + byte_copy_2(&uni, src); + if (unlikely(!is_utf8_seq2(uni))) goto err_cpy; + byte_copy_2(cur, &uni); #endif cur += 2; src += 2; goto copy_utf8; } case CHAR_ENC_CPY_3: { - u32 v, tmp; #if YYJSON_DISABLE_UTF8_VALIDATION if (likely(src + 4 <= end)) { byte_copy_4(cur, src); @@ -9402,14 +8723,15 @@ copy_utf8: cur[2] = src[2]; } #else + u32 uni, tmp; if (likely(src + 4 <= end)) { - v = byte_load_4(src); - if (unlikely(!is_valid_seq_3(v))) goto err_cpy; + uni = byte_load_4(src); + if (unlikely(!is_utf8_seq3(uni))) goto err_cpy; byte_copy_4(cur, src); } else { - v = byte_load_3(src); - if (unlikely(!is_valid_seq_3(v))) goto err_cpy; - byte_copy_4(cur, &v); + uni = byte_load_3(src); + if (unlikely(!is_utf8_seq3(uni))) goto err_cpy; + byte_copy_4(cur, &uni); } #endif cur += 3; @@ -9417,12 +8739,12 @@ copy_utf8: goto copy_utf8; } case CHAR_ENC_CPY_4: { - u32 v, tmp; #if YYJSON_DISABLE_UTF8_VALIDATION byte_copy_4(cur, src); #else - v = byte_load_4(src); - if (unlikely(!is_valid_seq_4(v))) goto err_cpy; + u32 uni, tmp; + uni = byte_load_4(src); + if (unlikely(!is_utf8_seq4(uni))) goto err_cpy; byte_copy_4(cur, src); #endif cur += 4; @@ -9443,10 +8765,12 @@ copy_utf8: goto copy_utf8; } case 
CHAR_ENC_ESC_2: { - u16 u, v; + u16 u; #if !YYJSON_DISABLE_UTF8_VALIDATION - v = byte_load_2(src); - if (unlikely(!is_valid_seq_2(v))) goto err_esc; + u32 v4 = 0; + u16 v2 = byte_load_2(src); + byte_copy_2(&v4, &v2); + if (unlikely(!is_utf8_seq2(v4))) goto err_esc; #endif u = (u16)(((u16)(src[0] & 0x1F) << 6) | ((u16)(src[1] & 0x3F) << 0)); @@ -9462,7 +8786,7 @@ copy_utf8: u32 v, tmp; #if !YYJSON_DISABLE_UTF8_VALIDATION v = byte_load_3(src); - if (unlikely(!is_valid_seq_3(v))) goto err_esc; + if (unlikely(!is_utf8_seq3(v))) goto err_esc; #endif u = (u16)(((u16)(src[0] & 0x0F) << 12) | ((u16)(src[1] & 0x3F) << 6) | @@ -9478,7 +8802,7 @@ copy_utf8: u32 hi, lo, u, v, tmp; #if !YYJSON_DISABLE_UTF8_VALIDATION v = byte_load_4(src); - if (unlikely(!is_valid_seq_4(v))) goto err_esc; + if (unlikely(!is_utf8_seq4(v))) goto err_esc; #endif u = ((u32)(src[0] & 0x07) << 18) | ((u32)(src[1] & 0x3F) << 12) | @@ -9500,7 +8824,7 @@ copy_utf8: case CHAR_ENC_ERR_1: { goto err_one; } - default: break; + default: break; /* unreachable */ } copy_end: @@ -9523,16 +8847,12 @@ err_esc: cur += 6; src += 1; goto copy_utf8; - -#undef is_valid_seq_2 -#undef is_valid_seq_3 -#undef is_valid_seq_4 } /*============================================================================== - * Writer Utilities + * MARK: - JSON Writer Utilities (Private) *============================================================================*/ /** Write null (requires 8 bytes buffer). */ @@ -9578,7 +8898,6 @@ static bool write_dat_to_fp(FILE *fp, u8 *dat, usize len, /** Write data to file. 
*/ static bool write_dat_to_file(const char *path, u8 *dat, usize len, yyjson_write_err *err) { - #define return_err(_code, _msg) do { \ err->msg = _msg; \ err->code = YYJSON_WRITE_ERROR_##_code; \ @@ -9605,7 +8924,7 @@ static bool write_dat_to_file(const char *path, u8 *dat, usize len, /*============================================================================== - * JSON Writer Implementation + * MARK: - JSON Writer Implementation (Private) *============================================================================*/ typedef struct yyjson_write_ctx { @@ -9630,7 +8949,6 @@ static_inline u8 *yyjson_write_single(yyjson_val *val, yyjson_alc alc, usize *dat_len, yyjson_write_err *err) { - #define return_err(_code, _msg) do { \ if (hdr) alc.free(alc.ctx, (void *)hdr); \ *dat_len = 0; \ @@ -9655,9 +8973,9 @@ static_inline u8 *yyjson_write_single(yyjson_val *val, const u8 *str_ptr; const char_enc_type *enc_table = get_enc_table_with_flag(flg); bool cpy = (enc_table == enc_table_cpy); - bool esc = has_write_flag(ESCAPE_UNICODE) != 0; - bool inv = has_write_flag(ALLOW_INVALID_UNICODE) != 0; - bool newline = has_write_flag(NEWLINE_AT_END) != 0; + bool esc = has_flg(ESCAPE_UNICODE) != 0; + bool inv = has_allow(INVALID_UNICODE) != 0; + bool newline = has_flg(NEWLINE_AT_END) != 0; const usize end_len = 2; /* '\n' and '\0' */ switch (unsafe_yyjson_get_type(val)) { @@ -9722,7 +9040,7 @@ static_inline u8 *yyjson_write_single(yyjson_val *val, fail_alloc: return_err(MEMORY_ALLOCATION, MSG_MALLOC); fail_type: return_err(INVALID_VALUE_TYPE, MSG_ERR_TYPE); -fail_num: return_err(NAN_OR_INF, MSG_INF_NAN); +fail_num: return_err(NAN_OR_INF, MSG_NAN_INF); fail_str: return_err(INVALID_STRING, MSG_ERR_UTF8); #undef return_err @@ -9737,7 +9055,6 @@ static_inline u8 *yyjson_write_minify(const yyjson_val *root, const yyjson_alc alc, usize *dat_len, yyjson_write_err *err) { - #define return_err(_code, _msg) do { \ *dat_len = 0; \ err->code = YYJSON_WRITE_ERROR_##_code; \ @@ -9783,9 +9100,9 
@@ static_inline u8 *yyjson_write_minify(const yyjson_val *root, const u8 *str_ptr; const char_enc_type *enc_table = get_enc_table_with_flag(flg); bool cpy = (enc_table == enc_table_cpy); - bool esc = has_write_flag(ESCAPE_UNICODE) != 0; - bool inv = has_write_flag(ALLOW_INVALID_UNICODE) != 0; - bool newline = has_write_flag(NEWLINE_AT_END) != 0; + bool esc = has_flg(ESCAPE_UNICODE) != 0; + bool inv = has_allow(INVALID_UNICODE) != 0; + bool newline = has_flg(NEWLINE_AT_END) != 0; alc_len = root->uni.ofs / sizeof(yyjson_val); alc_len = alc_len * YYJSON_WRITER_ESTIMATED_MINIFY_RATIO + 64; @@ -9904,7 +9221,7 @@ doc_end: fail_alloc: return_err(MEMORY_ALLOCATION, MSG_MALLOC); fail_type: return_err(INVALID_VALUE_TYPE, MSG_ERR_TYPE); -fail_num: return_err(NAN_OR_INF, MSG_INF_NAN); +fail_num: return_err(NAN_OR_INF, MSG_NAN_INF); fail_str: return_err(INVALID_STRING, MSG_ERR_UTF8); #undef return_err @@ -9919,7 +9236,6 @@ static_inline u8 *yyjson_write_pretty(const yyjson_val *root, const yyjson_alc alc, usize *dat_len, yyjson_write_err *err) { - #define return_err(_code, _msg) do { \ *dat_len = 0; \ err->code = YYJSON_WRITE_ERROR_##_code; \ @@ -9965,10 +9281,10 @@ static_inline u8 *yyjson_write_pretty(const yyjson_val *root, const u8 *str_ptr; const char_enc_type *enc_table = get_enc_table_with_flag(flg); bool cpy = (enc_table == enc_table_cpy); - bool esc = has_write_flag(ESCAPE_UNICODE) != 0; - bool inv = has_write_flag(ALLOW_INVALID_UNICODE) != 0; - usize spaces = has_write_flag(PRETTY_TWO_SPACES) ? 2 : 4; - bool newline = has_write_flag(NEWLINE_AT_END) != 0; + bool esc = has_flg(ESCAPE_UNICODE) != 0; + bool inv = has_allow(INVALID_UNICODE) != 0; + usize spaces = has_flg(PRETTY_TWO_SPACES) ? 
2 : 4; + bool newline = has_flg(NEWLINE_AT_END) != 0; alc_len = root->uni.ofs / sizeof(yyjson_val); alc_len = alc_len * YYJSON_WRITER_ESTIMATED_PRETTY_RATIO + 64; @@ -10112,7 +9428,7 @@ doc_end: fail_alloc: return_err(MEMORY_ALLOCATION, MSG_MALLOC); fail_type: return_err(INVALID_VALUE_TYPE, MSG_ERR_TYPE); -fail_num: return_err(NAN_OR_INF, MSG_INF_NAN); +fail_num: return_err(NAN_OR_INF, MSG_NAN_INF); fail_str: return_err(INVALID_STRING, MSG_ERR_UTF8); #undef return_err @@ -10120,18 +9436,24 @@ fail_str: return_err(INVALID_STRING, MSG_ERR_UTF8); #undef check_str_len } + + +/*============================================================================== + * MARK: - JSON Writer (Public) + *============================================================================*/ + char *yyjson_val_write_opts(const yyjson_val *val, yyjson_write_flag flg, const yyjson_alc *alc_ptr, usize *dat_len, yyjson_write_err *err) { - yyjson_write_err dummy_err; - usize dummy_dat_len; + yyjson_write_err tmp_err; + usize tmp_dat_len; yyjson_alc alc = alc_ptr ? *alc_ptr : YYJSON_DEFAULT_ALC; yyjson_val *root = constcast(yyjson_val *)val; - err = err ? err : &dummy_err; - dat_len = dat_len ? dat_len : &dummy_dat_len; + if (!err) err = &tmp_err; + if (!dat_len) dat_len = &tmp_dat_len; if (unlikely(!root)) { *dat_len = 0; @@ -10163,14 +9485,14 @@ bool yyjson_val_write_file(const char *path, yyjson_write_flag flg, const yyjson_alc *alc_ptr, yyjson_write_err *err) { - yyjson_write_err dummy_err; + yyjson_write_err tmp_err; yyjson_alc alc = alc_ptr ? *alc_ptr : YYJSON_DEFAULT_ALC; u8 *dat; usize dat_len = 0; yyjson_val *root = constcast(yyjson_val *)val; bool suc; - err = err ? 
err : &dummy_err; + if (!err) err = &tmp_err; if (unlikely(!path || !*path)) { err->msg = "input path is invalid"; err->code = YYJSON_READ_ERROR_INVALID_PARAMETER; @@ -10189,14 +9511,14 @@ bool yyjson_val_write_fp(FILE *fp, yyjson_write_flag flg, const yyjson_alc *alc_ptr, yyjson_write_err *err) { - yyjson_write_err dummy_err; + yyjson_write_err tmp_err; yyjson_alc alc = alc_ptr ? *alc_ptr : YYJSON_DEFAULT_ALC; u8 *dat; usize dat_len = 0; yyjson_val *root = constcast(yyjson_val *)val; bool suc; - err = err ? err : &dummy_err; + if (!err) err = &tmp_err; if (unlikely(!fp)) { err->msg = "input fp is invalid"; err->code = YYJSON_READ_ERROR_INVALID_PARAMETER; @@ -10231,7 +9553,7 @@ bool yyjson_write_fp(FILE *fp, /*============================================================================== - * Mutable JSON Writer Implementation + * MARK: - Mutable JSON Writer Implementation (Private) *============================================================================*/ typedef struct yyjson_mut_write_ctx { @@ -10287,7 +9609,6 @@ static_inline u8 *yyjson_mut_write_minify(const yyjson_mut_val *root, yyjson_alc alc, usize *dat_len, yyjson_write_err *err) { - #define return_err(_code, _msg) do { \ *dat_len = 0; \ err->code = YYJSON_WRITE_ERROR_##_code; \ @@ -10333,9 +9654,9 @@ static_inline u8 *yyjson_mut_write_minify(const yyjson_mut_val *root, const u8 *str_ptr; const char_enc_type *enc_table = get_enc_table_with_flag(flg); bool cpy = (enc_table == enc_table_cpy); - bool esc = has_write_flag(ESCAPE_UNICODE) != 0; - bool inv = has_write_flag(ALLOW_INVALID_UNICODE) != 0; - bool newline = has_write_flag(NEWLINE_AT_END) != 0; + bool esc = has_flg(ESCAPE_UNICODE) != 0; + bool inv = has_allow(INVALID_UNICODE) != 0; + bool newline = has_flg(NEWLINE_AT_END) != 0; alc_len = estimated_val_num * YYJSON_WRITER_ESTIMATED_MINIFY_RATIO + 64; alc_len = size_align_up(alc_len, sizeof(yyjson_mut_write_ctx)); @@ -10459,7 +9780,7 @@ doc_end: fail_alloc: return_err(MEMORY_ALLOCATION, MSG_MALLOC); 
fail_type: return_err(INVALID_VALUE_TYPE, MSG_ERR_TYPE); -fail_num: return_err(NAN_OR_INF, MSG_INF_NAN); +fail_num: return_err(NAN_OR_INF, MSG_NAN_INF); fail_str: return_err(INVALID_STRING, MSG_ERR_UTF8); #undef return_err @@ -10475,7 +9796,6 @@ static_inline u8 *yyjson_mut_write_pretty(const yyjson_mut_val *root, yyjson_alc alc, usize *dat_len, yyjson_write_err *err) { - #define return_err(_code, _msg) do { \ *dat_len = 0; \ err->code = YYJSON_WRITE_ERROR_##_code; \ @@ -10521,10 +9841,10 @@ static_inline u8 *yyjson_mut_write_pretty(const yyjson_mut_val *root, const u8 *str_ptr; const char_enc_type *enc_table = get_enc_table_with_flag(flg); bool cpy = (enc_table == enc_table_cpy); - bool esc = has_write_flag(ESCAPE_UNICODE) != 0; - bool inv = has_write_flag(ALLOW_INVALID_UNICODE) != 0; - usize spaces = has_write_flag(PRETTY_TWO_SPACES) ? 2 : 4; - bool newline = has_write_flag(NEWLINE_AT_END) != 0; + bool esc = has_flg(ESCAPE_UNICODE) != 0; + bool inv = has_allow(INVALID_UNICODE) != 0; + usize spaces = has_flg(PRETTY_TWO_SPACES) ? 2 : 4; + bool newline = has_flg(NEWLINE_AT_END) != 0; alc_len = estimated_val_num * YYJSON_WRITER_ESTIMATED_PRETTY_RATIO + 64; alc_len = size_align_up(alc_len, sizeof(yyjson_mut_write_ctx)); @@ -10673,7 +9993,7 @@ doc_end: fail_alloc: return_err(MEMORY_ALLOCATION, MSG_MALLOC); fail_type: return_err(INVALID_VALUE_TYPE, MSG_ERR_TYPE); -fail_num: return_err(NAN_OR_INF, MSG_INF_NAN); +fail_num: return_err(NAN_OR_INF, MSG_NAN_INF); fail_str: return_err(INVALID_STRING, MSG_ERR_UTF8); #undef return_err @@ -10687,13 +10007,13 @@ static char *yyjson_mut_write_opts_impl(const yyjson_mut_val *val, const yyjson_alc *alc_ptr, usize *dat_len, yyjson_write_err *err) { - yyjson_write_err dummy_err; - usize dummy_dat_len; + yyjson_write_err tmp_err; + usize tmp_dat_len; yyjson_alc alc = alc_ptr ? *alc_ptr : YYJSON_DEFAULT_ALC; yyjson_mut_val *root = constcast(yyjson_mut_val *)val; - err = err ? err : &dummy_err; - dat_len = dat_len ? 
dat_len : &dummy_dat_len; + if (!err) err = &tmp_err; + if (!dat_len) dat_len = &tmp_dat_len; if (unlikely(!root)) { *dat_len = 0; @@ -10713,6 +10033,12 @@ static char *yyjson_mut_write_opts_impl(const yyjson_mut_val *val, } } + + +/*============================================================================== + * MARK: - Mutable JSON Writer (Public) + *============================================================================*/ + char *yyjson_mut_val_write_opts(const yyjson_mut_val *val, yyjson_write_flag flg, const yyjson_alc *alc_ptr, @@ -10744,14 +10070,14 @@ bool yyjson_mut_val_write_file(const char *path, yyjson_write_flag flg, const yyjson_alc *alc_ptr, yyjson_write_err *err) { - yyjson_write_err dummy_err; + yyjson_write_err tmp_err; yyjson_alc alc = alc_ptr ? *alc_ptr : YYJSON_DEFAULT_ALC; u8 *dat; usize dat_len = 0; yyjson_mut_val *root = constcast(yyjson_mut_val *)val; bool suc; - err = err ? err : &dummy_err; + if (!err) err = &tmp_err; if (unlikely(!path || !*path)) { err->msg = "input path is invalid"; err->code = YYJSON_WRITE_ERROR_INVALID_PARAMETER; @@ -10770,14 +10096,14 @@ bool yyjson_mut_val_write_fp(FILE *fp, yyjson_write_flag flg, const yyjson_alc *alc_ptr, yyjson_write_err *err) { - yyjson_write_err dummy_err; + yyjson_write_err tmp_err; yyjson_alc alc = alc_ptr ? *alc_ptr : YYJSON_DEFAULT_ALC; u8 *dat; usize dat_len = 0; yyjson_mut_val *root = constcast(yyjson_mut_val *)val; bool suc; - err = err ? 
err : &dummy_err; + if (!err) err = &tmp_err; if (unlikely(!fp)) { err->msg = "input fp is invalid"; err->code = YYJSON_WRITE_ERROR_INVALID_PARAMETER; @@ -10809,4 +10135,931 @@ bool yyjson_mut_write_fp(FILE *fp, return yyjson_mut_val_write_fp(fp, root, flg, alc_ptr, err); } +#undef has_flg +#undef has_allow #endif /* YYJSON_DISABLE_WRITER */ + + + +#if !YYJSON_DISABLE_UTILS + +/*============================================================================== + * MARK: - JSON Pointer API (RFC 6901) (Public) + *============================================================================*/ + +/** + Get a token from JSON pointer string. + @param ptr [in] string that points to current token prefix `/` + [out] string that points to next token prefix `/`, or string end + @param end [in] end of the entire JSON Pointer string + @param len [out] unescaped token length + @param esc [out] number of escaped characters in this token + @return head of the token, or NULL if syntax error + */ +static_inline const char *ptr_next_token(const char **ptr, const char *end, + usize *len, usize *esc) { + const char *hdr = *ptr + 1; + const char *cur = hdr; + /* skip unescaped characters */ + while (cur < end && *cur != '/' && *cur != '~') cur++; + if (likely(cur == end || *cur != '~')) { + /* no escaped characters, return */ + *ptr = cur; + *len = (usize)(cur - hdr); + *esc = 0; + return hdr; + } else { + /* handle escaped characters */ + usize esc_num = 0; + while (cur < end && *cur != '/') { + if (*cur++ == '~') { + if (cur == end || (*cur != '0' && *cur != '1')) { + *ptr = cur - 1; + return NULL; + } + esc_num++; + } + } + *ptr = cur; + *len = (usize)(cur - hdr) - esc_num; + *esc = esc_num; + return hdr; + } +} + +/** + Convert token string to index. 
+ @param cur [in] token head + @param len [in] token length + @param idx [out] the index number, or USIZE_MAX if token is '-' + @return true if token is a valid array index + */ +static_inline bool ptr_token_to_idx(const char *cur, usize len, usize *idx) { + const char *end = cur + len; + usize num = 0, add; + if (unlikely(len == 0 || len > USIZE_SAFE_DIG)) return false; + if (*cur == '0') { + if (unlikely(len > 1)) return false; + *idx = 0; + return true; + } + if (*cur == '-') { + if (unlikely(len > 1)) return false; + *idx = USIZE_MAX; + return true; + } + for (; cur < end && (add = (usize)((u8)*cur - (u8)'0')) <= 9; cur++) { + num = num * 10 + add; + } + if (unlikely(num == 0 || cur < end)) return false; + *idx = num; + return true; +} + +/** + Compare JSON key with token. + @param key a string key (yyjson_val or yyjson_mut_val) + @param token a JSON pointer token + @param len unescaped token length + @param esc number of escaped characters in this token + @return true if `str` is equals to `token` + */ +static_inline bool ptr_token_eq(void *key, + const char *token, usize len, usize esc) { + yyjson_val *val = (yyjson_val *)key; + if (unsafe_yyjson_get_len(val) != len) return false; + if (likely(!esc)) { + return memcmp(val->uni.str, token, len) == 0; + } else { + const char *str = val->uni.str; + for (; len-- > 0; token++, str++) { + if (*token == '~') { + if (*str != (*++token == '0' ? '~' : '/')) return false; + } else { + if (*str != *token) return false; + } + } + return true; + } +} + +/** + Get a value from array by token. 
+ @param arr an array, should not be NULL or non-array type + @param token a JSON pointer token + @param len unescaped token length + @param esc number of escaped characters in this token + @return value at index, or NULL if token is not index or index is out of range + */ +static_inline yyjson_val *ptr_arr_get(yyjson_val *arr, const char *token, + usize len, usize esc) { + yyjson_val *val = unsafe_yyjson_get_first(arr); + usize num = unsafe_yyjson_get_len(arr), idx = 0; + if (unlikely(num == 0)) return NULL; + if (unlikely(!ptr_token_to_idx(token, len, &idx))) return NULL; + if (unlikely(idx >= num)) return NULL; + if (unsafe_yyjson_arr_is_flat(arr)) { + return val + idx; + } else { + while (idx-- > 0) val = unsafe_yyjson_get_next(val); + return val; + } +} + +/** + Get a value from object by token. + @param obj [in] an object, should not be NULL or non-object type + @param token [in] a JSON pointer token + @param len [in] unescaped token length + @param esc [in] number of escaped characters in this token + @return value associated with the token, or NULL if no value + */ +static_inline yyjson_val *ptr_obj_get(yyjson_val *obj, const char *token, + usize len, usize esc) { + yyjson_val *key = unsafe_yyjson_get_first(obj); + usize num = unsafe_yyjson_get_len(obj); + if (unlikely(num == 0)) return NULL; + for (; num > 0; num--, key = unsafe_yyjson_get_next(key + 1)) { + if (ptr_token_eq(key, token, len, esc)) return key + 1; + } + return NULL; +} + +/** + Get a value from array by token. 
+ @param arr [in] an array, should not be NULL or non-array type + @param token [in] a JSON pointer token + @param len [in] unescaped token length + @param esc [in] number of escaped characters in this token + @param pre [out] previous (sibling) value of the returned value + @param last [out] whether index is last + @return value at index, or NULL if token is not index or index is out of range + */ +static_inline yyjson_mut_val *ptr_mut_arr_get(yyjson_mut_val *arr, + const char *token, + usize len, usize esc, + yyjson_mut_val **pre, + bool *last) { + yyjson_mut_val *val = (yyjson_mut_val *)arr->uni.ptr; /* last (tail) */ + usize num = unsafe_yyjson_get_len(arr), idx; + if (last) *last = false; + if (pre) *pre = NULL; + if (unlikely(num == 0)) { + if (last && len == 1 && (*token == '0' || *token == '-')) *last = true; + return NULL; + } + if (unlikely(!ptr_token_to_idx(token, len, &idx))) return NULL; + if (last) *last = (idx == num || idx == USIZE_MAX); + if (unlikely(idx >= num)) return NULL; + while (idx-- > 0) val = val->next; + if (pre) *pre = val; + return val->next; +} + +/** + Get a value from object by token. 
+ @param obj [in] an object, should not be NULL or non-object type + @param token [in] a JSON pointer token + @param len [in] unescaped token length + @param esc [in] number of escaped characters in this token + @param pre [out] previous (sibling) key of the returned value's key + @return value associated with the token, or NULL if no value + */ +static_inline yyjson_mut_val *ptr_mut_obj_get(yyjson_mut_val *obj, + const char *token, + usize len, usize esc, + yyjson_mut_val **pre) { + yyjson_mut_val *pre_key = (yyjson_mut_val *)obj->uni.ptr, *key; + usize num = unsafe_yyjson_get_len(obj); + if (pre) *pre = NULL; + if (unlikely(num == 0)) return NULL; + for (; num > 0; num--, pre_key = key) { + key = pre_key->next->next; + if (ptr_token_eq(key, token, len, esc)) { + if (pre) *pre = pre_key; + return key->next; + } + } + return NULL; +} + +/** + Create a string value with JSON pointer token. + @param token [in] a JSON pointer token + @param len [in] unescaped token length + @param esc [in] number of escaped characters in this token + @param doc [in] used for memory allocation when creating value + @return new string value, or NULL if memory allocation failed + */ +static_inline yyjson_mut_val *ptr_new_key(const char *token, + usize len, usize esc, + yyjson_mut_doc *doc) { + const char *src = token; + if (likely(!esc)) { + return yyjson_mut_strncpy(doc, src, len); + } else { + const char *end = src + len + esc; + char *dst = unsafe_yyjson_mut_str_alc(doc, len + esc); + char *str = dst; + if (unlikely(!dst)) return NULL; + for (; src < end; src++, dst++) { + if (*src != '~') *dst = *src; + else *dst = (*++src == '0' ? 
'~' : '/'); + } + *dst = '\0'; + return yyjson_mut_strn(doc, str, len); + } +} + +/* macros for yyjson_ptr */ +#define return_err(_ret, _code, _pos, _msg) do { \ + if (err) { \ + err->code = YYJSON_PTR_ERR_##_code; \ + err->msg = _msg; \ + err->pos = (usize)(_pos); \ + } \ + return _ret; \ +} while (false) + +#define return_err_resolve(_ret, _pos) \ + return_err(_ret, RESOLVE, _pos, "JSON pointer cannot be resolved") +#define return_err_syntax(_ret, _pos) \ + return_err(_ret, SYNTAX, _pos, "invalid escaped character") +#define return_err_alloc(_ret) \ + return_err(_ret, MEMORY_ALLOCATION, 0, "failed to create value") + +yyjson_val *unsafe_yyjson_ptr_getx(yyjson_val *val, + const char *ptr, size_t ptr_len, + yyjson_ptr_err *err) { + + const char *hdr = ptr, *end = ptr + ptr_len, *token; + usize len, esc; + yyjson_type type; + + while (true) { + token = ptr_next_token(&ptr, end, &len, &esc); + if (unlikely(!token)) return_err_syntax(NULL, ptr - hdr); + type = unsafe_yyjson_get_type(val); + if (type == YYJSON_TYPE_OBJ) { + val = ptr_obj_get(val, token, len, esc); + } else if (type == YYJSON_TYPE_ARR) { + val = ptr_arr_get(val, token, len, esc); + } else { + val = NULL; + } + if (!val) return_err_resolve(NULL, token - hdr); + if (ptr == end) return val; + } +} + +yyjson_mut_val *unsafe_yyjson_mut_ptr_getx( + yyjson_mut_val *val, const char *ptr, size_t ptr_len, + yyjson_ptr_ctx *ctx, yyjson_ptr_err *err) { + + const char *hdr = ptr, *end = ptr + ptr_len, *token; + usize len, esc; + yyjson_mut_val *ctn, *pre = NULL; + yyjson_type type; + bool idx_is_last = false; + + while (true) { + token = ptr_next_token(&ptr, end, &len, &esc); + if (unlikely(!token)) return_err_syntax(NULL, ptr - hdr); + ctn = val; + type = unsafe_yyjson_get_type(val); + if (type == YYJSON_TYPE_OBJ) { + val = ptr_mut_obj_get(val, token, len, esc, &pre); + } else if (type == YYJSON_TYPE_ARR) { + val = ptr_mut_arr_get(val, token, len, esc, &pre, &idx_is_last); + } else { + val = NULL; + } + if (ctx && 
(ptr == end)) { + if (type == YYJSON_TYPE_OBJ || + (type == YYJSON_TYPE_ARR && (val || idx_is_last))) { + ctx->ctn = ctn; + ctx->pre = pre; + } + } + if (!val) return_err_resolve(NULL, token - hdr); + if (ptr == end) return val; + } +} + +bool unsafe_yyjson_mut_ptr_putx( + yyjson_mut_val *val, const char *ptr, size_t ptr_len, + yyjson_mut_val *new_val, yyjson_mut_doc *doc, bool create_parent, + bool insert_new, yyjson_ptr_ctx *ctx, yyjson_ptr_err *err) { + + const char *hdr = ptr, *end = ptr + ptr_len, *token; + usize token_len, esc, ctn_len; + yyjson_mut_val *ctn, *key, *pre = NULL; + yyjson_mut_val *sep_ctn = NULL, *sep_key = NULL, *sep_val = NULL; + yyjson_type ctn_type; + bool idx_is_last = false; + + /* skip exist parent nodes */ + while (true) { + token = ptr_next_token(&ptr, end, &token_len, &esc); + if (unlikely(!token)) return_err_syntax(false, ptr - hdr); + ctn = val; + ctn_type = unsafe_yyjson_get_type(ctn); + if (ctn_type == YYJSON_TYPE_OBJ) { + val = ptr_mut_obj_get(ctn, token, token_len, esc, &pre); + } else if (ctn_type == YYJSON_TYPE_ARR) { + val = ptr_mut_arr_get(ctn, token, token_len, esc, &pre, + &idx_is_last); + } else return_err_resolve(false, token - hdr); + if (!val) break; + if (ptr == end) break; /* is last token */ + } + + /* create parent nodes if not exist */ + if (unlikely(ptr != end)) { /* not last token */ + if (!create_parent) return_err_resolve(false, token - hdr); + + /* add value at last index if container is array */ + if (ctn_type == YYJSON_TYPE_ARR) { + if (!idx_is_last || !insert_new) { + return_err_resolve(false, token - hdr); + } + val = yyjson_mut_obj(doc); + if (!val) return_err_alloc(false); + + /* delay attaching until all operations are completed */ + sep_ctn = ctn; + sep_key = NULL; + sep_val = val; + + /* move to next token */ + ctn = val; + val = NULL; + ctn_type = YYJSON_TYPE_OBJ; + token = ptr_next_token(&ptr, end, &token_len, &esc); + if (unlikely(!token)) return_err_resolve(false, token - hdr); + } + + /* 
container is object, create parent nodes */ + while (ptr != end) { /* not last token */ + key = ptr_new_key(token, token_len, esc, doc); + if (!key) return_err_alloc(false); + val = yyjson_mut_obj(doc); + if (!val) return_err_alloc(false); + + /* delay attaching until all operations are completed */ + if (!sep_ctn) { + sep_ctn = ctn; + sep_key = key; + sep_val = val; + } else { + yyjson_mut_obj_add(ctn, key, val); + } + + /* move to next token */ + ctn = val; + val = NULL; + token = ptr_next_token(&ptr, end, &token_len, &esc); + if (unlikely(!token)) return_err_syntax(false, ptr - hdr); + } + } + + /* JSON pointer is resolved, insert or replace target value */ + ctn_len = unsafe_yyjson_get_len(ctn); + if (ctn_type == YYJSON_TYPE_OBJ) { + if (ctx) ctx->ctn = ctn; + if (!val || insert_new) { + /* insert new key-value pair */ + key = ptr_new_key(token, token_len, esc, doc); + if (unlikely(!key)) return_err_alloc(false); + if (ctx) ctx->pre = ctn_len ? (yyjson_mut_val *)ctn->uni.ptr : key; + unsafe_yyjson_mut_obj_add(ctn, key, new_val, ctn_len); + } else { + /* replace exist value */ + key = pre->next->next; + if (ctx) ctx->pre = pre; + if (ctx) ctx->old = val; + yyjson_mut_obj_put(ctn, key, new_val); + } + } else { + /* array */ + if (ctx && (val || idx_is_last)) ctx->ctn = ctn; + if (insert_new) { + /* append new value */ + if (val) { + pre->next = new_val; + new_val->next = val; + if (ctx) ctx->pre = pre; + unsafe_yyjson_set_len(ctn, ctn_len + 1); + } else if (idx_is_last) { + if (ctx) ctx->pre = ctn_len ? 
+ (yyjson_mut_val *)ctn->uni.ptr : new_val; + yyjson_mut_arr_append(ctn, new_val); + } else { + return_err_resolve(false, token - hdr); + } + } else { + /* replace exist value */ + if (!val) return_err_resolve(false, token - hdr); + if (ctn_len > 1) { + new_val->next = val->next; + pre->next = new_val; + if (ctn->uni.ptr == val) ctn->uni.ptr = new_val; + } else { + new_val->next = new_val; + ctn->uni.ptr = new_val; + pre = new_val; + } + if (ctx) ctx->pre = pre; + if (ctx) ctx->old = val; + } + } + + /* all operations are completed, attach the new components to the target */ + if (unlikely(sep_ctn)) { + if (sep_key) yyjson_mut_obj_add(sep_ctn, sep_key, sep_val); + else yyjson_mut_arr_append(sep_ctn, sep_val); + } + return true; +} + +yyjson_mut_val *unsafe_yyjson_mut_ptr_replacex( + yyjson_mut_val *val, const char *ptr, size_t len, yyjson_mut_val *new_val, + yyjson_ptr_ctx *ctx, yyjson_ptr_err *err) { + + yyjson_mut_val *cur_val; + yyjson_ptr_ctx cur_ctx; + memset(&cur_ctx, 0, sizeof(cur_ctx)); + if (!ctx) ctx = &cur_ctx; + cur_val = unsafe_yyjson_mut_ptr_getx(val, ptr, len, ctx, err); + if (!cur_val) return NULL; + + if (yyjson_mut_is_obj(ctx->ctn)) { + yyjson_mut_val *key = ctx->pre->next->next; + yyjson_mut_obj_put(ctx->ctn, key, new_val); + } else { + yyjson_ptr_ctx_replace(ctx, new_val); + } + ctx->old = cur_val; + return cur_val; +} + +yyjson_mut_val *unsafe_yyjson_mut_ptr_removex( + yyjson_mut_val *val, const char *ptr, size_t len, + yyjson_ptr_ctx *ctx, yyjson_ptr_err *err) { + + yyjson_mut_val *cur_val; + yyjson_ptr_ctx cur_ctx; + memset(&cur_ctx, 0, sizeof(cur_ctx)); + if (!ctx) ctx = &cur_ctx; + cur_val = unsafe_yyjson_mut_ptr_getx(val, ptr, len, ctx, err); + if (cur_val) { + if (yyjson_mut_is_obj(ctx->ctn)) { + yyjson_mut_val *key = ctx->pre->next->next; + yyjson_mut_obj_put(ctx->ctn, key, NULL); + } else { + yyjson_ptr_ctx_remove(ctx); + } + ctx->pre = NULL; + ctx->old = cur_val; + } + return cur_val; +} + +/* macros for yyjson_ptr */ +#undef 
return_err +#undef return_err_resolve +#undef return_err_syntax +#undef return_err_alloc + + + +/*============================================================================== + * MARK: - JSON Patch API (RFC 6902) (Public) + *============================================================================*/ + +/* JSON Patch operation */ +typedef enum patch_op { + PATCH_OP_ADD, /* path, value */ + PATCH_OP_REMOVE, /* path */ + PATCH_OP_REPLACE, /* path, value */ + PATCH_OP_MOVE, /* from, path */ + PATCH_OP_COPY, /* from, path */ + PATCH_OP_TEST, /* path, value */ + PATCH_OP_NONE /* invalid */ +} patch_op; + +static patch_op patch_op_get(yyjson_val *op) { + const char *str = op->uni.str; + switch (unsafe_yyjson_get_len(op)) { + case 3: + if (!memcmp(str, "add", 3)) return PATCH_OP_ADD; + return PATCH_OP_NONE; + case 4: + if (!memcmp(str, "move", 4)) return PATCH_OP_MOVE; + if (!memcmp(str, "copy", 4)) return PATCH_OP_COPY; + if (!memcmp(str, "test", 4)) return PATCH_OP_TEST; + return PATCH_OP_NONE; + case 6: + if (!memcmp(str, "remove", 6)) return PATCH_OP_REMOVE; + return PATCH_OP_NONE; + case 7: + if (!memcmp(str, "replace", 7)) return PATCH_OP_REPLACE; + return PATCH_OP_NONE; + default: + return PATCH_OP_NONE; + } +} + +/* macros for yyjson_patch */ +#define return_err(_code, _msg) do { \ + if (err->ptr.code == YYJSON_PTR_ERR_MEMORY_ALLOCATION) { \ + err->code = YYJSON_PATCH_ERROR_MEMORY_ALLOCATION; \ + err->msg = _msg; \ + memset(&err->ptr, 0, sizeof(yyjson_ptr_err)); \ + } else { \ + err->code = YYJSON_PATCH_ERROR_##_code; \ + err->msg = _msg; \ + err->idx = iter.idx ? 
iter.idx - 1 : 0; \ + } \ + return NULL; \ +} while (false) + +#define return_err_copy() \ + return_err(MEMORY_ALLOCATION, "failed to copy value") +#define return_err_key(_key) \ + return_err(MISSING_KEY, "missing key " _key) +#define return_err_val(_key) \ + return_err(INVALID_MEMBER, "invalid member " _key) + +#define ptr_get(_ptr) yyjson_mut_ptr_getx( \ + root, _ptr->uni.str, _ptr##_len, NULL, &err->ptr) +#define ptr_add(_ptr, _val) yyjson_mut_ptr_addx( \ + root, _ptr->uni.str, _ptr##_len, _val, doc, false, NULL, &err->ptr) +#define ptr_remove(_ptr) yyjson_mut_ptr_removex( \ + root, _ptr->uni.str, _ptr##_len, NULL, &err->ptr) +#define ptr_replace(_ptr, _val)yyjson_mut_ptr_replacex( \ + root, _ptr->uni.str, _ptr##_len, _val, NULL, &err->ptr) + +yyjson_mut_val *yyjson_patch(yyjson_mut_doc *doc, + yyjson_val *orig, + yyjson_val *patch, + yyjson_patch_err *err) { + + yyjson_mut_val *root; + yyjson_val *obj; + yyjson_arr_iter iter; + yyjson_patch_err err_tmp; + if (!err) err = &err_tmp; + memset(err, 0, sizeof(*err)); + memset(&iter, 0, sizeof(iter)); + + if (unlikely(!doc || !orig || !patch)) { + return_err(INVALID_PARAMETER, "input parameter is NULL"); + } + if (unlikely(!yyjson_is_arr(patch))) { + return_err(INVALID_PARAMETER, "input patch is not array"); + } + root = yyjson_val_mut_copy(doc, orig); + if (unlikely(!root)) return_err_copy(); + + /* iterate through the patch array */ + yyjson_arr_iter_init(patch, &iter); + while ((obj = yyjson_arr_iter_next(&iter))) { + patch_op op_enum; + yyjson_val *op, *path, *from = NULL, *value; + yyjson_mut_val *val = NULL, *test; + usize path_len, from_len = 0; + if (unlikely(!unsafe_yyjson_is_obj(obj))) { + return_err(INVALID_OPERATION, "JSON patch operation is not object"); + } + + /* get required member: op */ + op = yyjson_obj_get(obj, "op"); + if (unlikely(!op)) return_err_key("`op`"); + if (unlikely(!yyjson_is_str(op))) return_err_val("`op`"); + op_enum = patch_op_get(op); + + /* get required member: path */ + path = 
yyjson_obj_get(obj, "path"); + if (unlikely(!path)) return_err_key("`path`"); + if (unlikely(!yyjson_is_str(path))) return_err_val("`path`"); + path_len = unsafe_yyjson_get_len(path); + + /* get required member: value, from */ + switch ((int)op_enum) { + case PATCH_OP_ADD: case PATCH_OP_REPLACE: case PATCH_OP_TEST: + value = yyjson_obj_get(obj, "value"); + if (unlikely(!value)) return_err_key("`value`"); + val = yyjson_val_mut_copy(doc, value); + if (unlikely(!val)) return_err_copy(); + break; + case PATCH_OP_MOVE: case PATCH_OP_COPY: + from = yyjson_obj_get(obj, "from"); + if (unlikely(!from)) return_err_key("`from`"); + if (unlikely(!yyjson_is_str(from))) return_err_val("`from`"); + from_len = unsafe_yyjson_get_len(from); + break; + default: + break; + } + + /* perform an operation */ + switch ((int)op_enum) { + case PATCH_OP_ADD: /* add(path, val) */ + if (unlikely(path_len == 0)) { root = val; break; } + if (unlikely(!ptr_add(path, val))) { + return_err(POINTER, "failed to add `path`"); + } + break; + case PATCH_OP_REMOVE: /* remove(path) */ + if (unlikely(!ptr_remove(path))) { + return_err(POINTER, "failed to remove `path`"); + } + break; + case PATCH_OP_REPLACE: /* replace(path, val) */ + if (unlikely(path_len == 0)) { root = val; break; } + if (unlikely(!ptr_replace(path, val))) { + return_err(POINTER, "failed to replace `path`"); + } + break; + case PATCH_OP_MOVE: /* val = remove(from), add(path, val) */ + if (unlikely(from_len == 0 && path_len == 0)) break; + val = ptr_remove(from); + if (unlikely(!val)) { + return_err(POINTER, "failed to remove `from`"); + } + if (unlikely(path_len == 0)) { root = val; break; } + if (unlikely(!ptr_add(path, val))) { + return_err(POINTER, "failed to add `path`"); + } + break; + case PATCH_OP_COPY: /* val = get(from).copy, add(path, val) */ + val = ptr_get(from); + if (unlikely(!val)) { + return_err(POINTER, "failed to get `from`"); + } + if (unlikely(path_len == 0)) { root = val; break; } + val = 
yyjson_mut_val_mut_copy(doc, val); + if (unlikely(!val)) return_err_copy(); + if (unlikely(!ptr_add(path, val))) { + return_err(POINTER, "failed to add `path`"); + } + break; + case PATCH_OP_TEST: /* test = get(path), test.eq(val) */ + test = ptr_get(path); + if (unlikely(!test)) { + return_err(POINTER, "failed to get `path`"); + } + if (unlikely(!yyjson_mut_equals(val, test))) { + return_err(EQUAL, "failed to test equal"); + } + break; + default: + return_err(INVALID_MEMBER, "unsupported `op`"); + } + } + return root; +} + +yyjson_mut_val *yyjson_mut_patch(yyjson_mut_doc *doc, + yyjson_mut_val *orig, + yyjson_mut_val *patch, + yyjson_patch_err *err) { + yyjson_mut_val *root, *obj; + yyjson_mut_arr_iter iter; + yyjson_patch_err err_tmp; + if (!err) err = &err_tmp; + memset(err, 0, sizeof(*err)); + memset(&iter, 0, sizeof(iter)); + + if (unlikely(!doc || !orig || !patch)) { + return_err(INVALID_PARAMETER, "input parameter is NULL"); + } + if (unlikely(!yyjson_mut_is_arr(patch))) { + return_err(INVALID_PARAMETER, "input patch is not array"); + } + root = yyjson_mut_val_mut_copy(doc, orig); + if (unlikely(!root)) return_err_copy(); + + /* iterate through the patch array */ + yyjson_mut_arr_iter_init(patch, &iter); + while ((obj = yyjson_mut_arr_iter_next(&iter))) { + patch_op op_enum; + yyjson_mut_val *op, *path, *from = NULL, *value; + yyjson_mut_val *val = NULL, *test; + usize path_len, from_len = 0; + if (!unsafe_yyjson_is_obj(obj)) { + return_err(INVALID_OPERATION, "JSON patch operation is not object"); + } + + /* get required member: op */ + op = yyjson_mut_obj_get(obj, "op"); + if (unlikely(!op)) return_err_key("`op`"); + if (unlikely(!yyjson_mut_is_str(op))) return_err_val("`op`"); + op_enum = patch_op_get((yyjson_val *)(void *)op); + + /* get required member: path */ + path = yyjson_mut_obj_get(obj, "path"); + if (unlikely(!path)) return_err_key("`path`"); + if (unlikely(!yyjson_mut_is_str(path))) return_err_val("`path`"); + path_len = 
unsafe_yyjson_get_len(path); + + /* get required member: value, from */ + switch ((int)op_enum) { + case PATCH_OP_ADD: case PATCH_OP_REPLACE: case PATCH_OP_TEST: + value = yyjson_mut_obj_get(obj, "value"); + if (unlikely(!value)) return_err_key("`value`"); + val = yyjson_mut_val_mut_copy(doc, value); + if (unlikely(!val)) return_err_copy(); + break; + case PATCH_OP_MOVE: case PATCH_OP_COPY: + from = yyjson_mut_obj_get(obj, "from"); + if (unlikely(!from)) return_err_key("`from`"); + if (unlikely(!yyjson_mut_is_str(from))) { + return_err_val("`from`"); + } + from_len = unsafe_yyjson_get_len(from); + break; + default: + break; + } + + /* perform an operation */ + switch ((int)op_enum) { + case PATCH_OP_ADD: /* add(path, val) */ + if (unlikely(path_len == 0)) { root = val; break; } + if (unlikely(!ptr_add(path, val))) { + return_err(POINTER, "failed to add `path`"); + } + break; + case PATCH_OP_REMOVE: /* remove(path) */ + if (unlikely(!ptr_remove(path))) { + return_err(POINTER, "failed to remove `path`"); + } + break; + case PATCH_OP_REPLACE: /* replace(path, val) */ + if (unlikely(path_len == 0)) { root = val; break; } + if (unlikely(!ptr_replace(path, val))) { + return_err(POINTER, "failed to replace `path`"); + } + break; + case PATCH_OP_MOVE: /* val = remove(from), add(path, val) */ + if (unlikely(from_len == 0 && path_len == 0)) break; + val = ptr_remove(from); + if (unlikely(!val)) { + return_err(POINTER, "failed to remove `from`"); + } + if (unlikely(path_len == 0)) { root = val; break; } + if (unlikely(!ptr_add(path, val))) { + return_err(POINTER, "failed to add `path`"); + } + break; + case PATCH_OP_COPY: /* val = get(from).copy, add(path, val) */ + val = ptr_get(from); + if (unlikely(!val)) { + return_err(POINTER, "failed to get `from`"); + } + if (unlikely(path_len == 0)) { root = val; break; } + val = yyjson_mut_val_mut_copy(doc, val); + if (unlikely(!val)) return_err_copy(); + if (unlikely(!ptr_add(path, val))) { + return_err(POINTER, "failed to add 
`path`"); + } + break; + case PATCH_OP_TEST: /* test = get(path), test.eq(val) */ + test = ptr_get(path); + if (unlikely(!test)) { + return_err(POINTER, "failed to get `path`"); + } + if (unlikely(!yyjson_mut_equals(val, test))) { + return_err(EQUAL, "failed to test equal"); + } + break; + default: + return_err(INVALID_MEMBER, "unsupported `op`"); + } + } + return root; +} + +/* macros for yyjson_patch */ +#undef return_err +#undef return_err_copy +#undef return_err_key +#undef return_err_val +#undef ptr_get +#undef ptr_add +#undef ptr_remove +#undef ptr_replace + + + +/*============================================================================== + * MARK: - JSON Merge-Patch API (RFC 7386) (Public) + *============================================================================*/ + +yyjson_mut_val *yyjson_merge_patch(yyjson_mut_doc *doc, + yyjson_val *orig, + yyjson_val *patch) { + usize idx, max; + yyjson_val *key, *orig_val, *patch_val, local_orig; + yyjson_mut_val *builder, *mut_key, *mut_val, *merged_val; + + if (unlikely(!yyjson_is_obj(patch))) { + return yyjson_val_mut_copy(doc, patch); + } + + builder = yyjson_mut_obj(doc); + if (unlikely(!builder)) return NULL; + + memset(&local_orig, 0, sizeof(local_orig)); + if (!yyjson_is_obj(orig)) { + orig = &local_orig; + orig->tag = builder->tag; + orig->uni = builder->uni; + } + + /* If orig is contributing, copy any items not modified by the patch */ + if (orig != &local_orig) { + yyjson_obj_foreach(orig, idx, max, key, orig_val) { + patch_val = yyjson_obj_getn(patch, + unsafe_yyjson_get_str(key), + unsafe_yyjson_get_len(key)); + if (!patch_val) { + mut_key = yyjson_val_mut_copy(doc, key); + mut_val = yyjson_val_mut_copy(doc, orig_val); + if (!yyjson_mut_obj_add(builder, mut_key, mut_val)) return NULL; + } + } + } + + /* Merge items modified by the patch. */ + yyjson_obj_foreach(patch, idx, max, key, patch_val) { + /* null indicates the field is removed. 
*/ + if (unsafe_yyjson_is_null(patch_val)) { + continue; + } + mut_key = yyjson_val_mut_copy(doc, key); + orig_val = yyjson_obj_getn(orig, + unsafe_yyjson_get_str(key), + unsafe_yyjson_get_len(key)); + merged_val = yyjson_merge_patch(doc, orig_val, patch_val); + if (!yyjson_mut_obj_add(builder, mut_key, merged_val)) return NULL; + } + + return builder; +} + +yyjson_mut_val *yyjson_mut_merge_patch(yyjson_mut_doc *doc, + yyjson_mut_val *orig, + yyjson_mut_val *patch) { + usize idx, max; + yyjson_mut_val *key, *orig_val, *patch_val, local_orig; + yyjson_mut_val *builder, *mut_key, *mut_val, *merged_val; + + if (unlikely(!yyjson_mut_is_obj(patch))) { + return yyjson_mut_val_mut_copy(doc, patch); + } + + builder = yyjson_mut_obj(doc); + if (unlikely(!builder)) return NULL; + + memset(&local_orig, 0, sizeof(local_orig)); + if (!yyjson_mut_is_obj(orig)) { + orig = &local_orig; + orig->tag = builder->tag; + orig->uni = builder->uni; + } + + /* If orig is contributing, copy any items not modified by the patch */ + if (orig != &local_orig) { + yyjson_mut_obj_foreach(orig, idx, max, key, orig_val) { + patch_val = yyjson_mut_obj_getn(patch, + unsafe_yyjson_get_str(key), + unsafe_yyjson_get_len(key)); + if (!patch_val) { + mut_key = yyjson_mut_val_mut_copy(doc, key); + mut_val = yyjson_mut_val_mut_copy(doc, orig_val); + if (!yyjson_mut_obj_add(builder, mut_key, mut_val)) return NULL; + } + } + } + + /* Merge items modified by the patch. */ + yyjson_mut_obj_foreach(patch, idx, max, key, patch_val) { + /* null indicates the field is removed. 
*/ + if (unsafe_yyjson_is_null(patch_val)) { + continue; + } + mut_key = yyjson_mut_val_mut_copy(doc, key); + orig_val = yyjson_mut_obj_getn(orig, + unsafe_yyjson_get_str(key), + unsafe_yyjson_get_len(key)); + merged_val = yyjson_mut_merge_patch(doc, orig_val, patch_val); + if (!yyjson_mut_obj_add(builder, mut_key, merged_val)) return NULL; + } + + return builder; +} + +#endif /* YYJSON_DISABLE_UTILS */ diff --git a/vendor/yyjson/yyjson.h b/vendor/yyjson/yyjson.h index 824026059..5eb6d4680 100644 --- a/vendor/yyjson/yyjson.h +++ b/vendor/yyjson/yyjson.h @@ -32,7 +32,7 @@ /*============================================================================== - * Header Files + * MARK: - Header Files *============================================================================*/ #include @@ -45,7 +45,7 @@ /*============================================================================== - * Compile-time Options + * MARK: - Compile-time Options *============================================================================*/ /* @@ -89,14 +89,8 @@ #endif /* - Define as 1 to disable non-standard JSON features support at compile-time: - - YYJSON_READ_ALLOW_INF_AND_NAN - - YYJSON_READ_ALLOW_COMMENTS - - YYJSON_READ_ALLOW_TRAILING_COMMAS - - YYJSON_READ_ALLOW_INVALID_UNICODE - - YYJSON_READ_ALLOW_BOM - - YYJSON_WRITE_ALLOW_INF_AND_NAN - - YYJSON_WRITE_ALLOW_INVALID_UNICODE + Define as 1 to disable non-standard JSON features support at compile-time, + such as YYJSON_READ_ALLOW_XXX and YYJSON_WRITE_ALLOW_XXX. This reduces binary size by about 10%, and slightly improves performance. 
*/ @@ -151,7 +145,7 @@ /*============================================================================== - * Compiler Macros + * MARK: - Compiler Macros *============================================================================*/ /** compiler version (MSVC) */ @@ -179,8 +173,10 @@ #endif /** real gcc check */ -#if !defined(__clang__) && !defined(__INTEL_COMPILER) && !defined(__ICC) && \ - defined(__GNUC__) +#if defined(__GNUC__) && defined(__GNUC_MINOR__) && \ + !defined(__clang__) && !defined(__llvm__) && \ + !defined(__INTEL_COMPILER) && !defined(__ICC) && \ + !defined(__NVCC__) && !defined(__PGI) && !defined(__TINYC__) # define YYJSON_IS_REAL_GCC 1 #else # define YYJSON_IS_REAL_GCC 0 @@ -465,7 +461,7 @@ /*============================================================================== - * Compile Hint Begin + * MARK: - Compile Hint Begin *============================================================================*/ /* extern "C" begin */ @@ -492,23 +488,23 @@ extern "C" { /*============================================================================== - * Version + * MARK: - Version *============================================================================*/ /** The major version of yyjson. */ #define YYJSON_VERSION_MAJOR 0 /** The minor version of yyjson. */ -#define YYJSON_VERSION_MINOR 11 +#define YYJSON_VERSION_MINOR 12 /** The patch version of yyjson. */ -#define YYJSON_VERSION_PATCH 1 +#define YYJSON_VERSION_PATCH 0 /** The version of yyjson in hex: `(major << 16) | (minor << 8) | (patch)`. */ -#define YYJSON_VERSION_HEX 0x000B01 +#define YYJSON_VERSION_HEX 0x000C00 /** The version string of yyjson. */ -#define YYJSON_VERSION_STRING "0.11.1" +#define YYJSON_VERSION_STRING "0.12.0" /** The version of yyjson in hex, same as `YYJSON_VERSION_HEX`. 
*/ yyjson_api uint32_t yyjson_version(void); @@ -516,7 +512,7 @@ yyjson_api uint32_t yyjson_version(void); /*============================================================================== - * JSON Types + * MARK: - JSON Types *============================================================================*/ /** Type of a JSON value (3 bit). */ @@ -578,7 +574,7 @@ typedef uint8_t yyjson_subtype; /*============================================================================== - * Allocator + * MARK: - Allocator *============================================================================*/ /** @@ -662,7 +658,7 @@ yyjson_api void yyjson_alc_dyn_free(yyjson_alc *alc); /*============================================================================== - * Text Locating + * MARK: - Text Locating *============================================================================*/ /** @@ -686,7 +682,7 @@ yyjson_api bool yyjson_locate_pos(const char *str, size_t len, size_t pos, /*============================================================================== - * JSON Structure + * MARK: - JSON Structure *============================================================================*/ /** @@ -720,7 +716,7 @@ typedef struct yyjson_mut_val yyjson_mut_val; /*============================================================================== - * JSON Reader API + * MARK: - JSON Reader API *============================================================================*/ /** Run-time options for JSON reader. */ @@ -734,7 +730,7 @@ typedef uint32_t yyjson_read_flag; - Report error if double number is infinity. - Report error if string contains invalid UTF-8 character or BOM. - Report error on trailing commas, comments, inf and nan literals. */ -static const yyjson_read_flag YYJSON_READ_NOFLAG = 0; +static const yyjson_read_flag YYJSON_READ_NOFLAG = 0; /** Read the input data in-situ. 
This option allows the reader to modify and use input data to store string @@ -742,27 +738,27 @@ static const yyjson_read_flag YYJSON_READ_NOFLAG = 0; The caller should hold the input data before free the document. The input data must be padded by at least `YYJSON_PADDING_SIZE` bytes. For example: `[1,2]` should be `[1,2]\0\0\0\0`, input length should be 5. */ -static const yyjson_read_flag YYJSON_READ_INSITU = 1 << 0; +static const yyjson_read_flag YYJSON_READ_INSITU = 1 << 0; /** Stop when done instead of issuing an error if there's additional content after a JSON document. This option may be used to parse small pieces of JSON in larger data, such as `NDJSON`. */ -static const yyjson_read_flag YYJSON_READ_STOP_WHEN_DONE = 1 << 1; +static const yyjson_read_flag YYJSON_READ_STOP_WHEN_DONE = 1 << 1; /** Allow single trailing comma at the end of an object or array, such as `[1,2,3,]`, `{"a":1,"b":2,}` (non-standard). */ -static const yyjson_read_flag YYJSON_READ_ALLOW_TRAILING_COMMAS = 1 << 2; +static const yyjson_read_flag YYJSON_READ_ALLOW_TRAILING_COMMAS = 1 << 2; -/** Allow C-style single line and multiple line comments (non-standard). */ -static const yyjson_read_flag YYJSON_READ_ALLOW_COMMENTS = 1 << 3; +/** Allow C-style single-line and mult-line comments (non-standard). */ +static const yyjson_read_flag YYJSON_READ_ALLOW_COMMENTS = 1 << 3; /** Allow inf/nan number and literal, case-insensitive, such as 1e999, NaN, inf, -Infinity (non-standard). */ -static const yyjson_read_flag YYJSON_READ_ALLOW_INF_AND_NAN = 1 << 4; +static const yyjson_read_flag YYJSON_READ_ALLOW_INF_AND_NAN = 1 << 4; /** Read all numbers as raw strings (value with `YYJSON_TYPE_RAW` type), inf/nan literal is also read as raw with `ALLOW_INF_AND_NAN` flag. */ -static const yyjson_read_flag YYJSON_READ_NUMBER_AS_RAW = 1 << 5; +static const yyjson_read_flag YYJSON_READ_NUMBER_AS_RAW = 1 << 5; /** Allow reading invalid unicode when parsing string values (non-standard). 
Invalid characters will be allowed to appear in the string values, but @@ -772,16 +768,62 @@ static const yyjson_read_flag YYJSON_READ_NUMBER_AS_RAW = 1 << 5; @warning Strings in JSON values may contain incorrect encoding when this option is used, you need to handle these strings carefully to avoid security risks. */ -static const yyjson_read_flag YYJSON_READ_ALLOW_INVALID_UNICODE = 1 << 6; +static const yyjson_read_flag YYJSON_READ_ALLOW_INVALID_UNICODE = 1 << 6; /** Read big numbers as raw strings. These big numbers include integers that cannot be represented by `int64_t` and `uint64_t`, and floating-point numbers that cannot be represented by finite `double`. The flag will be overridden by `YYJSON_READ_NUMBER_AS_RAW` flag. */ -static const yyjson_read_flag YYJSON_READ_BIGNUM_AS_RAW = 1 << 7; +static const yyjson_read_flag YYJSON_READ_BIGNUM_AS_RAW = 1 << 7; /** Allow UTF-8 BOM and skip it before parsing if any (non-standard). */ -static const yyjson_read_flag YYJSON_READ_ALLOW_BOM = 1 << 8; +static const yyjson_read_flag YYJSON_READ_ALLOW_BOM = 1 << 8; + +/** Allow extended number formats (non-standard): + - Hexadecimal numbers, such as `0x7B`. + - Numbers with leading or trailing decimal point, such as `.123`, `123.`. + - Numbers with a leading plus sign, such as `+123`. */ +static const yyjson_read_flag YYJSON_READ_ALLOW_EXT_NUMBER = 1 << 9; + +/** Allow extended escape sequences in strings (non-standard): + - Additional escapes: `\a`, `\e`, `\v`, ``\'``, `\?`, `\0`. + - Hex escapes: `\xNN`, such as `\x7B`. + - Line continuation: backslash followed by line terminator sequences. + - Unknown escape: if backslash is followed by an unsupported character, + the backslash will be removed and the character will be kept as-is. + However, `\1`-`\9` will still trigger an error. */ +static const yyjson_read_flag YYJSON_READ_ALLOW_EXT_ESCAPE = 1 << 10; + +/** Allow extended whitespace characters (non-standard): + - Vertical tab `\v` and form feed `\f`. 
+ - Line separator `\u2028` and paragraph separator `\u2029`. + - Non-breaking space `\xA0`. + - Byte order mark: `\uFEFF`. + - Other Unicode characters in the Zs (Separator, space) category. */ +static const yyjson_read_flag YYJSON_READ_ALLOW_EXT_WHITESPACE = 1 << 11; + +/** Allow strings enclosed in single quotes (non-standard), such as ``'ab'``. */ +static const yyjson_read_flag YYJSON_READ_ALLOW_SINGLE_QUOTED_STR = 1 << 12; + +/** Allow object keys without quotes (non-standard), such as `{a:1,b:2}`. + This extends the ECMAScript IdentifierName rule by allowing any + non-whitespace character with code point above `U+007F`. */ +static const yyjson_read_flag YYJSON_READ_ALLOW_UNQUOTED_KEY = 1 << 13; + +/** Allow JSON5 format, see: [https://json5.org]. + This flag supports all JSON5 features with some additional extensions: + - Accepts more escape sequences than JSON5 (e.g. `\a`, `\e`). + - Unquoted keys are not limited to ECMAScript IdentifierName. + - Allow case-insensitive `NaN`, `Inf` and `Infinity` literals. */ +static const yyjson_read_flag YYJSON_READ_JSON5 = + (1 << 2) | /* YYJSON_READ_ALLOW_TRAILING_COMMAS */ + (1 << 3) | /* YYJSON_READ_ALLOW_COMMENTS */ + (1 << 4) | /* YYJSON_READ_ALLOW_INF_AND_NAN */ + (1 << 9) | /* YYJSON_READ_ALLOW_EXT_NUMBER */ + (1 << 10) | /* YYJSON_READ_ALLOW_EXT_ESCAPE */ + (1 << 11) | /* YYJSON_READ_ALLOW_EXT_WHITESPACE */ + (1 << 12) | /* YYJSON_READ_ALLOW_SINGLE_QUOTED_STR */ + (1 << 13); /* YYJSON_READ_ALLOW_UNQUOTED_KEY */ @@ -794,7 +836,7 @@ static const yyjson_read_code YYJSON_READ_SUCCESS = 0; /** Invalid parameter, such as NULL input string or 0 input length. */ static const yyjson_read_code YYJSON_READ_ERROR_INVALID_PARAMETER = 1; -/** Memory allocation failure occurs. */ +/** Memory allocation failed. */ static const yyjson_read_code YYJSON_READ_ERROR_MEMORY_ALLOCATION = 2; /** Input JSON string is empty. 
*/ @@ -803,7 +845,7 @@ static const yyjson_read_code YYJSON_READ_ERROR_EMPTY_CONTENT = 3; /** Unexpected content after document, such as `[123]abc`. */ static const yyjson_read_code YYJSON_READ_ERROR_UNEXPECTED_CONTENT = 4; -/** Unexpected ending, such as `[123`. */ +/** Unexpected end of input, the parsed part is valid, such as `[123`. */ static const yyjson_read_code YYJSON_READ_ERROR_UNEXPECTED_END = 5; /** Unexpected character inside the document, such as `[abc]`. */ @@ -812,7 +854,7 @@ static const yyjson_read_code YYJSON_READ_ERROR_UNEXPECTED_CHARACTER = 6; /** Invalid JSON structure, such as `[1,]`. */ static const yyjson_read_code YYJSON_READ_ERROR_JSON_STRUCTURE = 7; -/** Invalid comment, such as unclosed multi-line comment. */ +/** Invalid comment, deprecated, use `UNEXPECTED_END` for unclosed comment. */ static const yyjson_read_code YYJSON_READ_ERROR_INVALID_COMMENT = 8; /** Invalid number, such as `123.e12`, `000`. */ @@ -830,7 +872,7 @@ static const yyjson_read_code YYJSON_READ_ERROR_FILE_OPEN = 12; /** Failed to read a file. */ static const yyjson_read_code YYJSON_READ_ERROR_FILE_READ = 13; -/** Unexpected ending during incremental parsing. Parsing state is saved. */ +/** Incomplete input during incremental parsing; parsing state is preserved. */ static const yyjson_read_code YYJSON_READ_ERROR_MORE = 14; /** Error information for JSON reader. */ @@ -884,6 +926,7 @@ yyjson_api yyjson_doc *yyjson_read_opts(char *dat, 2. The `alc` is thread-safe or NULL. @param path The JSON file's path. + This should be a null-terminated string using the system's native encoding. If this path is NULL or invalid, the function will fail and return NULL. @param flg The JSON read options. Multiple options can be combined with `|` operator. 0 means no options. @@ -960,6 +1003,9 @@ typedef struct yyjson_incr_state yyjson_incr_state; 2. Call `yyjson_incr_read()` repeatedly. 3. Call `yyjson_incr_free()` to free the state. 
+ Note: The incremental JSON reader only supports standard JSON. + Flags for non-standard features (e.g. comments, trailing commas) are ignored. + @param buf The JSON data, null-terminator is not required. If this parameter is NULL, the function will fail and return NULL. @param buf_len The length of the JSON data in `buf`. @@ -1088,27 +1134,7 @@ yyjson_api const char *yyjson_read_number(const char *dat, const yyjson_alc *alc, yyjson_read_err *err); -/** - Read a JSON number. - - This function is thread-safe when data is not modified by other threads. - - @param dat The JSON data (UTF-8 without BOM), null-terminator is required. - If this parameter is NULL, the function will fail and return NULL. - @param val The output value where result is stored. - If this parameter is NULL, the function will fail and return NULL. - The value will hold either UINT or SINT or REAL number; - @param flg The JSON read options. - Multiple options can be combined with `|` operator. 0 means no options. - Supports `YYJSON_READ_NUMBER_AS_RAW` and `YYJSON_READ_ALLOW_INF_AND_NAN`. - @param alc The memory allocator used for long number. - It is only used when the built-in floating point reader is disabled. - Pass NULL to use the libc's default allocator. - @param err A pointer to receive error information. - Pass NULL if you don't need error information. - @return If successful, a pointer to the character after the last character - used in the conversion, NULL if an error occurs. - */ +/** Same as `yyjson_read_number()`. */ yyjson_api_inline const char *yyjson_mut_read_number(const char *dat, yyjson_mut_val *val, yyjson_read_flag flg, @@ -1122,7 +1148,7 @@ yyjson_api_inline const char *yyjson_mut_read_number(const char *dat, /*============================================================================== - * JSON Writer API + * MARK: - JSON Writer API *============================================================================*/ /** Run-time options for JSON writer. 
*/ @@ -1230,7 +1256,7 @@ typedef struct yyjson_write_err { #if !defined(YYJSON_DISABLE_WRITER) || !YYJSON_DISABLE_WRITER /*============================================================================== - * JSON Document Writer API + * MARK: - JSON Document Writer API *============================================================================*/ /** @@ -1267,6 +1293,7 @@ yyjson_api char *yyjson_write_opts(const yyjson_doc *doc, 2. The `alc` is thread-safe or NULL. @param path The JSON file's path. + This should be a null-terminated string using the system's native encoding. If this path is NULL or invalid, the function will fail and return false. If this file is not empty, the content will be discarded. @param doc The JSON document. @@ -1370,6 +1397,7 @@ yyjson_api char *yyjson_mut_write_opts(const yyjson_mut_doc *doc, 3. The `alc` is thread-safe or NULL. @param path The JSON file's path. + This should be a null-terminated string using the system's native encoding. If this path is NULL or invalid, the function will fail and return false. If this file is not empty, the content will be discarded. @param doc The mutable JSON document. @@ -1439,7 +1467,7 @@ yyjson_api_inline char *yyjson_mut_write(const yyjson_mut_doc *doc, /*============================================================================== - * JSON Value Writer API + * MARK: - JSON Value Writer API *============================================================================*/ /** @@ -1476,6 +1504,7 @@ yyjson_api char *yyjson_val_write_opts(const yyjson_val *val, 2. The `alc` is thread-safe or NULL. @param path The JSON file's path. + This should be a null-terminated string using the system's native encoding. If this path is NULL or invalid, the function will fail and return false. If this file is not empty, the content will be discarded. @param val The JSON root value. @@ -1577,6 +1606,7 @@ yyjson_api char *yyjson_mut_val_write_opts(const yyjson_mut_val *val, 3. The `alc` is thread-safe or NULL. 
@param path The JSON file's path. + This should be a null-terminated string using the system's native encoding. If this path is NULL or invalid, the function will fail and return false. If this file is not empty, the content will be discarded. @param val The mutable JSON root value. @@ -1643,12 +1673,40 @@ yyjson_api_inline char *yyjson_mut_val_write(const yyjson_mut_val *val, return yyjson_mut_val_write_opts(val, flg, NULL, len, NULL); } +/** + Write a JSON number. + + @param val A JSON number value to be converted to a string. + If this parameter is invalid, the function will fail and return NULL. + @param buf A buffer to store the resulting null-terminated string. + If this parameter is NULL, the function will fail and return NULL. + For integer values, the buffer must be at least 21 bytes. + For floating-point values, the buffer must be at least 40 bytes. + @return On success, returns a pointer to the character after the last + written character. On failure, returns NULL. + @note + - This function is thread-safe and does not allocate memory + (when `YYJSON_DISABLE_FAST_FP_CONV` is not defined). + - This function will fail and return NULL only in the following cases: + 1) `val` or `buf` is NULL; + 2) `val` is not a number type; + 3) `val` is `inf` or `nan`, and non-standard JSON is explicitly disabled + via the `YYJSON_DISABLE_NON_STANDARD` flag. + */ +yyjson_api char *yyjson_write_number(const yyjson_val *val, char *buf); + +/** Same as `yyjson_write_number()`. */ +yyjson_api_inline char *yyjson_mut_write_number(const yyjson_mut_val *val, + char *buf) { + return yyjson_write_number((const yyjson_val *)val, buf); +} + #endif /* YYJSON_DISABLE_WRITER */ /*============================================================================== - * JSON Document API + * MARK: - JSON Document API *============================================================================*/ /** Returns the root value of this JSON document. 
@@ -1673,7 +1731,7 @@ yyjson_api_inline void yyjson_doc_free(yyjson_doc *doc); /*============================================================================== - * JSON Value Type API + * MARK: - JSON Value Type API *============================================================================*/ /** Returns whether the JSON value is raw. @@ -1735,7 +1793,7 @@ yyjson_api_inline bool yyjson_is_ctn(yyjson_val *val); /*============================================================================== - * JSON Value Content API + * MARK: - JSON Value Content API *============================================================================*/ /** Returns the JSON value's type. @@ -1760,7 +1818,7 @@ yyjson_api_inline const char *yyjson_get_type_desc(yyjson_val *val); yyjson_api_inline const char *yyjson_get_raw(yyjson_val *val); /** Returns the content if the value is bool. - Returns NULL if `val` is NULL or type is not bool. */ + Returns false if `val` is NULL or type is not bool. */ yyjson_api_inline bool yyjson_get_bool(yyjson_val *val); /** Returns the content and cast to uint64_t. @@ -1888,7 +1946,7 @@ yyjson_api_inline bool yyjson_set_str_noesc(yyjson_val *val, bool noesc); /*============================================================================== - * JSON Array API + * MARK: - JSON Array API *============================================================================*/ /** Returns the number of elements in this array. 
@@ -1914,7 +1972,7 @@ yyjson_api_inline yyjson_val *yyjson_arr_get_last(yyjson_val *arr); /*============================================================================== - * JSON Array Iterator API + * MARK: - JSON Array Iterator API *============================================================================*/ /** @@ -1996,7 +2054,7 @@ yyjson_api_inline yyjson_val *yyjson_arr_iter_next(yyjson_arr_iter *iter); /*============================================================================== - * JSON Object API + * MARK: - JSON Object API *============================================================================*/ /** Returns the number of key-value pairs in this object. @@ -2026,7 +2084,7 @@ yyjson_api_inline yyjson_val *yyjson_obj_getn(yyjson_val *obj, const char *key, /*============================================================================== - * JSON Object Iterator API + * MARK: - JSON Object Iterator API *============================================================================*/ /** @@ -2169,7 +2227,7 @@ yyjson_api_inline yyjson_val *yyjson_obj_iter_getn(yyjson_obj_iter *iter, /*============================================================================== - * Mutable JSON Document API + * MARK: - Mutable JSON Document API *============================================================================*/ /** Returns the root value of this JSON document. @@ -2273,7 +2331,7 @@ yyjson_api yyjson_doc *yyjson_mut_val_imut_copy(yyjson_mut_val *val, /*============================================================================== - * Mutable JSON Value Type API + * MARK: - Mutable JSON Value Type API *============================================================================*/ /** Returns whether the JSON value is raw. 
@@ -2335,7 +2393,7 @@ yyjson_api_inline bool yyjson_mut_is_ctn(yyjson_mut_val *val); /*============================================================================== - * Mutable JSON Value Content API + * MARK: - Mutable JSON Value Content API *============================================================================*/ /** Returns the JSON value's type. @@ -2504,7 +2562,7 @@ yyjson_api_inline bool yyjson_mut_set_obj(yyjson_mut_val *val); /*============================================================================== - * Mutable JSON Value Creation API + * MARK: - Mutable JSON Value Creation API *============================================================================*/ /** Creates and returns a raw value, returns NULL on error. @@ -2605,7 +2663,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_strncpy(yyjson_mut_doc *doc, /*============================================================================== - * Mutable JSON Array API + * MARK: - Mutable JSON Array API *============================================================================*/ /** Returns the number of elements in this array. 
@@ -2629,7 +2687,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_get_last(yyjson_mut_val *arr); /*============================================================================== - * Mutable JSON Array Iterator API + * MARK: - Mutable JSON Array Iterator API *============================================================================*/ /** @@ -2731,7 +2789,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_iter_remove( /*============================================================================== - * Mutable JSON Array Creation API + * MARK: - Mutable JSON Array Creation API *============================================================================*/ /** @@ -3092,7 +3150,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_with_strncpy( /*============================================================================== - * Mutable JSON Array Modification API + * MARK: - Mutable JSON Array Modification API *============================================================================*/ /** @@ -3205,7 +3263,7 @@ yyjson_api_inline bool yyjson_mut_arr_rotate(yyjson_mut_val *arr, /*============================================================================== - * Mutable JSON Array Modification Convenience API + * MARK: - Mutable JSON Array Modification Convenience API *============================================================================*/ /** @@ -3411,7 +3469,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_add_obj(yyjson_mut_doc *doc, /*============================================================================== - * Mutable JSON Object API + * MARK: - Mutable JSON Object API *============================================================================*/ /** Returns the number of key-value pairs in this object. 
@@ -3443,7 +3501,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_obj_getn(yyjson_mut_val *obj, /*============================================================================== - * Mutable JSON Object Iterator API + * MARK: - Mutable JSON Object Iterator API *============================================================================*/ /** @@ -3605,7 +3663,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_obj_iter_getn( /*============================================================================== - * Mutable JSON Object Creation API + * MARK: - Mutable JSON Object Creation API *============================================================================*/ /** Creates and returns a mutable object, returns NULL on error. */ @@ -3652,7 +3710,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_obj_with_kv(yyjson_mut_doc *doc, /*============================================================================== - * Mutable JSON Object Modification API + * MARK: - Mutable JSON Object Modification API *============================================================================*/ /** @@ -3762,7 +3820,7 @@ yyjson_api_inline bool yyjson_mut_obj_rotate(yyjson_mut_val *obj, /*============================================================================== - * Mutable JSON Object Modification Convenience API + * MARK: - Mutable JSON Object Modification Convenience API *============================================================================*/ /** Adds a `null` value at the end of the object. 
@@ -4002,7 +4060,7 @@ yyjson_api_inline bool yyjson_mut_obj_rename_keyn(yyjson_mut_doc *doc, #if !defined(YYJSON_DISABLE_UTILS) || !YYJSON_DISABLE_UTILS /*============================================================================== - * JSON Pointer API (RFC 6901) + * MARK: - JSON Pointer API (RFC 6901) * https://tools.ietf.org/html/rfc6901 *============================================================================*/ @@ -4587,7 +4645,7 @@ yyjson_api_inline bool yyjson_ptr_ctx_remove(yyjson_ptr_ctx *ctx); /*============================================================================== - * JSON Patch API (RFC 6902) + * MARK: - JSON Patch API (RFC 6902) * https://tools.ietf.org/html/rfc6902 *============================================================================*/ @@ -4655,7 +4713,7 @@ yyjson_api yyjson_mut_val *yyjson_mut_patch(yyjson_mut_doc *doc, /*============================================================================== - * JSON Merge-Patch API (RFC 7386) + * MARK: - JSON Merge-Patch API (RFC 7386) * https://tools.ietf.org/html/rfc7386 *============================================================================*/ @@ -4688,7 +4746,7 @@ yyjson_api yyjson_mut_val *yyjson_mut_merge_patch(yyjson_mut_doc *doc, /*============================================================================== - * JSON Structure (Implementation) + * MARK: - JSON Structure (Implementation) *============================================================================*/ /** Payload of a JSON value (8 bytes). 
*/ @@ -4725,7 +4783,7 @@ struct yyjson_doc { /*============================================================================== - * Unsafe JSON Value API (Implementation) + * MARK: - Unsafe JSON Value API (Implementation) *============================================================================*/ /* @@ -5054,7 +5112,7 @@ yyjson_api_inline void unsafe_yyjson_set_obj(void *val, size_t size) { /*============================================================================== - * JSON Document API (Implementation) + * MARK: - JSON Document API (Implementation) *============================================================================*/ yyjson_api_inline yyjson_val *yyjson_doc_get_root(yyjson_doc *doc) { @@ -5081,7 +5139,7 @@ yyjson_api_inline void yyjson_doc_free(yyjson_doc *doc) { /*============================================================================== - * JSON Value Type API (Implementation) + * MARK: - JSON Value Type API (Implementation) *============================================================================*/ yyjson_api_inline bool yyjson_is_raw(yyjson_val *val) { @@ -5143,7 +5201,7 @@ yyjson_api_inline bool yyjson_is_ctn(yyjson_val *val) { /*============================================================================== - * JSON Value Content API (Implementation) + * MARK: - JSON Value Content API (Implementation) *============================================================================*/ yyjson_api_inline yyjson_type yyjson_get_type(yyjson_val *val) { @@ -5326,7 +5384,7 @@ yyjson_api_inline bool yyjson_set_str_noesc(yyjson_val *val, bool noesc) { /*============================================================================== - * JSON Array API (Implementation) + * MARK: - JSON Array API (Implementation) *============================================================================*/ yyjson_api_inline size_t yyjson_arr_size(yyjson_val *arr) { @@ -5376,7 +5434,7 @@ yyjson_api_inline yyjson_val *yyjson_arr_get_last(yyjson_val *arr) { 
/*============================================================================== - * JSON Array Iterator API (Implementation) + * MARK: - JSON Array Iterator API (Implementation) *============================================================================*/ yyjson_api_inline bool yyjson_arr_iter_init(yyjson_val *arr, @@ -5415,7 +5473,7 @@ yyjson_api_inline yyjson_val *yyjson_arr_iter_next(yyjson_arr_iter *iter) { /*============================================================================== - * JSON Object API (Implementation) + * MARK: - JSON Object API (Implementation) *============================================================================*/ yyjson_api_inline size_t yyjson_obj_size(yyjson_val *obj) { @@ -5444,7 +5502,7 @@ yyjson_api_inline yyjson_val *yyjson_obj_getn(yyjson_val *obj, /*============================================================================== - * JSON Object Iterator API (Implementation) + * MARK: - JSON Object Iterator API (Implementation) *============================================================================*/ yyjson_api_inline bool yyjson_obj_iter_init(yyjson_val *obj, @@ -5521,7 +5579,7 @@ yyjson_api_inline yyjson_val *yyjson_obj_iter_getn(yyjson_obj_iter *iter, /*============================================================================== - * Mutable JSON Structure (Implementation) + * MARK: - Mutable JSON Structure (Implementation) *============================================================================*/ /** @@ -5637,7 +5695,7 @@ yyjson_api_inline yyjson_mut_val *unsafe_yyjson_mut_val(yyjson_mut_doc *doc, /*============================================================================== - * Mutable JSON Document API (Implementation) + * MARK: - Mutable JSON Document API (Implementation) *============================================================================*/ yyjson_api_inline yyjson_mut_val *yyjson_mut_doc_get_root(yyjson_mut_doc *doc) { @@ -5652,7 +5710,7 @@ yyjson_api_inline void 
yyjson_mut_doc_set_root(yyjson_mut_doc *doc, /*============================================================================== - * Mutable JSON Value Type API (Implementation) + * MARK: - Mutable JSON Value Type API (Implementation) *============================================================================*/ yyjson_api_inline bool yyjson_mut_is_raw(yyjson_mut_val *val) { @@ -5714,7 +5772,7 @@ yyjson_api_inline bool yyjson_mut_is_ctn(yyjson_mut_val *val) { /*============================================================================== - * Mutable JSON Value Content API (Implementation) + * MARK: - Mutable JSON Value Content API (Implementation) *============================================================================*/ yyjson_api_inline yyjson_type yyjson_mut_get_type(yyjson_mut_val *val) { @@ -5893,7 +5951,7 @@ yyjson_api_inline bool yyjson_mut_set_obj(yyjson_mut_val *val) { /*============================================================================== - * Mutable JSON Value Creation API (Implementation) + * MARK: - Mutable JSON Value Creation API (Implementation) *============================================================================*/ #define yyjson_mut_val_one(func) \ @@ -6034,7 +6092,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_strncpy(yyjson_mut_doc *doc, /*============================================================================== - * Mutable JSON Array API (Implementation) + * MARK: - Mutable JSON Array API (Implementation) *============================================================================*/ yyjson_api_inline size_t yyjson_mut_arr_size(yyjson_mut_val *arr) { @@ -6070,7 +6128,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_get_last( /*============================================================================== - * Mutable JSON Array Iterator API (Implementation) + * MARK: - Mutable JSON Array Iterator API (Implementation) *============================================================================*/ yyjson_api_inline 
bool yyjson_mut_arr_iter_init(yyjson_mut_val *arr, @@ -6130,7 +6188,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_iter_remove( /*============================================================================== - * Mutable JSON Array Creation API (Implementation) + * MARK: - Mutable JSON Array Creation API (Implementation) *============================================================================*/ yyjson_api_inline yyjson_mut_val *yyjson_mut_arr(yyjson_mut_doc *doc) { @@ -6310,7 +6368,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_with_strncpy( /*============================================================================== - * Mutable JSON Array Modification API (Implementation) + * MARK: - Mutable JSON Array Modification API (Implementation) *============================================================================*/ yyjson_api_inline bool yyjson_mut_arr_insert(yyjson_mut_val *arr, @@ -6520,7 +6578,7 @@ yyjson_api_inline bool yyjson_mut_arr_rotate(yyjson_mut_val *arr, /*============================================================================== - * Mutable JSON Array Modification Convenience API (Implementation) + * MARK: - Mutable JSON Array Modification Convenience API (Implementation) *============================================================================*/ yyjson_api_inline bool yyjson_mut_arr_add_val(yyjson_mut_val *arr, @@ -6686,7 +6744,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_add_obj(yyjson_mut_doc *doc, /*============================================================================== - * Mutable JSON Object API (Implementation) + * MARK: - Mutable JSON Object API (Implementation) *============================================================================*/ yyjson_api_inline size_t yyjson_mut_obj_size(yyjson_mut_val *obj) { @@ -6715,7 +6773,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_obj_getn(yyjson_mut_val *obj, /*============================================================================== - * Mutable JSON 
Object Iterator API (Implementation) + * MARK: - Mutable JSON Object Iterator API (Implementation) *============================================================================*/ yyjson_api_inline bool yyjson_mut_obj_iter_init(yyjson_mut_val *obj, @@ -6806,7 +6864,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_obj_iter_getn( /*============================================================================== - * Mutable JSON Object Creation API (Implementation) + * MARK: - Mutable JSON Object Creation API (Implementation) *============================================================================*/ yyjson_api_inline yyjson_mut_val *yyjson_mut_obj(yyjson_mut_doc *doc) { @@ -6886,7 +6944,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_obj_with_kv(yyjson_mut_doc *doc, /*============================================================================== - * Mutable JSON Object Modification API (Implementation) + * MARK: - Mutable JSON Object Modification API (Implementation) *============================================================================*/ yyjson_api_inline void unsafe_yyjson_mut_obj_add(yyjson_mut_val *obj, @@ -7079,7 +7137,7 @@ yyjson_api_inline bool yyjson_mut_obj_rotate(yyjson_mut_val *obj, /*============================================================================== - * Mutable JSON Object Modification Convenience API (Implementation) + * MARK: - Mutable JSON Object Modification Convenience API (Implementation) *============================================================================*/ #define yyjson_mut_obj_add_func(func) \ @@ -7306,7 +7364,7 @@ yyjson_api_inline bool yyjson_mut_obj_rename_keyn(yyjson_mut_doc *doc, #if !defined(YYJSON_DISABLE_UTILS) || !YYJSON_DISABLE_UTILS /*============================================================================== - * JSON Pointer API (Implementation) + * MARK: - JSON Pointer API (Implementation) *============================================================================*/ #define 
yyjson_ptr_set_err(_code, _msg) do { \ @@ -7965,7 +8023,7 @@ yyjson_api_inline bool yyjson_ptr_ctx_remove(yyjson_ptr_ctx *ctx) { /*============================================================================== - * JSON Value at Pointer API (Implementation) + * MARK: - JSON Value at Pointer API (Implementation) *============================================================================*/ /** @@ -8068,7 +8126,7 @@ yyjson_api_inline bool yyjson_ptr_get_str( /*============================================================================== - * Deprecated + * MARK: - Deprecated *============================================================================*/ /** @deprecated renamed to `yyjson_doc_ptr_get` */ @@ -8152,7 +8210,7 @@ yyjson_api_inline yyjson_mut_val *unsafe_yyjson_mut_get_pointer( /*============================================================================== - * Compiler Hint End + * MARK: - Compiler Hint End *============================================================================*/ #if defined(__clang__)