JSON-Parse-0.61/000755 001751 001751 00000000000 14011073361 012631 5ustar00benben000000 000000 JSON-Parse-0.61/CONTRIBUTING.md000644 001751 001751 00000000537 13773471644 015113 0ustar00benben000000 000000 If you want to contribute to this module, you can file a bug report at github issues or email us with your suggestions. As of version 0.58, it is just about possible to install this module from the github repository, so you may be able to fork the module and install it successfully on your local computer. If so, please try sending a pull request. JSON-Parse-0.61/examples/000755 001751 001751 00000000000 14011073360 014446 5ustar00benben000000 000000 JSON-Parse-0.61/unicode.c000444 001751 001751 00000111255 14011073345 014430 0ustar00benben000000 000000 /* This is a Unicode library in the programming language C which deals with conversions to and from the UTF-8 format. */ /* Author: Ben Bullock , Repository: https://github.com/benkasminbullock/unicode-c */ #include #include #include "unicode.h" #ifdef HEADER /* _ _ _ _ | | (_)_ __ ___ (_) |_ ___ | | | | '_ ` _ \| | __/ __| | |___| | | | | | | | |_\__ \ |_____|_|_| |_| |_|_|\__|___/ */ /* The maximum number of bytes we need to contain any Unicode code point as UTF-8 as a C string. This length includes one trailing nul byte. */ #define UTF8_MAX_LENGTH 5 /* The maximum possible value of a Unicode code point. See http://www.cl.cam.ac.uk/~mgk25/unicode.html#ucs. */ #define UNICODE_MAXIMUM 0x10ffff /* The maximum possible value which will fit into four bytes of UTF-8. This is larger than UNICODE_MAXIMUM. */ #define UNICODE_UTF8_4 0x1fffff /* ____ _ _ | _ \ ___| |_ _ _ _ __ _ __ __ ____ _| |_ _ ___ ___ | |_) / _ \ __| | | | '__| '_ \ \ \ / / _` | | | | |/ _ \/ __| | _ < __/ |_| |_| | | | | | | \ V / (_| | | |_| | __/\__ \ |_| \_\___|\__|\__,_|_| |_| |_| \_/ \__,_|_|\__,_|\___||___/ */ /* All of the functions in this library return an "int32_t". Negative values are used to indicate errors. 
*/ /* This return value indicates the successful completion of a routine which doesn't use the return value to communicate data back to the caller. */ #define UNICODE_OK 0 /* This return value means that the leading byte of a UTF-8 sequence was not valid. */ #define UTF8_BAD_LEADING_BYTE -1 /* This return value means the caller attempted to turn a code point for a surrogate pair to or from UTF-8. */ #define UNICODE_SURROGATE_PAIR -2 /* This return value means that code points which did not form a surrogate pair were tried to be converted into a code point as if they were a surrogate pair. */ #define UNICODE_NOT_SURROGATE_PAIR -3 /* This return value means that input which was supposed to be UTF-8 encoded contained an invalid continuation byte. If the leading byte of a UTF-8 sequence is not valid, UTF8_BAD_LEADING_BYTE is returned instead of this. */ #define UTF8_BAD_CONTINUATION_BYTE -4 /* This return value indicates a zero byte was found in a string which was supposed to contain UTF-8 bytes. It is returned only by the functions which are documented as not allowing zero bytes. */ #define UNICODE_EMPTY_INPUT -5 /* This return value indicates that UTF-8 bytes were not in the shortest possible form. See http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8. This return value is currently unused. If a character is not in the shortest form, the error UTF8_BAD_CONTINUATION_BYTE is returned. */ #define UTF8_NON_SHORTEST -6 /* This return value indicates that there was an attempt to convert a code point which was greater than UNICODE_MAXIMUM or UNICODE_UTF8_4 into UTF-8 bytes. */ #define UNICODE_TOO_BIG -7 /* This return value indicates that the Unicode code-point ended with either 0xFFFF or 0xFFFE, meaning it cannot be used as a character code point, or it was in the disallowed range FDD0 to FDEF. */ #define UNICODE_NOT_CHARACTER -8 /* This return value indicates that the UTF-8 is valid. It is only used by "valid_utf8". 
*/ #define UTF8_VALID 1 /* This return value indicates that the UTF-8 is not valid. It is only used by "valid_utf8". */ #define UTF8_INVALID 0 #endif /* def HEADER */ /* This table contains the length of a sequence which begins with the byte given. A value of zero indicates that the byte can not begin a UTF-8 sequence. */ /* https://metacpan.org/source/CHANSEN/Unicode-UTF8-0.60/UTF8.xs#L8 */ const uint8_t utf8_sequence_len[0x100] = { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x00-0x0F */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x10-0x1F */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x20-0x2F */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x30-0x3F */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x40-0x4F */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x50-0x5F */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x60-0x6F */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x70-0x7F */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x80-0x8F */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x90-0x9F */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xA0-0xAF */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xB0-0xBF */ 0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2, /* 0xC0-0xCF */ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, /* 0xD0-0xDF */ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /* 0xE0-0xEF */ 4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, /* 0xF0-0xFF */ }; /* This function returns the number of bytes of UTF-8 a sequence starting with byte "c" will become, either 1 (c = 0000xxxx), 2 (c = 110xxxxx), 3 (c = 1110xxxx), or 4 (c = 111100xx or c = 11110100). If "c" is not a valid UTF-8 first byte, the value UTF8_BAD_LEADING_BYTE is returned. */ int32_t utf8_bytes (uint8_t c) { int32_t r; r = utf8_sequence_len[c]; if (r == 0) { return UTF8_BAD_LEADING_BYTE; } return r; } /* This macro converts four bytes of UTF-8 into the corresponding code point. */ #define FOUR(x) \ (((int32_t) (x[0] & 0x07)) << 18) \ | (((int32_t) (x[1] & 0x3F)) << 12) \ | (((int32_t) (x[2] & 0x3F)) << 6) \ | (((int32_t) (x[3] & 0x3F))) /* Reject code points which end in either FFFE or FFFF. 
*/ #define REJECT_FFFF(x) \ if ((x & 0xFFFF) >= 0xFFFE) { \ return UNICODE_NOT_CHARACTER; \ } /* Reject code points in a certain range. */ #define REJECT_NOT_CHAR(r) \ if (r >= UNI_NOT_CHAR_MIN && r <= UNI_NOT_CHAR_MAX) { \ return UNICODE_NOT_CHARACTER; \ } /* Reject surrogates. */ #define REJECT_SURROGATE(ucs2) \ if (ucs2 >= UNI_SUR_HIGH_START && ucs2 <= UNI_SUR_LOW_END) { \ /* Ill-formed. */ \ return UNICODE_SURROGATE_PAIR; \ } /* Try to convert "input" from UTF-8 to UCS-2, and return a value even if the input is partly broken. This checks the first byte of the input, but it doesn't check the subsequent bytes. */ int32_t utf8_no_checks (const uint8_t * input, const uint8_t ** end_ptr) { uint8_t c; c = input[0]; switch (utf8_sequence_len[c]) { case 1: * end_ptr = input + 1; return c; case 2: * end_ptr = input + 2; return (c & 0x1F) << 6 | (input[1] & 0x3F); case 3: * end_ptr = input + 3; return (c & 0x0F) << 12 | (input[1] & 0x3F) << 6 | (input[2] & 0x3F); case 4: * end_ptr = input + 4; return FOUR (input); case 0: /* fall through */ default: return UTF8_BAD_LEADING_BYTE; } } /* Surrogate pair zone. */ #define UNI_SUR_HIGH_START 0xD800 #define UNI_SUR_HIGH_END 0xDBFF #define UNI_SUR_LOW_START 0xDC00 #define UNI_SUR_LOW_END 0xDFFF /* Start of the "not character" range. */ #define UNI_NOT_CHAR_MIN 0xFDD0 /* End of the "not character" range. */ #define UNI_NOT_CHAR_MAX 0xFDEF /* This function converts UTF-8 encoded bytes in "input" into the equivalent Unicode code point. The return value is the Unicode code point corresponding to the UTF-8 character in "input" if successful, and a negative number if not successful. Nul bytes are rejected. "*end_ptr" is set to the next character after the read character on success. "*end_ptr" is set to the start of input on all failures. "end_ptr" may not be NULL. If the first byte of "input" is zero, in other words a NUL or '\0', UNICODE_EMPTY_INPUT is returned. 
If the first byte of "input" is not valid UTF-8, UTF8_BAD_LEADING_BYTE is returned. If the second or later bytes of "input" are not valid UTF-8, including NUL, UTF8_BAD_CONTINUATION_BYTE is returned. If the value extrapolated from "input" is greater than UNICODE_MAXIMUM, UNICODE_TOO_BIG is returned. If the value extrapolated from "input" ends in 0xFFFF or 0xFFFE, UNICODE_NOT_CHARACTER is returned. If the value extrapolated from "input" is between 0xFDD0 and 0xFDEF, UNICODE_NOT_CHARACTER is returned. If the value is within the range of surrogate pairs, the error UNICODE_SURROGATE_PAIR is returned. */ int32_t utf8_to_ucs2 (const uint8_t * input, const uint8_t ** end_ptr) { uint8_t c; uint8_t l; *end_ptr = input; c = input[0]; if (c == 0) { return UNICODE_EMPTY_INPUT; } l = utf8_sequence_len[c]; if (l == 1) { * end_ptr = input + 1; return (int32_t) c; } if (l == 2) { uint8_t d; d = input[1]; /* Two byte case. */ if (d < 0x80 || d > 0xBF) { return UTF8_BAD_CONTINUATION_BYTE; } if (c <= 0xC1) { return UTF8_BAD_CONTINUATION_BYTE; } * end_ptr = input + 2; return ((int32_t) (c & 0x1F) << 6) | ((int32_t) (d & 0x3F)); } if (l == 3) { uint8_t d; uint8_t e; int32_t r; d = input[1]; e = input[2]; /* Three byte case. */ if (d < 0x80 || d > 0xBF || e < 0x80 || e > 0xBF) { return UTF8_BAD_CONTINUATION_BYTE; } if (c == 0xe0 && d < 0xa0) { /* We don't need to check the value of input[2], because the if statement above this one already guarantees that it is 10xxxxxx. */ return UTF8_BAD_CONTINUATION_BYTE; } r = ((int32_t) (c & 0x0F)) << 12 | ((int32_t) (d & 0x3F)) << 6 | ((int32_t) (e & 0x3F)); REJECT_SURROGATE(r); REJECT_FFFF(r); REJECT_NOT_CHAR(r); * end_ptr = input + 3; return r; } else if (l == 4) { /* Four byte case. */ uint8_t d; uint8_t e; uint8_t f; int32_t v; d = input[1]; e = input[2]; f = input[3]; if (/* c must be 11110xxx. */ c >= 0xf8 || /* d, e, f must be 10xxxxxx. 
*/ d < 0x80 || d >= 0xC0 || e < 0x80 || e >= 0xC0 || f < 0x80 || f >= 0xC0) { return UTF8_BAD_CONTINUATION_BYTE; } if (c == 0xf0 && d < 0x90) { /* We don't need to check the values of e and f, because the if statement above this one already guarantees that e and f are 10xxxxxx. */ return UTF8_BAD_CONTINUATION_BYTE; } /* Calculate the code point. */ v = FOUR (input); /* Greater than U+10FFFF */ if (v > UNICODE_MAXIMUM) { return UNICODE_TOO_BIG; } /* Non-characters U+nFFFE..U+nFFFF on plane 1-16 */ REJECT_FFFF(v); /* We don't need to check for surrogate pairs here, since the minimum value of UCS2 if there are four bytes of UTF-8 is 0x10000. */ * end_ptr = input + 4; return v; } return UTF8_BAD_LEADING_BYTE; } /* Input: a Unicode code point, "ucs2". Output: UTF-8 characters in buffer "utf8". Return value: the number of bytes written into "utf8", or a negative number if there was an error. If the value of "ucs2" is invalid because of being in the surrogate pair range from 0xD800 to 0xDFFF, the return value is UNICODE_SURROGATE_PAIR. If the value of "ucs2" is in the range 0xFDD0 to 0xFDEF inclusive, the return value is UNICODE_NOT_CHARACTER. If the lower two bytes of "ucs2" are either 0xFFFE or 0xFFFF, the return value is UNICODE_NOT_CHARACTER. If the value is too big to fit into four bytes of UTF-8, UNICODE_UTF8_4, the return value is UNICODE_TOO_BIG. However, it does not insist on ucs2 being less than UNICODE_MAXIMUM, so the user needs to check that "ucs2" is a valid code point. This adds a zero byte to the end of the string. It assumes that the buffer "utf8" has at least UNICODE_MAX_LENGTH (5) bytes of space to write to, without checking. 
*/ int32_t ucs2_to_utf8 (int32_t ucs2, uint8_t * utf8) { REJECT_FFFF(ucs2); if (ucs2 < 0x80) { utf8[0] = ucs2; utf8[1] = '\0'; return 1; } if (ucs2 < 0x800) { utf8[0] = (ucs2 >> 6) | 0xC0; utf8[1] = (ucs2 & 0x3F) | 0x80; utf8[2] = '\0'; return 2; } if (ucs2 < 0xFFFF) { utf8[0] = ((ucs2 >> 12) ) | 0xE0; utf8[1] = ((ucs2 >> 6 ) & 0x3F) | 0x80; utf8[2] = ((ucs2 ) & 0x3F) | 0x80; utf8[3] = '\0'; REJECT_SURROGATE(ucs2); REJECT_NOT_CHAR(ucs2); return 3; } if (ucs2 <= UNICODE_UTF8_4) { /* http://tidy.sourceforge.net/cgi-bin/lxr/source/src/utf8.c#L380 */ utf8[0] = 0xF0 | (ucs2 >> 18); utf8[1] = 0x80 | ((ucs2 >> 12) & 0x3F); utf8[2] = 0x80 | ((ucs2 >> 6) & 0x3F); utf8[3] = 0x80 | ((ucs2 & 0x3F)); utf8[4] = '\0'; return 4; } return UNICODE_TOO_BIG; } /* For shifting by 10 bits. */ #define TEN_BITS 10 #define HALF_BASE 0x0010000UL /* 0b1111111111 */ #define LOW_TEN_BITS 0x3FF /* This converts the Unicode code point in "unicode" into a surrogate pair, and returns the two parts in "* hi_ptr" and "* lo_ptr". Return value: If "unicode" does not need to be a surrogate pair, the error UNICODE_NOT_SURROGATE_PAIR is returned, and the values of "*hi_ptr" and "*lo_ptr" are undefined. If the conversion is successful, UNICODE_OK is returned. */ int32_t unicode_to_surrogates (int32_t unicode, int32_t * hi_ptr, int32_t * lo_ptr) { int32_t hi = UNI_SUR_HIGH_START; int32_t lo = UNI_SUR_LOW_START; if (unicode < HALF_BASE) { /* Doesn't need to be a surrogate pair. */ return UNICODE_NOT_SURROGATE_PAIR; } unicode -= HALF_BASE; hi |= ((unicode >> TEN_BITS) & LOW_TEN_BITS); lo |= ((unicode) & LOW_TEN_BITS); * hi_ptr = hi; * lo_ptr = lo; return UNICODE_OK; } /* Convert a surrogate pair in "hi" and "lo" to a single Unicode value. The return value is the Unicode value. If the return value is negative, an error has occurred. If "hi" and "lo" do not form a surrogate pair, the error value UNICODE_NOT_SURROGATE_PAIR is returned. 
https://android.googlesource.com/platform/external/id3lib/+/master/unicode.org/ConvertUTF.c */ int32_t surrogates_to_unicode (int32_t hi, int32_t lo) { int32_t u; if (hi < UNI_SUR_HIGH_START || hi > UNI_SUR_HIGH_END || lo < UNI_SUR_LOW_START || lo > UNI_SUR_LOW_END) { return UNICODE_NOT_SURROGATE_PAIR; } u = ((hi - UNI_SUR_HIGH_START) << TEN_BITS) + (lo - UNI_SUR_LOW_START) + HALF_BASE; return u; } #undef UNI_SUR_HIGH_START #undef UNI_SUR_HIGH_END #undef UNI_SUR_LOW_START #undef UNI_SUR_LOW_END #undef TEN_BITS #undef HALF_BASE #undef LOW_TEN_BITS /* Convert the surrogate pair in "hi" and "lo" to UTF-8 in "utf8". This calls "surrogates_to_unicode" and "ucs2_to_utf8", thus it can return the same errors as them, and has the same restriction on "utf8" as "ucs2_to_utf8". */ int32_t surrogate_to_utf8 (int32_t hi, int32_t lo, uint8_t * utf8) { int32_t C; C = surrogates_to_unicode (hi, lo); if (C < 0) { return C; } return ucs2_to_utf8 (C, utf8); } /* Given a nul-terminated string "utf8" and a number of Unicode characters "n_chars", return the number of bytes into "utf8" at which the end of the characters occurs. A negative value indicates some kind of error. If "utf8" contains a zero byte, the return value is UNICODE_EMPTY_INPUT. This may also return any of the error values of "utf8_to_ucs2". */ int32_t unicode_chars_to_bytes (const uint8_t * utf8, int32_t n_chars) { int32_t i; const uint8_t * p = utf8; int32_t len = strlen ((const char *) utf8); if (len == 0 && n_chars != 0) { return UNICODE_EMPTY_INPUT; } for (i = 0; i < n_chars; i++) { int32_t ucs2 = utf8_to_ucs2 (p, & p); if (ucs2 < 0) { return ucs2; } } return p - utf8; } /* Like unicode_count_chars, but without error checks or validation of the input. This only checks the first byte of each UTF-8 sequence, then jumps over the succeeding bytes. It may return UTF8_BAD_LEADING_BYTE if the first byte is invalid. 
*/ int32_t unicode_count_chars_fast (const uint8_t * utf8) { int32_t chars; const uint8_t * p; chars = 0; p = utf8; while (*p) { int32_t len; len = utf8_sequence_len[*p]; if (len == 0) { /* The first byte of a UTF-8 sequence is bad, so return this, not BAD_UTF8. */ return UTF8_BAD_LEADING_BYTE; } p += len; chars++; } return chars; } /* Given a nul-terminated string "utf8", return the total number of Unicode characters it contains. Return value If an error occurs, this may return UTF8_BAD_LEADING_BYTE or any of the errors of "utf8_to_ucs2". */ int32_t unicode_count_chars (const uint8_t * utf8) { int32_t chars = 0; const uint8_t * p = utf8; int32_t len = strlen ((const char *) utf8); if (len == 0) { return 0; } while (p - utf8 < len) { int32_t ucs2; ucs2 = utf8_to_ucs2 (p, & p); if (ucs2 < 0) { /* Return the error from utf8_to_ucs2. */ return ucs2; } chars++; if (*p == '\0') { return chars; } } /* Cannot be reached in practice, since strlen indicates the null byte. */ return UTF8_BAD_LEADING_BYTE; } #ifdef HEADER /* These are intended for use in switch statements, for example switch (c) { case BYTE_80_8F: do_something; They originally come from the Json3 project. 
*/ #define BYTE_80_8F \ 0x80: case 0x81: case 0x82: case 0x83: case 0x84: case 0x85: case 0x86: \ case 0x87: case 0x88: case 0x89: case 0x8A: case 0x8B: case 0x8C: case 0x8D: \ case 0x8E: case 0x8F #define BYTE_80_9F \ 0x80: case 0x81: case 0x82: case 0x83: case 0x84: case 0x85: case 0x86: \ case 0x87: case 0x88: case 0x89: case 0x8A: case 0x8B: case 0x8C: case 0x8D: \ case 0x8E: case 0x8F: case 0x90: case 0x91: case 0x92: case 0x93: case 0x94: \ case 0x95: case 0x96: case 0x97: case 0x98: case 0x99: case 0x9A: case 0x9B: \ case 0x9C: case 0x9D: case 0x9E: case 0x9F #define BYTE_80_BF \ 0x80: case 0x81: case 0x82: case 0x83: case 0x84: case 0x85: case 0x86: \ case 0x87: case 0x88: case 0x89: case 0x8A: case 0x8B: case 0x8C: case 0x8D: \ case 0x8E: case 0x8F: case 0x90: case 0x91: case 0x92: case 0x93: case 0x94: \ case 0x95: case 0x96: case 0x97: case 0x98: case 0x99: case 0x9A: case 0x9B: \ case 0x9C: case 0x9D: case 0x9E: case 0x9F: case 0xA0: case 0xA1: case 0xA2: \ case 0xA3: case 0xA4: case 0xA5: case 0xA6: case 0xA7: case 0xA8: case 0xA9: \ case 0xAA: case 0xAB: case 0xAC: case 0xAD: case 0xAE: case 0xAF: case 0xB0: \ case 0xB1: case 0xB2: case 0xB3: case 0xB4: case 0xB5: case 0xB6: case 0xB7: \ case 0xB8: case 0xB9: case 0xBA: case 0xBB: case 0xBC: case 0xBD: case 0xBE: \ case 0xBF #define BYTE_80_8F_B0_BF \ 0x80: case 0x81: case 0x82: case 0x83: case 0x84: case 0x85: case 0x86: \ case 0x87: case 0x88: case 0x89: case 0x8A: case 0x8B: case 0x8C: case 0x8D: \ case 0x8E: case 0x8F: case 0xB0: \ case 0xB1: case 0xB2: case 0xB3: case 0xB4: case 0xB5: case 0xB6: case 0xB7: \ case 0xB8: case 0xB9: case 0xBA: case 0xBB: case 0xBC: case 0xBD: case 0xBE: \ case 0xBF #define BYTE_80_B6_B8_BF \ 0x80: case 0x81: case 0x82: case 0x83: case 0x84: case 0x85: case 0x86: \ case 0x87: case 0x88: case 0x89: case 0x8A: case 0x8B: case 0x8C: case 0x8D: \ case 0x8E: case 0x8F: case 0x90: case 0x91: case 0x92: case 0x93: case 0x94: \ case 0x95: case 0x96: case 0x97: case 0x98: 
case 0x99: case 0x9A: case 0x9B: \ case 0x9C: case 0x9D: case 0x9E: case 0x9F: case 0xA0: case 0xA1: case 0xA2: \ case 0xA3: case 0xA4: case 0xA5: case 0xA6: case 0xA7: case 0xA8: case 0xA9: \ case 0xAA: case 0xAB: case 0xAC: case 0xAD: case 0xAE: case 0xAF: case 0xB0: \ case 0xB1: case 0xB2: case 0xB3: case 0xB4: case 0xB5: case 0xB6: \ case 0xB8: case 0xB9: case 0xBA: case 0xBB: case 0xBC: case 0xBD: case 0xBE: \ case 0xBF #define BYTE_80_BD \ 0x80: case 0x81: case 0x82: case 0x83: case 0x84: case 0x85: case 0x86: \ case 0x87: case 0x88: case 0x89: case 0x8A: case 0x8B: case 0x8C: case 0x8D: \ case 0x8E: case 0x8F: case 0x90: case 0x91: case 0x92: case 0x93: case 0x94: \ case 0x95: case 0x96: case 0x97: case 0x98: case 0x99: case 0x9A: case 0x9B: \ case 0x9C: case 0x9D: case 0x9E: case 0x9F: case 0xA0: case 0xA1: case 0xA2: \ case 0xA3: case 0xA4: case 0xA5: case 0xA6: case 0xA7: case 0xA8: case 0xA9: \ case 0xAA: case 0xAB: case 0xAC: case 0xAD: case 0xAE: case 0xAF: case 0xB0: \ case 0xB1: case 0xB2: case 0xB3: case 0xB4: case 0xB5: case 0xB6: case 0xB7: \ case 0xB8: case 0xB9: case 0xBA: case 0xBB: case 0xBC: case 0xBD #define BYTE_90_BF \ 0x90: case 0x91: case 0x92: case 0x93: case 0x94: case 0x95: case 0x96: \ case 0x97: case 0x98: case 0x99: case 0x9A: case 0x9B: case 0x9C: case 0x9D: \ case 0x9E: case 0x9F: case 0xA0: case 0xA1: case 0xA2: case 0xA3: case 0xA4: \ case 0xA5: case 0xA6: case 0xA7: case 0xA8: case 0xA9: case 0xAA: case 0xAB: \ case 0xAC: case 0xAD: case 0xAE: case 0xAF: case 0xB0: case 0xB1: case 0xB2: \ case 0xB3: case 0xB4: case 0xB5: case 0xB6: case 0xB7: case 0xB8: case 0xB9: \ case 0xBA: case 0xBB: case 0xBC: case 0xBD: case 0xBE: case 0xBF #define BYTE_A0_BF \ 0xA0: case 0xA1: case 0xA2: case 0xA3: case 0xA4: case 0xA5: case 0xA6: \ case 0xA7: case 0xA8: case 0xA9: case 0xAA: case 0xAB: case 0xAC: case 0xAD: \ case 0xAE: case 0xAF: case 0xB0: case 0xB1: case 0xB2: case 0xB3: case 0xB4: \ case 0xB5: case 0xB6: case 0xB7: case 0xB8: case 
0xB9: case 0xBA: case 0xBB: \ case 0xBC: case 0xBD: case 0xBE: case 0xBF #define BYTE_C2_DF \ 0xC2: case 0xC3: case 0xC4: case 0xC5: case 0xC6: case 0xC7: case 0xC8: \ case 0xC9: case 0xCA: case 0xCB: case 0xCC: case 0xCD: case 0xCE: case 0xCF: \ case 0xD0: case 0xD1: case 0xD2: case 0xD3: case 0xD4: case 0xD5: case 0xD6: \ case 0xD7: case 0xD8: case 0xD9: case 0xDA: case 0xDB: case 0xDC: case 0xDD: \ case 0xDE: case 0xDF #define BYTE_E1_EC \ 0xE1: case 0xE2: case 0xE3: case 0xE4: case 0xE5: case 0xE6: case 0xE7: \ case 0xE8: case 0xE9: case 0xEA: case 0xEB: case 0xEC #define BYTE_F1_F3 \ 0xF1: case 0xF2: case 0xF3 #endif /* def HEADER */ #define UNICODEADDBYTE i++ #define UNICODEFAILUTF8(want) return UTF8_INVALID #define UNICODENEXTBYTE c = input[i] /* Given "input" and "input_length", validate "input" byte by byte up to "input_length". The return value may be UTF8_VALID or UTF8_INVALID. */ int32_t valid_utf8 (const uint8_t * input, int32_t input_length) { int32_t error; utf8_info_t info; error = validate_utf8 (input, input_length, & info); if (error < 0) { return UTF8_INVALID; } return UTF8_VALID; } #define FAIL(x) \ info->len_read = i; \ return x #ifdef HEADER typedef struct utf8_info { int32_t len_read; int32_t runes_read; } utf8_info_t; #endif /* def HEADER */ /* Given "input" and "len", validate "input" byte by byte up to "len". The return value is "UNICODE_OK" (zero) on success or the error found (a negative number) on failure. utf8_info_t is defined in "unicode.h". The value of "info.len_read" is the number of bytes processed. the value of "info.runes_read" is the number of Unicode code points in the input. */ int32_t validate_utf8 (const uint8_t * input, int32_t len, utf8_info_t * info) { int32_t i; uint8_t c; info->len_read = 0; /* We want to increment the runes after "string_start", but that would give us one too many. */ info->runes_read = -1; i = 0; string_start: /* We get here after successfully reading a "rune". 
*/ info->runes_read++; if (i >= len) { info->len_read = len; return UNICODE_OK; /* 0 */ } /* Set c separately here since we use a range comparison before the switch statement. */ c = input[i]; if (c == 0) { FAIL (UNICODE_EMPTY_INPUT); } /* Admit all bytes < 0x80. */ if (c < 0x80) { i++; goto string_start; } switch (c) { case BYTE_C2_DF: UNICODEADDBYTE; goto byte_last_80_bf; case 0xE0: UNICODEADDBYTE; goto byte23_a0_bf; case BYTE_E1_EC: UNICODEADDBYTE; goto byte_penultimate_80_bf; case 0xED: UNICODEADDBYTE; goto byte23_80_9f; case 0xEE: UNICODEADDBYTE; goto byte_penultimate_80_bf; case 0xEF: UNICODEADDBYTE; goto byte_ef_80_bf; case 0xF0: UNICODEADDBYTE; goto byte24_90_bf; case BYTE_F1_F3: UNICODEADDBYTE; goto byte24_80_bf; case 0xF4: UNICODEADDBYTE; goto byte24_80_8f; default: FAIL (UTF8_BAD_LEADING_BYTE); } byte_last_80_bf: switch (UNICODENEXTBYTE) { case BYTE_80_BF: UNICODEADDBYTE; goto string_start; default: FAIL (UTF8_BAD_CONTINUATION_BYTE); } byte_ef_b7: switch (UNICODENEXTBYTE) { case BYTE_80_8F_B0_BF: UNICODEADDBYTE; goto string_start; default: if (c >= 0x90 && c <= 0xAF) { FAIL (UNICODE_NOT_CHARACTER); } else { FAIL (UTF8_BAD_CONTINUATION_BYTE); } } byte_last_80_bd: switch (UNICODENEXTBYTE) { case BYTE_80_BD: UNICODEADDBYTE; goto string_start; case 0xBE: case 0xBF: FAIL (UNICODE_NOT_CHARACTER); default: FAIL (UTF8_BAD_CONTINUATION_BYTE); } byte_penultimate_80_bf: switch (UNICODENEXTBYTE) { case BYTE_80_BF: UNICODEADDBYTE; goto byte_last_80_bf; default: FAIL (UTF8_BAD_CONTINUATION_BYTE); } byte_ef_80_bf: switch (UNICODENEXTBYTE) { case BYTE_80_B6_B8_BF: UNICODEADDBYTE; goto byte_last_80_bd; case 0xB7: UNICODEADDBYTE; /* FDD0 - FDE7 */ goto byte_ef_b7; default: FAIL (UTF8_BAD_CONTINUATION_BYTE); } byte24_90_bf: switch (UNICODENEXTBYTE) { case BYTE_90_BF: UNICODEADDBYTE; goto byte_penultimate_80_bf; default: FAIL (UTF8_BAD_CONTINUATION_BYTE); } byte23_80_9f: switch (UNICODENEXTBYTE) { case BYTE_80_9F: UNICODEADDBYTE; goto byte_last_80_bf; default: if (c >= 0xA0 
&& c <= 0xBF) { FAIL (UNICODE_SURROGATE_PAIR); } else { FAIL (UTF8_BAD_CONTINUATION_BYTE); } } byte23_a0_bf: switch (UNICODENEXTBYTE) { case BYTE_A0_BF: UNICODEADDBYTE; goto byte_last_80_bf; default: FAIL (UTF8_BAD_CONTINUATION_BYTE); } byte24_80_bf: switch (UNICODENEXTBYTE) { case BYTE_80_BF: UNICODEADDBYTE; goto byte_ef_80_bf; default: FAIL (UTF8_BAD_CONTINUATION_BYTE); } byte24_80_8f: switch (UNICODENEXTBYTE) { case BYTE_80_8F: UNICODEADDBYTE; goto byte_ef_80_bf; default: if (c >= 0x90) { FAIL (UNICODE_TOO_BIG); } else { FAIL (UTF8_BAD_CONTINUATION_BYTE); } } } #define REJECT_FE_FF(c) \ if (c == 0xFF || c == 0xFE) { \ return UNICODE_NOT_CHARACTER; \ } /* Make "* ptr" point to the start of the first UTF-8 character after its initial value. This assumes that there are at least four bytes which can be read, and that "* ptr" points to valid UTF-8. If "** ptr" does not have its top bit set, 00xx_xxxx, this does not change the value of "* ptr", and it returns UNICODE_OK. If "** ptr" has its top two bits set, 11xx_xxxx, this does not change the value of "* ptr" and it returns UNICODE_OK. If "**ptr" has its top bit set but its second-to-top bit unset, 10xx_xxxx, so it is the second, third, or fourth byte of a multibyte sequence, "* ptr" is incremented until either "** ptr" is a valid first byte of a UTF-8 sequence, or too many bytes have passed for it to be valid UTF-8. If too many bytes have passed, UTF8_BAD_CONTINUATION_BYTE is returned and "*ptr" is left unchanged. If a valid UTF-8 first byte was found, either 11xx_xxxx or 00xx_xxxx, UNICODE_OK is returned, and "*ptr" is set to the address of the valid byte. Nul bytes (bytes containing zero) are considered valid. If any of the bytes read contains invalid UTF-8 bytes 0xFE and 0xFF, the error code UNICODE_NOT_CHARACTER is returned and "*ptr" is left unchanged. */ int32_t trim_to_utf8_start (const uint8_t ** ptr) { const uint8_t * p = *ptr; uint8_t c; int32_t i; c = * p; REJECT_FE_FF (c); /* 0xC0 = 1100_0000. 
*/ c &= 0xC0; if (c == 0xC0 || c == 0x00) { return UNICODE_OK; } for (i = 0; i < UTF8_MAX_LENGTH - 1; i++) { c = p[i]; REJECT_FE_FF (c); if ((c & 0x80) != 0x80 || (c & 0x40) != 0) { * ptr = p + i; return UNICODE_OK; } } return UTF8_BAD_CONTINUATION_BYTE; } /* Given a return value "code" which is negative or zero, return a string which describes what the return value means. Positive non-zero return values never indicate errors or statuses in this library. */ const char * unicode_code_to_error (int32_t code) { switch (code) { case UTF8_BAD_LEADING_BYTE: return "The leading byte of a UTF-8 sequence was invalid"; case UTF8_BAD_CONTINUATION_BYTE: return "A continuation byte of a UTF-8 sequence was invalid"; case UNICODE_SURROGATE_PAIR: return "A surrogate pair code point could not be converted to UTF-8"; case UNICODE_NOT_SURROGATE_PAIR: return "Input code points did not form a surrogate pair"; case UNICODE_OK: return "Successful completion"; case UNICODE_TOO_BIG: return "A code point was beyond limits"; case UNICODE_NOT_CHARACTER: return "A number ending in hex FFFF or FFFE is not valid Unicode"; case UTF8_NON_SHORTEST: return "A UTF-8 input was not in the shortest form"; case UNICODE_EMPTY_INPUT: return "A byte with value zero was found in UTF-8 input"; default: return "Unknown/invalid error code"; } } /* _____ _ |_ _|__ ___| |_ ___ | |/ _ \/ __| __/ __| | | __/\__ \ |_\__ \ |_|\___||___/\__|___/ */ /* Below this is code for testing which is not normally compiled. Use "make test" to compile the testing version. */ #ifdef TEST #include #include #include "c-tap-test.h" static const uint8_t * utf8 = (uint8_t *) "漢数字ÔÕÖX"; static const uint8_t bad[] = {0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x0}; #define BUFFSIZE 0x100 static void test_ucs2_to_utf8 () { /* Buffer to print utf8 out into. */ uint8_t buffer[BUFFSIZE]; /* Offset into buffer. 
*/ uint8_t * offset; const uint8_t * start = utf8; offset = buffer; while (1) { int32_t unicode; int32_t bytes; const uint8_t * end; unicode = utf8_to_ucs2 (start, & end); if (unicode == UNICODE_EMPTY_INPUT) { break; } if (unicode < 0) { fprintf (stderr, "%s:%d: unexpected error %s converting unicode.\n", __FILE__, __LINE__, unicode_code_to_error (unicode)); // exit ok in test exit (EXIT_FAILURE); } bytes = ucs2_to_utf8 (unicode, offset); TAP_TEST_MSG (bytes > 0, "no bad conversion"); TAP_TEST_MSG (strncmp ((const char *) offset, (const char *) start, bytes) == 0, "round trip OK for %X (%d bytes)", unicode, bytes); start = end; offset += bytes; if (offset - buffer >= BUFFSIZE) { fprintf (stderr, "%s:%d: out of space in buffer.\n", __FILE__, __LINE__); // exit ok exit (EXIT_FAILURE); } } * offset = '\0'; TAP_TEST_MSG (strcmp ((const char *) buffer, (const char *) utf8) == 0, "input %s resulted in identical output %s", utf8, buffer); } static void test_invalid_utf8 () { uint8_t invalid_utf8[UTF8_MAX_LENGTH]; int32_t unicode; int32_t valid; const uint8_t * end; snprintf ((char *) invalid_utf8, UTF8_MAX_LENGTH - 1, "%c%c%c", 0xe8, 0xe4, 0xe5); unicode = utf8_to_ucs2 (invalid_utf8, & end); TAP_TEST_MSG (unicode == UTF8_BAD_CONTINUATION_BYTE, "invalid UTF-8 gives incorrect result"); valid = valid_utf8 (invalid_utf8, strlen ((char *) invalid_utf8)); TAP_TEST_MSG (valid == UTF8_INVALID, "Invalid UTF-8 fails valid_utf8"); } static void test_surrogate_pairs () { int32_t status; int32_t hi; int32_t lo; int32_t rt; /* This is the wide character space, which does not require representation as a surrogate pair. */ int32_t nogood = 0x3000; /* Two examples from the Wikipedia article on UTF-16 https://en.wikipedia.org/w/index.php?title=UTF-16&oldid=744329865#Examples. 
*/ int32_t wikipedia_1 = 0x10437; int32_t wikipedia_2 = 0x24b62; /* An example from the JSON RFC http://rfc7159.net/rfc7159#rfc.section.7 */ int32_t json_spec = 0x1D11E; status = unicode_to_surrogates (nogood, & hi, & lo); TAP_TEST_MSG (status == UNICODE_NOT_SURROGATE_PAIR, "low value to surrogate pair breaker returns error"); status = unicode_to_surrogates (wikipedia_1, & hi, & lo); TAP_TEST_MSG (status == UNICODE_OK, "Ok with %X", wikipedia_1); TAP_TEST_MSG (hi == 0xD801, "Got expected %X == 0xD801", hi); TAP_TEST_MSG (lo == 0xDC37, "Got expected %X == 0xDC37", lo); rt = surrogates_to_unicode (hi, lo); TAP_TEST_MSG (rt == wikipedia_1, "Round trip %X == initial %X", rt, wikipedia_1); status = unicode_to_surrogates (wikipedia_2, & hi, & lo); TAP_TEST_MSG (status == UNICODE_OK, "Ok with %X", wikipedia_1); TAP_TEST_MSG (hi == 0xD852, "Got expected %X == 0xD852", hi); TAP_TEST_MSG (lo == 0xDF62, "Got expected %X == 0xDF62", lo); rt = surrogates_to_unicode (hi, lo); TAP_TEST_MSG (rt == wikipedia_2, "Round trip %X == initial %X", rt, wikipedia_2); status = unicode_to_surrogates (json_spec, & hi, & lo); TAP_TEST_MSG (status == UNICODE_OK, "Ok with %X", json_spec); TAP_TEST_MSG (hi == 0xD834, "Got expected %X == 0xD834", hi); TAP_TEST_MSG (lo == 0xDd1e, "Got expected %X == 0xDD1e", lo); rt = surrogates_to_unicode (hi, lo); TAP_TEST_MSG (rt == json_spec, "Round trip %X == initial %X", rt, json_spec); } /* Test sending various bytes into "utf8_bytes" and seeing whether the return value is what we expected. */ static void test_utf8_bytes () { struct tub { int32_t first; int32_t expect; } tests[] = { {'a', 1}, {0xb0, UTF8_BAD_LEADING_BYTE}, {0xc2, 2}, {0xff, UTF8_BAD_LEADING_BYTE}, }; int32_t n_tests = sizeof (tests) / sizeof (struct tub); int32_t i; for (i = 0; i < n_tests; i++) { /* Expected bytes. 
*/ int32_t xbytes; int32_t firstbyte; firstbyte = tests[i].first; xbytes = utf8_bytes (firstbyte); TAP_TEST_MSG (xbytes == tests[i].expect, "Got %d (%d) with input %d", xbytes, tests[i].expect, firstbyte); } } /* Test the conversion from utf-8 to ucs-2 (UTF-16). */ static void test_utf8_to_ucs2 () { const uint8_t * start = utf8; while (*start) { int32_t unicode; const uint8_t * end; unicode = utf8_to_ucs2 (start, & end); TAP_TEST_MSG (unicode > 0, "no bad value at %s", start); printf ("# %s is %04X, length is %d\n", start, unicode, (int) (end - start)); start = end; } } /* Test counting of unicode characters. */ static void test_unicode_count_chars () { int32_t cc; cc = unicode_count_chars (utf8); TAP_TEST_MSG (cc == 7, "unicode_count_chars gets seven characters for utf8"); cc = unicode_count_chars_fast (utf8); TAP_TEST_MSG (cc == 7, "unicode_count_chars_fast gets seven characters for utf8"); } static void test_valid_utf8 () { int32_t valid; valid = valid_utf8 (utf8, strlen ((const char *) utf8)); TAP_TEST_MSG (valid == UTF8_VALID, "Valid UTF-8 passes valid_utf8"); } static void test_trim_to_utf8_start () { int32_t status; const uint8_t * p; /* Invalid UTF-8. */ /* Valid UTF-8. 
*/ uint8_t good[] = "化苦"; uint8_t good2[] = "化abc"; p = bad; status = trim_to_utf8_start (& p); TAP_TEST_MSG (status == UTF8_BAD_CONTINUATION_BYTE, "Non-UTF-8 causes error"); TAP_TEST_MSG (p == bad, "Did not change pointer"); p = good + 1; status = trim_to_utf8_start (& p); TAP_TEST_MSG (status == UNICODE_OK, "Got TAP_TEST_MSG result"); TAP_TEST_MSG (p != good + 1, "Moved p"); TAP_TEST_MSG (p == good + 3, "Moved p to the right position"); p = good2 + 1; status = trim_to_utf8_start (& p); TAP_TEST_MSG (status == UNICODE_OK, "Got TAP_TEST_MSG result"); TAP_TEST_MSG (p != good2 + 1, "Moved p"); TAP_TEST_MSG (p == good2 + 3, "Moved p to the right position"); } static void test_constants () { TAP_TEST (UNICODE_UTF8_4 > UNICODE_MAXIMUM); } static void test_utf8_validate () { int r; int l; utf8_info_t info; r = validate_utf8 ((const uint8_t *) "", 0, & info); TAP_TEST_EQUAL (r, UNICODE_OK); TAP_TEST_EQUAL (info.len_read, 0); TAP_TEST_EQUAL (info.runes_read, 0); l = strlen ((const char *) utf8); r = validate_utf8 (utf8, l, & info); TAP_TEST_EQUAL (r, UNICODE_OK); TAP_TEST_EQUAL (info.len_read, l); TAP_TEST_EQUAL (info.runes_read, 7); l = strlen ((const char *) bad); r = validate_utf8 (bad, l, & info); TAP_TEST (r != UNICODE_OK); } int main () { test_utf8_to_ucs2 (); test_ucs2_to_utf8 (); test_invalid_utf8 (); test_unicode_count_chars (); test_surrogate_pairs (); test_utf8_bytes (); test_valid_utf8 (); test_trim_to_utf8_start (); test_constants (); test_utf8_validate (); TAP_PLAN; } #endif /* def TEST */ JSON-Parse-0.61/lib/000755 001751 001751 00000000000 14011073360 013376 5ustar00benben000000 000000 JSON-Parse-0.61/Parse.xs000644 001751 001751 00000012426 14010625451 014266 0ustar00benben000000 000000 #include "EXTERN.h" #include "perl.h" #include #include "XSUB.h" /* TESTRANDOM should never be defined in the code released to CPAN. That this is not defined is tested in "xt/testrandom-invalid.t". 
*/ //#define TESTRANDOM #ifdef TESTRANDOM #include #endif /* def TESTRANDOM */ /* A structure representing the "null" in JSON. Although we're now using PL_sv_yes and PL_sv_no, we don't use PL_sv_undef, because perldoc perlguts says it's a bad idea. */ static SV * json_null; /* Code starts here. */ #include "unicode.h" #include "unicode.c" #include "json-common.c" #define PERLING #include "json-perl.c" #undef PERLING #define TOKENING #include "json-perl.c" #undef TOKENING #include "json-perl.c" #include "json-entry-points.c" #ifdef TESTRANDOM #include "json-random-test.c" #endif /* def TESTRANDOM */ #include "json-whitespace.c" #ifdef NOPERL #error "Cannot define NOPERL error when compiling Perl version" #endif /* def NOPERL */ typedef json_parse_t * JSON__Parse; typedef json_token_t * JSON__Tokenize; MODULE=JSON::Parse PACKAGE=JSON::Parse PROTOTYPES: DISABLE BOOT: { json_null = get_sv ("JSON::Parse::null", GV_ADD); SvREADONLY_on (json_null); } SV * parse_json (json) SV * json; CODE: RETVAL = parse (json); OUTPUT: RETVAL SV * parse_json_safer (json) SV * json; CODE: RETVAL = parse_safe (json); OUTPUT: RETVAL void assert_valid_json (json) SV * json; CODE: validate (json, 0); JSON::Parse new (char * class, ...) CODE: if (! 
class) { croak ("no class"); } Newxz (RETVAL, 1, json_parse_t); json_parse_init (RETVAL); OUTPUT: RETVAL SV * run_internal (parser, json) JSON::Parse parser SV * json CODE: RETVAL = json_parse_run (parser, json); OUTPUT: RETVAL void check (parser, json) JSON::Parse parser SV * json CODE: check (parser, json); void DESTROY (parser) JSON::Parse parser; CODE: json_parse_free (parser); void set_true (parser, user_true) JSON::Parse parser; SV * user_true; CODE: json_parse_set_true (parser, user_true); void set_false (parser, user_false) JSON::Parse parser; SV * user_false; CODE: json_parse_set_false (parser, user_false); void set_null (parser, user_null) JSON::Parse parser; SV * user_null; CODE: json_parse_set_null (parser, user_null); void delete_true (parser) JSON::Parse parser; CODE: json_parse_delete_true (parser); void delete_false (parser) JSON::Parse parser; CODE: json_parse_delete_false (parser); void copy_literals (parser, onoff) JSON::Parse parser; SV * onoff; CODE: json_parse_copy_literals (parser, onoff); void delete_null (parser) JSON::Parse parser; CODE: json_parse_delete_null (parser); void no_warn_literals (parser, onoff) JSON::Parse parser; SV * onoff; CODE: parser->no_warn_literals = SvTRUE (onoff) ? 1 : 0; void detect_collisions (parser, onoff) JSON::Parse parser; SV * onoff; CODE: parser->detect_collisions = SvTRUE (onoff) ? 1 : 0; void diagnostics_hash (parser, onoff) JSON::Parse parser; SV * onoff; CODE: #if PERL_VERSION > 12 parser->diagnostics_hash = SvTRUE (onoff) ? 1 : 0; #else warn ("diagnostics_hash () requires Perl 5.14 or later; this is 5.%d", PERL_VERSION); #endif void warn_only (parser, onoff) JSON::Parse parser; SV * onoff; CODE: parser->warn_only = SvTRUE (onoff) ? 
1 : 0; int get_warn_only (parser) JSON::Parse parser; CODE: if (parser->warn_only) { RETVAL = 1; } else { RETVAL = 0; } OUTPUT: RETVAL void set_max_depth (json, max_depth) JSON::Parse json; int max_depth; CODE: if (max_depth < 0) { croak ("Invalid max depth %d", max_depth); } json->max_depth = max_depth; int get_max_depth (json) JSON::Parse json; CODE: RETVAL = json->max_depth; if (json->max_depth == 0) { RETVAL = JSON_PARSE_DEFAULT_MAX_DEPTH; } OUTPUT: RETVAL #ifdef TESTRANDOM int random_json () CODE: RETVAL = random_json (); OUTPUT: RETVAL #endif /* def TESTRANDOM */ void upgrade_utf8 (parser, onoff) JSON::Parse parser; SV * onoff; CODE: parser->upgrade_utf8 = SvTRUE (onoff) ? 1 : 0; MODULE=JSON::Parse PACKAGE=JSON::Tokenize JSON::Tokenize tokenize_json (json) SV * json; CODE: RETVAL = tokenize (json); RETVAL->blessed = 1; OUTPUT: RETVAL JSON::Tokenize tokenize_child (token) JSON::Tokenize token CODE: RETVAL = 0; if (token->child) { RETVAL = token->child; RETVAL->blessed = 1; } OUTPUT: RETVAL JSON::Tokenize tokenize_next (token) JSON::Tokenize token CODE: if (token->next) { RETVAL = token->next; RETVAL->blessed = 1; } else { RETVAL = 0; } OUTPUT: RETVAL int tokenize_start (token) JSON::Tokenize token CODE: RETVAL = token->start; OUTPUT: RETVAL int tokenize_end (token) JSON::Tokenize token CODE: RETVAL = token->end; OUTPUT: RETVAL SV * tokenize_type (token) JSON::Tokenize token CODE: /* Only set this to the real value if everything is OK. 
*/ RETVAL = & PL_sv_undef; if (token->type > json_token_invalid && token->type < n_json_tokens) { RETVAL = newSVpv (token_names[token->type], 0); } else { warn ("Invalid JSON token type %d", token->type); } OUTPUT: RETVAL void DESTROY (token) JSON::Tokenize token CODE: tokenize_free (token); MODULE=JSON::Parse PACKAGE=JSON::Whitespace SV * strip_whitespace (tokens, json) JSON::Tokenize tokens; SV * json; CODE: RETVAL = strip_whitespace (tokens, json); OUTPUT: RETVAL JSON-Parse-0.61/t/000755 001751 001751 00000000000 14011073360 013073 5ustar00benben000000 000000 JSON-Parse-0.61/typemap000644 001751 001751 00000000135 12614536151 014242 0ustar00benben000000 000000 json_parse_t * T_PTROBJ JSON::Parse T_PTROBJ json_token_t * T_PTROBJ JSON::Tokenize T_PTROBJ JSON-Parse-0.61/README000644 001751 001751 00000006454 14011073353 013523 0ustar00benben000000 000000 _ ____ ___ _ _ ____ | / ___| / _ \| \ | | _ _ | _ \ __ _ _ __ ___ ___ _ | \___ \| | | | \| | (_|_) | |_) / _` | '__/ __|/ _ \ | |_| |___) | |_| | |\ | _ _ | __/ (_| | | \__ \ __/ \___/|____/ \___/|_| \_| (_|_) |_| \__,_|_| |___/\___| This is the README for JSON::Parse version 0.61. JSON::Parse is a "module" for the Perl computer programming language, a library of computer code to install on a computer. This document contains four sections: 1. About - what the module does 2. Documentation - how to learn more about the module 3. Installation - how to install this module on a computer 4. Help - what to do if you get stuck ----------------------------------------------------------------------------- 1. ABOUT JSON::Parse - Parse JSON A module for parsing JSON. (JSON means "JavaScript Object Notation" and it is specified in "RFC 8259".) JSON::Parse offers the function "parse_json", which takes a string containing JSON, and returns an equivalent Perl structure. 
It also offers validation of JSON via "valid_json", which returns true or false depending on whether the JSON is correct or not, and "assert_valid_json", which produces a descriptive fatal error if the JSON is invalid. A function "read_json" reads JSON from a file, and there is a safer version of "parse_json" called "parse_json_safe" which doesn't throw exceptions. For special cases of parsing, there are also methods "new" and "parse", which create a JSON parsing object and run it on text. See "METHODS". JSON::Parse accepts only UTF-8 as input. See "UTF-8 only" and "Handling of Unicode". ----------------------------------------------------------------------------- 2. DOCUMENTATION You can read the documentation for the module online at the following website: * http://metacpan.org/release/JSON-Parse (This link goes to the latest version of the module.) After installing the module, you can read the documentation on your computer using perldoc JSON::Parse ----------------------------------------------------------------------------- 3. INSTALLATION This module requires Perl version 5.8.9 or later. To install the module from CPAN, use cpan JSON::Parse If you have the App::cpanminus installer, you may prefer cpanm JSON::Parse To install the module from the source file, JSON-Parse-0.61.tar.gz, follow this sequence of commands: tar xfz JSON-Parse-0.61.tar.gz cd JSON-Parse-0.61 perl Makefile.PL make make install If you want to test the module before installing it, use "make test" after "make" and before "make install". ----------------------------------------------------------------------------- 4. HELP To get help with the module, you can email the author, Ben Bullock, at . If you think there is a problem in the module, you can report a bug at , or if you want to alter the source code of JSON::Parse, try the public repository on github at . ----------------------------------------------------------------------------- This README was written on Thu Feb 11 09:14:35 2021. 
----------------------------------------------------------------------------- JSON-Parse-0.61/Changes000644 001751 001751 00000012110 14011073306 014116 0ustar00benben000000 000000 0.61 2021-02-11 * Add upgrade_utf8 method 0.60 2021-01-26 * Add "read" method to read a file from an object * Rename "run" to "parse" (old name is still OK) * "tokenize_child" no longer returns parent on failure * Add JSON::Whitespace (manipulate JSON whitespace) to the module 0.59 2021-01-26 * "json_file_to_perl" renamed "read_json" * Documentation work -- Benchmarks section updated with latest versions, add Cpanel::JSON::XS -- Add Metacpan vote ratings to other CPAN modules section -- Sections reordered to put methods above parsing details 0.58 2021-01-01 * Protect against stack overflows by having maximum parsing depth -- set_max_depth, get_max_depth methods added * Documentation updated -- JSON RFC changed to 8259 -- Discussion of Unicode tests in JSON Test Suite 0.57 2020-07-09 * Bug fix for long strings 0.56 2020-02-06 * Allow build on Solaris and SunOS 0.55_02 2019-01-24 * For testing of Daxim segfault through CPAN testers 0.55 2017-10-21 * Versions synchronised * Memory problem on Cygwin addressed 0.54 2017-10-20 * Refuse to support Solaris 0.53 2017-10-18 * Windows build fix 0.52 2017-10-18 * Removed unnecessary test 0.50 2017-10-18 * tokenize_text method added to JSON::Tokenize 0.49 2016-11-26 * Behaviour for undefined, whitespace-only inputs was altered * JSON Parsing Test Suite compliance 0.48 2016-11-20 * Add "check" method for the benefit of JSON::Repair 0.47 2016-11-16 * Bug fixes for Perl 12 and earlier regarding diagnostics_hash method 0.46 2016-11-16 * Remove experimental feature $json_diagnostics * Add diagnostics_hash method as a replacement for $json_diagnostics * Better warnings for parse_json_safe * Add the examples directory files to the CPAN distribution * Documentation rewrites 0.45 2016-11-15 * Remove non-functioning JSON::Whitespace module from CPAN 
distribution 0.44 2016-11-13 * Add a primitive interface to JSON::Tokenize * Document the JSON::Tokenize interface 0.43 2016-11-05 * Remove validjson dependence on Path::Tiny. * Produce better outputs from validjson script 0.42 2016-08-22 * Long string copying bug fixed. 0.41 2016-06-14 * Implement "warn_only" * Documentation clarifications 0.40 2016-05-22 * Compilation warning, possible memory leaks (SHLOMIF) 0.39 2015-11-18 * Fix for 64 bit * Changed error message documentation to machine-readable 0.38 2015-10-30 * Add object interface for more flexible parsing. * Add "parse_json_safe" with no exceptions and hash collision detection. * Add choice to copy literals. * Better documentation of changes and examples 0.37 2015-10-29 * Remove $JSON::Parse::true and false, use &PL_sv_yes and &PL_sv_no instead. 0.36 2015-10-24 * Document and install "validjson" script. * Document the "$JSON::Parse::json_diagnostics" global variable. 0.35 2015-10-19 * Minor documentation changes 0.34 2015-10-19 * Document key collisions * Extend SEE ALSO section on other JSON modules on CPAN. 0.33 2015-10-09 * Memory leak plug * Small speed improvements * Minor bug about commas after bare values * Documentation rewritten to clarify changes in 0.32 and testing. 0.32 2015-07-29 * Update to accept bare values, per RFC 7159. 0.31 2014-12-07 * Make sure to cast to STRLEN in newSVpv(n) calls. * Remove reference to deleted Google Group from documentation. 0.30 2014-03-23 * Prototypes removed from functions * Documentation rewrites * Changes to compile on Darwin OS * Use "assert_valid_json" in DIAGNOSTICS section of pod. 0.29 2013-12-25 * UTF-8 validation in strings * Number, object, array parsing improvements * Fewer error messages & documented (see DIAGNOSTICS in pod) * Testing done with random input to root out problems (see TESTING in pod) 0.28 2013-12-12 * Change function name from "validate_json" to "assert_valid_json" (old name is still accepted). 
* More tests * Style of error messages changed for "end of input" errors. * More error checking for invalid \u escapes. 0.27 2013-12-11 * More thorough checking of input for format errors (missing commas etc.) * Check for bytes 0x00 - 0x1F in strings and throw error if present * Don't upgrade \u0000 - \u0080 to character strings * Documentation rewrites 0.26 2013-12-10 * Remove core file from distribution. 0.25 2013-12-10 * Fix bug where missing commas were not detected. * Now parses \uXXXX constructions. * "valid_json" now back to being a fast validator. * Add new "validate_json", a fast, exception throwing validator. * Surrogate pairs now handled (tentative) * Upgrade \uXXXX to character string. 0.24 2013-11-27 * Update "Changes" to conform to CPAN::Changes::Spec 0.23 2013-09-15 0.22 2013-08-26 * Typos and minor details in documentation. * Changes to compile on Microsoft C compiler. 0.21 2013-08-07 * Changes documented. 0.20 2013-08-06 * From-scratch rewrite of module. 0.19 2013-03-15 0.18 2013-03-12 0.17 2013-03-10 0.16 2013-03-08 0.15 2013-03-08 0.14 2012-06-12 0.13 2012-06-12 0.12 2012-06-12 0.11 2012-04-13 0.10 2012-03-17 0.09 2012-03-16 0.07 2011-06-09 0.06 2011-06-02 0.05 2011-05-31 * Changed name from JSON::Argo to JSON::Parse 0.04 2011-05-29 0.03 2011-05-25 0.02 2011-05-25 0.01 2011-05-23 JSON-Parse-0.61/json-perl.c000644 001751 001751 00000063207 14010627445 014725 0ustar00benben000000 000000 /* The C part is broken into three pieces, "json-common.c", "json-perl.c", and "json-entry-points.c". This file contains the "Perl" stuff, for example if we have a string, the stuff to convert it into a Perl hash key or a Perl scalar is in this file. */ /* There are two routes through the code, the PERLING route and the non-PERLING route. If we go via the non-PERLING route, we never create or alter any Perl-related stuff, we just parse each byte and possibly throw an error. This makes validation faster. 
*/ #ifdef PERLING /* We are creating Perl structures from the JSON. */ #define PREFIX(x) x #define SVPTR SV * #define SETVALUE value = #elif defined(TOKENING) /* We are just tokenizing the JSON. */ #define PREFIX(x) tokenize_ ## x #define SVPTR json_token_t * #define SETVALUE value = #else /* not def PERLING/TOKENING */ /* Turn off everything to do with creating Perl things. */ #define PREFIX(x) valid_ ## x #define SVPTR void #define SETVALUE #endif /* def PERLING */ /* This is what INT_MAX_DIGITS is, but #defining it like this causes huge amounts of unnecessary calculation, so this is commented out. #define INT_MAX_DIGITS ((int) (log (INT_MAX) / log (10)) - 1) */ /* The maximum digits we allow an integer before throwing in the towel and returning a Perl string type. */ #define INT_MAX_DIGITS 8 #define USEDIGIT guess = guess * 10 + (c - '0') static INLINE SVPTR PREFIX (number) (json_parse_t * parser) { /* End marker for strtod. */ char * end; /* Start marker for strtod. */ char * start; /* A guess for integer numbers. */ int guess; /* The parsed character itself, the cause of our motion. */ unsigned char c; /* If it has exp or dot in it. */ double d; /* Negative number. */ int minus; /* When this is called, it means that a byte indicating a number was found. We need to re-examine that byte as a number. 
*/ parser->end--; start = (char *) parser->end; #define FAILNUMBER(err) \ if (STRINGEND && \ parser->top_level_value && \ c == '\0') { \ goto exp_number_end; \ } \ parser->bad_byte = parser->end - 1; \ parser->error = json_error_ ## err; \ parser->bad_type = json_number; \ parser->bad_beginning = \ (unsigned char*) start; \ failbadinput (parser) #define NUMBEREND \ WHITESPACE: \ case ']': \ case '}': \ case ',' #define XNUMBEREND (XCOMMA|XWHITESPACE|parser->end_expected) guess = 0; minus = 0; switch (NEXTBYTE) { case DIGIT19: guess = c - '0'; goto leading_digit19; case '0': goto leading_zero; case '-': minus = 1; goto leading_minus; default: parser->expected = XDIGIT | XMINUS; FAILNUMBER (unexpected_character); } leading_digit19: switch (NEXTBYTE) { case DIGIT: USEDIGIT; goto leading_digit19; case '.': goto dot; case 'e': case 'E': goto exp; case NUMBEREND: goto int_number_end; default: parser->expected = XDIGIT | XDOT | XEXPONENTIAL | XNUMBEREND; if (parser->top_level_value) { parser->expected &= ~XCOMMA; } FAILNUMBER (unexpected_character); } leading_zero: switch (NEXTBYTE) { case '.': /* "0." */ goto dot; case 'e': case 'E': /* "0e" */ goto exp; case NUMBEREND: /* "0" */ goto int_number_end; default: parser->expected = XDOT | XEXPONENTIAL | XNUMBEREND; if (parser->top_level_value) { parser->expected &= ~XCOMMA; } FAILNUMBER (unexpected_character); } leading_minus: switch (NEXTBYTE) { case DIGIT19: USEDIGIT; goto leading_digit19; case '0': goto leading_zero; default: parser->expected = XDIGIT; FAILNUMBER (unexpected_character); } /* Things like "5." are not allowed so there is no NUMBEREND here. */ dot: switch (NEXTBYTE) { case DIGIT: goto dot_digits; default: parser->expected = XDIGIT; FAILNUMBER (unexpected_character); } /* We have as much as 5.5 so we can stop. 
*/ dot_digits: switch (NEXTBYTE) { case DIGIT: goto dot_digits; case 'e': case 'E': goto exp; case NUMBEREND: goto exp_number_end; default: parser->expected = XDIGIT | XNUMBEREND | XEXPONENTIAL; if (parser->top_level_value) { parser->expected &= ~XCOMMA; } FAILNUMBER (unexpected_character); } /* Things like "10E" are not allowed so there is no NUMBEREND here. */ exp: switch (NEXTBYTE) { case '-': case '+': goto exp_sign; case DIGIT: goto exp_digits; default: parser->expected = XDIGIT | XMINUS | XPLUS; FAILNUMBER (unexpected_character); } exp_sign: switch (NEXTBYTE) { case DIGIT: goto exp_digits; default: parser->expected = XDIGIT; FAILNUMBER (unexpected_character); } /* We have as much as "3.0e1" or similar. */ exp_digits: switch (NEXTBYTE) { case DIGIT: goto exp_digits; case NUMBEREND: goto exp_number_end; default: parser->expected = XDIGIT | XNUMBEREND; if (parser->top_level_value) { parser->expected &= ~XCOMMA; } FAILNUMBER (unexpected_character); } exp_number_end: parser->end--; #ifdef PERLING d = strtod (start, & end); #else strtod (start, & end); #endif if ((unsigned char *) end == parser->end) { /* Success, strtod worked as planned. */ #ifdef PERLING return newSVnv (d); #elif defined (TOKENING) return json_token_new (parser, (unsigned char *) start, parser->end, json_token_number); #else return; #endif } else { /* Failure, strtod rejected the number. */ goto string_number_end; } int_number_end: parser->end--; if (parser->end - (unsigned char *) start < INT_MAX_DIGITS + minus) { if (minus) { guess = -guess; } /* printf ("number debug: '%.*s': %d\n", parser->end - (unsigned char *) start, start, guess); */ #ifdef PERLING return newSViv (guess); #elif defined (TOKENING) return json_token_new (parser, (unsigned char *) start, parser->end - 1, json_token_number); #else return; #endif } else { goto string_number_end; } string_number_end: /* We could not convert this number using a number conversion routine, so we are going to convert it to a string. 
This might happen with ridiculously long numbers or something. The JSON standard doesn't explicitly disallow integers with a million digits. */ #ifdef PERLING return newSVpv (start, (STRLEN) ((char *) parser->end - start)); #elif defined (TOKENING) return json_token_new (parser, (unsigned char *) start, parser->end - 1, json_token_number); #else return; #endif } #ifdef PERLING /* This copies our on-stack buffer "buffer" of size "size" into the end of a Perl SV called "string". */ #define COPYBUFFER { \ if (! string) { \ string = newSVpvn ((char *) buffer, size); \ } \ else { \ char * svbuf; \ STRLEN cur = SvCUR (string); \ if (SvLEN (string) <= cur + size) { \ SvGROW (string, cur + size); \ } \ svbuf = SvPVX (string); \ memcpy (svbuf + cur, buffer, size); \ SvCUR_set (string, cur + size); \ } \ } /* The size of the on-stack buffer. */ #define BUFSIZE 0x1000 /* We need a safety margin when dealing with the buffer, for example if we hit a Unicode \uabcd escape which needs to be decoded, we need to have enough bytes to write into the buffer. */ #define MARGIN 0x10 /* Speedup hack, a special "get_string" for Perl parsing which doesn't use parser->buffer but its own buffer on the stack. */ static INLINE SV * perl_get_string (json_parse_t * parser, STRLEN prefixlen) { unsigned char * b; unsigned char c; unsigned char * start; unsigned char buffer[BUFSIZE]; STRLEN size; SV * string; string = 0; start = parser->end; b = buffer; if (prefixlen > 0) { /* The string from parser->end to parser->end + prefixlen has already been checked and found not to contain the end of the string or any escapes, so we just copy the memory straight into the buffer. This was supposed to speed things up, but it didn't seem to. However this presumably cannot hurt either. */ if (prefixlen > BUFSIZE - MARGIN) { /* This is to account for the very unlikely case that the key of the JSON object is more than BUFSIZE - MARGIN bytes long and has an escape after more than BUFSIZE - MARGIN bytes. 
*/ prefixlen = BUFSIZE - MARGIN; } memcpy (buffer, parser->end, prefixlen); start += prefixlen; } string_start: size = b - buffer; if (size >= BUFSIZE - MARGIN) { /* Spot-check for an overflow. */ if (STRINGEND) { STRINGFAIL (unexpected_end_of_input); } /* "string_start" is a label for a goto which is applied until we get to the end of the string, so size keeps getting larger and larger. Now the string being parsed has proved to be too big for our puny BUFSIZE buffer, so we copy the contents of the buffer into the nice Perl scalar. */ COPYBUFFER; /* Set the point of copying bytes back to the beginning of buffer. We don't reset the memory in buffer. */ b = buffer; size = b - buffer; } NEXTBYTE; /* "if" statements seem to compile to something marginally faster than "switch" statements, for some reason. */ if (c < 0x20) { ILLEGALBYTE; } else if (c >= 0x20 && c <= 0x80) { /* For some reason or another, putting the following "if" statements after the above one results in about 4% faster code than putting them before it. */ if (c == '"') { goto string_end; } if (c == '\\') { HANDLE_ESCAPES (parser->end, start - 1); goto string_start; } * b++ = c; goto string_start; } else { /* Resort to switch statements for the UTF-8 stuff. This actually also contains statements to handle ASCII but they will never be executed. */ switch (c) { #define ADDBYTE * b = c; b++ #define startofutf8string start #include "utf8-byte-one.c" default: /* We have to give up, this byte is too mysterious for our weak minds. */ ILLEGALBYTE; } } string_end: if (STRINGEND) { STRINGFAIL (unexpected_end_of_input); } COPYBUFFER; return string; /* The rest of the UTF-8 stuff goes in here. 
*/ #include "utf8-next-byte.c" #undef ADDBYTE goto string_end; } #endif /* PERLING */ static SVPTR PREFIX (string) (json_parse_t * parser) { unsigned char c; #ifdef PERLING SV * string; STRLEN len; STRLEN prefixlen; #elif defined (TOKENING) json_token_t * string; int len; #else int len; #endif unsigned char * start; start = parser->end; len = 0; /* First of all, we examine the string to work out how long it is and to look for escapes. If we find them, we go to "contains_escapes" and go back and do all the hard work of converting the escapes into the right things. If we don't find any escapes, we just use "start" and "len" and copy the string from inside "input". This is a trick to increase the speed of processing. */ string_start: switch (NEXTBYTE) { case '"': goto string_end; case '\\': goto contains_escapes; #define ADDBYTE len++ #include "utf8-byte-one.c" /* Not a fall through. */ case BADBYTES: ILLEGALBYTE; } /* Parsing of the string ended due to a \0 byte flipping the "while" switch and we dropped into this section before reaching the string's end. */ ILLEGALBYTE; #include "utf8-next-byte.c" #undef ADDBYTE string_end: #ifdef PERLING /* Our string didn't contain any escape sequences, so we can just make a new SV * by copying the string from "start", the old position within the thing we're parsing to start + len. */ string = newSVpvn ((char *) start, len); #elif defined (TOKENING) string = json_token_new (parser, start - 1, start + len, json_token_string); #endif goto string_done; contains_escapes: #ifdef PERLING /* Use "perl_get_string" which keeps the buffer on the stack. Results in a minor speed increase. */ parser->end = start; prefixlen = (STRLEN) (parser->end - start); string = perl_get_string (parser, prefixlen); #elif defined (TOKENING) /* Don't use "len" here since it subtracts the escapes. */ parser->end = start; len = get_string (parser); string = json_token_new (parser, /* Location of first quote. */ start - 1, /* Location of last quote. 
*/ parser->end, json_token_string); #else parser->end = start; len = get_string (parser); #endif string_done: #ifdef PERLING if (parser->unicode || parser->force_unicode || parser->upgrade_utf8) { SvUTF8_on (string); parser->force_unicode = 0; } #endif #if defined (PERLING) || defined (TOKENING) return string; #else return; #endif } #define FAILLITERAL(c) \ parser->expected = XIN_LITERAL; \ parser->literal_char = c; \ parser->bad_beginning = start; \ parser->error = json_error_unexpected_character; \ parser->bad_type = json_literal; \ parser->bad_byte = parser->end - 1; \ failbadinput (parser) static SVPTR PREFIX (literal_true) (json_parse_t * parser) { unsigned char * start; start = parser->end - 1; if (* parser->end++ == 'r') { if (* parser->end++ == 'u') { if (* parser->end++ == 'e') { #ifdef PERLING if (parser->user_true) { return newSVsv (parser->user_true); } else if (parser->copy_literals) { return newSVsv (&PL_sv_yes); } else { return &PL_sv_yes; } #elif defined (TOKENING) return json_token_new (parser, start, parser->end - 1, json_token_literal); #else return; #endif } FAILLITERAL ('e'); } FAILLITERAL ('u'); } FAILLITERAL ('r'); } static SVPTR PREFIX (literal_false) (json_parse_t * parser) { unsigned char * start; start = parser->end - 1; if (* parser->end++ == 'a') { if (* parser->end++ == 'l') { if (* parser->end++ == 's') { if (* parser->end++ == 'e') { #ifdef PERLING if (parser->user_false) { return newSVsv (parser->user_false); } else if (parser->copy_literals) { return newSVsv (&PL_sv_no); } else { return &PL_sv_no; } #elif defined (TOKENING) return json_token_new (parser, start, parser->end - 1, json_token_literal); #else return; #endif } FAILLITERAL ('e'); } FAILLITERAL ('s'); } FAILLITERAL ('l'); } FAILLITERAL ('a'); } static SVPTR PREFIX (literal_null) (json_parse_t * parser) { unsigned char * start; start = parser->end - 1; if (* parser->end++ == 'u') { if (* parser->end++ == 'l') { if (* parser->end++ == 'l') { #ifdef PERLING if 
(parser->user_null) { return newSVsv (parser->user_null); } else if (parser->copy_literals) { return newSVsv (&PL_sv_undef); } else { SvREFCNT_inc (json_null); return json_null; } #elif defined (TOKENING) return json_token_new (parser, start, parser-> end - 1, json_token_literal); #else return; #endif } FAILLITERAL ('l'); } FAILLITERAL ('l'); } FAILLITERAL ('u'); } static SVPTR PREFIX (object) (json_parse_t * parser); /* Given one character, decide what to do next. This goes in the switch statement in both "object ()" and "array ()". */ #define PARSE(start,expected) \ \ case WHITESPACE: \ goto start; \ \ case '"': \ SETVALUE PREFIX (string) (parser); \ break; \ \ case '-': \ case DIGIT: \ parser->end_expected = expected; \ SETVALUE PREFIX (number) (parser); \ break; \ \ case '{': \ INCDEPTH; \ SETVALUE PREFIX (object) (parser); \ break; \ \ case '[': \ INCDEPTH; \ SETVALUE PREFIX (array) (parser); \ break; \ \ case 'f': \ SETVALUE PREFIX (literal_false) (parser); \ break; \ \ case 'n': \ SETVALUE PREFIX (literal_null) (parser); \ break; \ \ case 't': \ SETVALUE PREFIX (literal_true) (parser); \ break #define FAILARRAY(err) \ parser->bad_byte = parser->end - 1; \ parser->bad_type = json_array; \ parser->bad_beginning = start; \ parser->error = json_error_ ## err; \ failbadinput (parser) /* We have seen "[", so now deal with the contents of an array. At the end of this routine, "parser->end" is pointing one beyond the final "]" of the array. 
*/ static SVPTR PREFIX (array) (json_parse_t * parser) { unsigned char c; unsigned char * start; #ifdef PERLING AV * av; SV * value = & PL_sv_undef; #elif defined (TOKENING) json_token_t * av; json_token_t * prev; json_token_t * value; #endif start = parser->end - 1; #ifdef PERLING av = newAV (); #elif defined (TOKENING) av = json_token_new (parser, start, 0, json_token_array); prev = 0; #endif array_start: switch (NEXTBYTE) { PARSE (array_start, XARRAY_END); case ']': goto array_end; default: parser->expected = VALUE_START | XWHITESPACE | XARRAY_END; FAILARRAY (unexpected_character); } #ifdef PERLING av_push (av, value); #elif defined (TOKENING) prev = json_token_set_child (parser, av, value); #endif /* Accept either a comma or whitespace or the end of the array. */ array_middle: switch (NEXTBYTE) { case WHITESPACE: goto array_middle; case ',': #ifdef TOKENING value = json_token_new (parser, parser->end - 1, parser->end - 1, json_token_comma); prev = json_token_set_next (prev, value); #endif goto array_next; case ']': /* Array with at least one element. */ goto array_end; default: parser->expected = XWHITESPACE | XCOMMA | XARRAY_END; FAILARRAY (unexpected_character); } array_next: switch (NEXTBYTE) { PARSE (array_next, XARRAY_END); default: parser->expected = VALUE_START | XWHITESPACE; FAILARRAY (unexpected_character); } #ifdef PERLING av_push (av, value); #elif defined (TOKENING) prev = json_token_set_next (prev, value); #endif goto array_middle; array_end: DECDEPTH; #ifdef PERLING return newRV_noinc ((SV *) av); #elif defined (TOKENING) /* We didn't know where the end was until now. */ json_token_set_end (parser, av, parser->end - 1); return av; #else return; #endif } #define FAILOBJECT(err) \ parser->bad_byte = parser->end - 1; \ parser->bad_type = json_object; \ parser->bad_beginning = start; \ parser->error = json_error_ ## err; \ failbadinput (parser) /* We have seen "{", so now deal with the contents of an object. 
At the end of this routine, "parser->end" is pointing one beyond the final "}" of the object. */ static SVPTR PREFIX (object) (json_parse_t * parser) { char c; #ifdef PERLING HV * hv; SV * value; /* This is set to -1 if we want a Unicode key. See "perldoc perlapi" under "hv_store". */ int uniflag; #elif defined (TOKENING) json_token_t * hv; json_token_t * value; json_token_t * prev; #endif string_t key; /* Start of parsing. */ unsigned char * start; start = parser->end - 1; #ifdef PERLING if (parser->unicode || parser->upgrade_utf8) { /* Keys are unicode. */ uniflag = -1; } else { /* Keys are not unicode. */ uniflag = 1; } hv = newHV (); #elif defined (TOKENING) hv = json_token_new (parser, start, 0, json_token_object); prev = 0; #endif hash_start: switch (NEXTBYTE) { case WHITESPACE: goto hash_start; case '}': goto hash_end; case '"': #ifdef TOKENING value = json_token_new (parser, parser->end - 1, 0, json_token_string); /* We only come past the label "hash_start" once, so we don't need to check that there is not already a child. */ json_token_set_child (parser, hv, value); prev = value; #endif get_key_string (parser, & key); #ifdef TOKENING /* We didn't know where the end of the string was until now so we wait until after "get_key_string" to set the end. */ json_token_set_end (parser, value, parser->end - 1); #endif goto hash_next; default: parser->expected = XWHITESPACE | XSTRING_START | XOBJECT_END; FAILOBJECT (unexpected_character); } hash_middle: /* We are in the middle of a hash. We have seen a key:value pair, and now we're looking for either a comma and then another key-value pair, or a closing curly brace and the end of the hash. 
*/ switch (NEXTBYTE) { case WHITESPACE: goto hash_middle; case '}': goto hash_end; case ',': #ifdef TOKENING value = json_token_new (parser, parser->end - 1, parser->end - 1, json_token_comma); prev = json_token_set_next (prev, value); #endif goto hash_key; default: parser->expected = XWHITESPACE | XCOMMA | XOBJECT_END; FAILOBJECT (unexpected_character); } hash_key: /* We're looking for a key in the hash, which is a string starting with a double quotation mark. */ switch (NEXTBYTE) { case WHITESPACE: goto hash_key; case '"': #ifdef TOKENING value = json_token_new (parser, parser->end - 1, 0, json_token_string); prev = json_token_set_next (prev, value); #endif get_key_string (parser, & key); #ifdef TOKENING /* We didn't know where the end of the string was until now so we wait until after "get_key_string" to set the end. */ json_token_set_end (parser, value, parser->end - 1); #endif goto hash_next; default: parser->expected = XWHITESPACE | XSTRING_START; FAILOBJECT (unexpected_character); } hash_next: /* We've seen a key, now we're looking for a colon. */ switch (NEXTBYTE) { case WHITESPACE: goto hash_next; case ':': #ifdef TOKENING value = json_token_new (parser, parser->end - 1, parser->end - 1, json_token_colon); prev = json_token_set_next (prev, value); #endif goto hash_value; default: parser->expected = XWHITESPACE | XVALUE_SEPARATOR; FAILOBJECT (unexpected_character); } hash_value: /* We've seen a colon, now we're looking for a value, which can be anything at all, including another hash. Most of the cases are dealt with in the PARSE macro. */ switch (NEXTBYTE) { PARSE (hash_value, XOBJECT_END); default: parser->expected = XWHITESPACE | VALUE_START; FAILOBJECT (unexpected_character); } if (key.contains_escapes) { /* The key had something like "\n" in it, so we can't just copy the value but have to process it to remove the escapes. 
*/ #ifdef PERLING int klen; klen = resolve_string (parser, & key); key.start = parser->buffer; key.length = klen; #else resolve_string (parser, & key); #endif } #ifdef PERLING if (parser->detect_collisions) { /* Look in hv for an existing key with our values. */ SV ** sv_ptr; sv_ptr = hv_fetch (hv, (char *) key.start, key.length * uniflag, 0); if (sv_ptr) { parser->bad_byte = key.start; parser->bad_length = key.length; parser->bad_type = json_object; parser->bad_beginning = start; parser->error = json_error_name_is_not_unique; failbadinput (parser); } } (void) hv_store (hv, (char *) key.start, key.length * uniflag, value, 0); #endif #if defined(TOKENING) prev = json_token_set_next (prev, value); #endif goto hash_middle; hash_end: DECDEPTH; #ifdef PERLING return newRV_noinc ((SV *) hv); #elif defined (TOKENING) json_token_set_end (parser, hv, parser->end - 1); return hv; #else return; #endif } #undef PREFIX #undef SVPTR #undef SETVALUE #ifdef PERLING /* Set and delete user-defined literals. */ static void json_parse_delete_true (json_parse_t * parser) { if (parser->user_true) { SvREFCNT_dec (parser->user_true); parser->user_true = 0; } } static void json_parse_set_true (json_parse_t * parser, SV * user_true) { json_parse_delete_true (parser); if (! SvTRUE (user_true) && ! parser->no_warn_literals) { warn ("User-defined value for JSON true evaluates as false"); } if (parser->copy_literals && ! parser->no_warn_literals) { warn ("User-defined value overrules copy_literals"); } parser->user_true = user_true; SvREFCNT_inc (user_true); } static void json_parse_delete_false (json_parse_t * parser) { if (parser->user_false) { SvREFCNT_dec (parser->user_false); parser->user_false = 0; } } static void json_parse_set_false (json_parse_t * parser, SV * user_false) { json_parse_delete_false (parser); if (SvTRUE (user_false) && ! parser->no_warn_literals) { warn ("User-defined value for JSON false evaluates as true"); } if (parser->copy_literals && ! 
parser->no_warn_literals) { warn ("User-defined value overrules copy_literals"); } parser->user_false = user_false; SvREFCNT_inc (user_false); } static void json_parse_delete_null (json_parse_t * parser) { if (parser->user_null) { SvREFCNT_dec (parser->user_null); parser->user_null = 0; } } static void json_parse_set_null (json_parse_t * parser, SV * user_null) { if (parser->copy_literals && ! parser->no_warn_literals) { warn ("User-defined value overrules copy_literals"); } json_parse_delete_null (parser); parser->user_null = user_null; SvREFCNT_inc (user_null); } static void json_parse_free (json_parse_t * parser) { /* We can get here with depth > 0 if the parser fails and then the error is caught. */ if (parser->depth < 0) { warn ("Parser depth underflow %d", parser->depth); } json_parse_delete_true (parser); json_parse_delete_false (parser); json_parse_delete_null (parser); Safefree (parser); } static void json_parse_copy_literals (json_parse_t * parser, SV * onoff) { if (! parser->no_warn_literals && (parser->user_true || parser->user_false || parser->user_null)) { warn ("User-defined value overrules copy_literals"); } parser->copy_literals = SvTRUE (onoff) ? 
1 : 0; } #endif /* def PERLING */ JSON-Parse-0.61/script/000755 001751 001751 00000000000 14011073360 014134 5ustar00benben000000 000000 JSON-Parse-0.61/unicode.h000444 001751 001751 00000015164 14011073345 014437 0ustar00benben000000 000000 /* This file was generated by the following command: /home/ben/software/install/bin/cfunctions unicode.c */ #ifndef CFH_UNICODE_H #define CFH_UNICODE_H #line 8 "unicode.c" #define UTF8_MAX_LENGTH 5 #define UNICODE_MAXIMUM 0x10ffff #define UNICODE_UTF8_4 0x1fffff #define UNICODE_OK 0 #define UTF8_BAD_LEADING_BYTE -1 #define UNICODE_SURROGATE_PAIR -2 #define UNICODE_NOT_SURROGATE_PAIR -3 #define UTF8_BAD_CONTINUATION_BYTE -4 #define UNICODE_EMPTY_INPUT -5 #define UTF8_NON_SHORTEST -6 #define UNICODE_TOO_BIG -7 #define UNICODE_NOT_CHARACTER -8 #define UTF8_VALID 1 #define UTF8_INVALID 0 #line 98 "unicode.c" extern const uint8_t utf8_sequence_len[]; #line 103 "unicode.c" int32_t utf8_bytes (uint8_t c); #line 146 "unicode.c" int32_t utf8_no_checks (const uint8_t* input, const uint8_t** end_ptr); #line 197 "unicode.c" int32_t utf8_to_ucs2 (const uint8_t* input, const uint8_t** end_ptr); #line 295 "unicode.c" int32_t ucs2_to_utf8 (int32_t ucs2, uint8_t* utf8); #line 339 "unicode.c" int32_t unicode_to_surrogates (int32_t unicode, int32_t* hi_ptr, int32_t* lo_ptr); #line 358 "unicode.c" int32_t surrogates_to_unicode (int32_t hi, int32_t lo); #line 381 "unicode.c" int32_t surrogate_to_utf8 (int32_t hi, int32_t lo, uint8_t* utf8); #line 394 "unicode.c" int32_t unicode_chars_to_bytes (const uint8_t* utf8, int32_t n_chars); #line 414 "unicode.c" int32_t unicode_count_chars_fast (const uint8_t* utf8); #line 436 "unicode.c" int32_t unicode_count_chars (const uint8_t* utf8); #line 459 "unicode.c" #define BYTE_80_8F \ 0x80: case 0x81: case 0x82: case 0x83: case 0x84: case 0x85: case 0x86: \ case 0x87: case 0x88: case 0x89: case 0x8A: case 0x8B: case 0x8C: case 0x8D: \ case 0x8E: case 0x8F #define BYTE_80_9F \ 0x80: case 0x81: case 0x82: case 
0x83: case 0x84: case 0x85: case 0x86: \ case 0x87: case 0x88: case 0x89: case 0x8A: case 0x8B: case 0x8C: case 0x8D: \ case 0x8E: case 0x8F: case 0x90: case 0x91: case 0x92: case 0x93: case 0x94: \ case 0x95: case 0x96: case 0x97: case 0x98: case 0x99: case 0x9A: case 0x9B: \ case 0x9C: case 0x9D: case 0x9E: case 0x9F #define BYTE_80_BF \ 0x80: case 0x81: case 0x82: case 0x83: case 0x84: case 0x85: case 0x86: \ case 0x87: case 0x88: case 0x89: case 0x8A: case 0x8B: case 0x8C: case 0x8D: \ case 0x8E: case 0x8F: case 0x90: case 0x91: case 0x92: case 0x93: case 0x94: \ case 0x95: case 0x96: case 0x97: case 0x98: case 0x99: case 0x9A: case 0x9B: \ case 0x9C: case 0x9D: case 0x9E: case 0x9F: case 0xA0: case 0xA1: case 0xA2: \ case 0xA3: case 0xA4: case 0xA5: case 0xA6: case 0xA7: case 0xA8: case 0xA9: \ case 0xAA: case 0xAB: case 0xAC: case 0xAD: case 0xAE: case 0xAF: case 0xB0: \ case 0xB1: case 0xB2: case 0xB3: case 0xB4: case 0xB5: case 0xB6: case 0xB7: \ case 0xB8: case 0xB9: case 0xBA: case 0xBB: case 0xBC: case 0xBD: case 0xBE: \ case 0xBF #define BYTE_80_8F_B0_BF \ 0x80: case 0x81: case 0x82: case 0x83: case 0x84: case 0x85: case 0x86: \ case 0x87: case 0x88: case 0x89: case 0x8A: case 0x8B: case 0x8C: case 0x8D: \ case 0x8E: case 0x8F: case 0xB0: \ case 0xB1: case 0xB2: case 0xB3: case 0xB4: case 0xB5: case 0xB6: case 0xB7: \ case 0xB8: case 0xB9: case 0xBA: case 0xBB: case 0xBC: case 0xBD: case 0xBE: \ case 0xBF #define BYTE_80_B6_B8_BF \ 0x80: case 0x81: case 0x82: case 0x83: case 0x84: case 0x85: case 0x86: \ case 0x87: case 0x88: case 0x89: case 0x8A: case 0x8B: case 0x8C: case 0x8D: \ case 0x8E: case 0x8F: case 0x90: case 0x91: case 0x92: case 0x93: case 0x94: \ case 0x95: case 0x96: case 0x97: case 0x98: case 0x99: case 0x9A: case 0x9B: \ case 0x9C: case 0x9D: case 0x9E: case 0x9F: case 0xA0: case 0xA1: case 0xA2: \ case 0xA3: case 0xA4: case 0xA5: case 0xA6: case 0xA7: case 0xA8: case 0xA9: \ case 0xAA: case 0xAB: case 0xAC: case 0xAD: case 0xAE: case 
0xAF: case 0xB0: \ case 0xB1: case 0xB2: case 0xB3: case 0xB4: case 0xB5: case 0xB6: \ case 0xB8: case 0xB9: case 0xBA: case 0xBB: case 0xBC: case 0xBD: case 0xBE: \ case 0xBF #define BYTE_80_BD \ 0x80: case 0x81: case 0x82: case 0x83: case 0x84: case 0x85: case 0x86: \ case 0x87: case 0x88: case 0x89: case 0x8A: case 0x8B: case 0x8C: case 0x8D: \ case 0x8E: case 0x8F: case 0x90: case 0x91: case 0x92: case 0x93: case 0x94: \ case 0x95: case 0x96: case 0x97: case 0x98: case 0x99: case 0x9A: case 0x9B: \ case 0x9C: case 0x9D: case 0x9E: case 0x9F: case 0xA0: case 0xA1: case 0xA2: \ case 0xA3: case 0xA4: case 0xA5: case 0xA6: case 0xA7: case 0xA8: case 0xA9: \ case 0xAA: case 0xAB: case 0xAC: case 0xAD: case 0xAE: case 0xAF: case 0xB0: \ case 0xB1: case 0xB2: case 0xB3: case 0xB4: case 0xB5: case 0xB6: case 0xB7: \ case 0xB8: case 0xB9: case 0xBA: case 0xBB: case 0xBC: case 0xBD #define BYTE_90_BF \ 0x90: case 0x91: case 0x92: case 0x93: case 0x94: case 0x95: case 0x96: \ case 0x97: case 0x98: case 0x99: case 0x9A: case 0x9B: case 0x9C: case 0x9D: \ case 0x9E: case 0x9F: case 0xA0: case 0xA1: case 0xA2: case 0xA3: case 0xA4: \ case 0xA5: case 0xA6: case 0xA7: case 0xA8: case 0xA9: case 0xAA: case 0xAB: \ case 0xAC: case 0xAD: case 0xAE: case 0xAF: case 0xB0: case 0xB1: case 0xB2: \ case 0xB3: case 0xB4: case 0xB5: case 0xB6: case 0xB7: case 0xB8: case 0xB9: \ case 0xBA: case 0xBB: case 0xBC: case 0xBD: case 0xBE: case 0xBF #define BYTE_A0_BF \ 0xA0: case 0xA1: case 0xA2: case 0xA3: case 0xA4: case 0xA5: case 0xA6: \ case 0xA7: case 0xA8: case 0xA9: case 0xAA: case 0xAB: case 0xAC: case 0xAD: \ case 0xAE: case 0xAF: case 0xB0: case 0xB1: case 0xB2: case 0xB3: case 0xB4: \ case 0xB5: case 0xB6: case 0xB7: case 0xB8: case 0xB9: case 0xBA: case 0xBB: \ case 0xBC: case 0xBD: case 0xBE: case 0xBF #define BYTE_C2_DF \ 0xC2: case 0xC3: case 0xC4: case 0xC5: case 0xC6: case 0xC7: case 0xC8: \ case 0xC9: case 0xCA: case 0xCB: case 0xCC: case 0xCD: case 0xCE: case 0xCF: \ case 
0xD0: case 0xD1: case 0xD2: case 0xD3: case 0xD4: case 0xD5: case 0xD6: \ case 0xD7: case 0xD8: case 0xD9: case 0xDA: case 0xDB: case 0xDC: case 0xDD: \ case 0xDE: case 0xDF #define BYTE_E1_EC \ 0xE1: case 0xE2: case 0xE3: case 0xE4: case 0xE5: case 0xE6: case 0xE7: \ case 0xE8: case 0xE9: case 0xEA: case 0xEB: case 0xEC #define BYTE_F1_F3 \ 0xF1: case 0xF2: case 0xF3 #line 549 "unicode.c" int32_t valid_utf8 (const uint8_t* input, int32_t input_length); #line 563 "unicode.c" typedef struct utf8_info { int32_t len_read; int32_t runes_read; } utf8_info_t; #line 573 "unicode.c" int32_t validate_utf8 (const uint8_t* input, int32_t len, utf8_info_t* info); #line 775 "unicode.c" int32_t trim_to_utf8_start (const uint8_t** ptr); #line 802 "unicode.c" const char* unicode_code_to_error (int32_t code); #endif /* CFH_UNICODE_H */ JSON-Parse-0.61/json-entry-points.c000644 001751 001751 00000016352 13772624752 016451 0ustar00benben000000 000000 /* Empty input was provided. */ static void fail_empty (json_parse_t * parser) { parser->bad_type = json_initial_state; parser->error = json_error_empty_input; failbadinput (parser); } /* Check for stray non-whitespace after the end and free memory. */ static void check_end (json_parse_t * parser) { int c; end: switch (NEXTBYTE) { case WHITESPACE: goto end; case '\0': parser_free (parser); return; default: parser->bad_type = json_initial_state; parser->bad_byte = parser->end - 1; parser->expected = XWHITESPACE; parser->error = json_error_unexpected_character; failbadinput (parser); } } #define ENTRYDECL \ /* Our collection of bits and pieces. */ \ \ json_parse_t parser_o = {0}; \ json_parse_t * parser = & parser_o; \ json_parse_init (parser) #ifndef NOPERL /* Set up "parser" with the string from "json". */ static void getstring (SV * json, json_parse_t * parser) { STRLEN length; parser->input = (unsigned char *) SvPV (json, length); parser->end = parser->input; parser->length = (unsigned int) length; parser->unicode = SvUTF8 (json) ? 
1 : 0; } #endif /* ndef NOPERL */ #define SETUPPARSER \ parser->line = 1; \ parser->last_byte = parser->input + parser->length /* Error to throw if there is a character other than whitespace, "[" or "{" at the start of the JSON. */ #define BADCHAR \ parser->bad_byte = parser->end - 1; \ parser->bad_type = json_initial_state; \ parser->expected = XARRAYOBJECTSTART | VALUE_START | XWHITESPACE; \ parser->error = json_error_unexpected_character; \ failbadinput (parser) #ifndef NOPERL static SV * json_parse_run (json_parse_t * parser, SV * json) { /* The currently-parsed character. */ char c; /* The returned object. */ SV * r = & PL_sv_undef; getstring (json, parser); if (parser->length == 0) { fail_empty (parser); } SETUPPARSER; parse_start: switch (NEXTBYTE) { case '{': INCDEPTH; r = object (parser); break; case '[': INCDEPTH; r = array (parser); break; case '-': case '0': case DIGIT19: parser->top_level_value = 1; r = number (parser); break; case '"': parser->top_level_value = 1; r = string (parser); break; case 't': parser->top_level_value = 1; r = literal_true (parser); break; case 'f': parser->top_level_value = 1; r = literal_false (parser); break; case 'n': parser->top_level_value = 1; r = literal_null (parser); break; case WHITESPACE: goto parse_start; case '\0': /* We have an empty string. */ fail_empty (parser); default: BADCHAR; } check_end (parser); return r; } /* This is the entry point for non-object parsing. */ SV * parse (SV * json) { /* Make our own parser object on the stack. */ ENTRYDECL; /* Run it. */ return json_parse_run (parser, json); } /* This is the entry point for "safe" non-object parsing. */ SV * parse_safe (SV * json) { /* Make our own parser object on the stack. */ ENTRYDECL; parser_o.detect_collisions = 1; parser_o.copy_literals = 1; parser_o.warn_only = 1; parser_o.diagnostics_hash = 1; /* Run it. */ return json_parse_run (parser, json); } #endif /* ndef NOPERL */ /* Validation without Perl structures. 
*/ static void c_validate (json_parse_t * parser) { /* The currently-parsed character. */ char c; /* If the string is empty, throw an exception. */ if (parser->length == 0) { fail_empty (parser); } SETUPPARSER; validate_start: switch (NEXTBYTE) { case '{': INCDEPTH; valid_object (parser); break; case '[': INCDEPTH; valid_array (parser); break; case '-': case '0': case DIGIT19: parser->top_level_value = 1; valid_number (parser); break; case '"': parser->top_level_value = 1; valid_string (parser); break; case 't': parser->top_level_value = 1; valid_literal_true (parser); break; case 'f': parser->top_level_value = 1; valid_literal_false (parser); break; case 'n': parser->top_level_value = 1; valid_literal_null (parser); break; case WHITESPACE: goto validate_start; default: BADCHAR; } check_end (parser); } static INLINE void print_tokens (json_token_t * t) { printf ("Start: %d End: %d: Type: %s\n", t->start, t->end, token_names[t->type]); if (t->child) { printf ("Children:\n"); print_tokens (t->child); } if (t->next) { printf ("Next:\n"); print_tokens (t->next); } } #ifndef NOPERL static json_token_t * c_tokenize (json_parse_t * parser) { /* The currently-parsed character. */ char c; json_token_t * r; SETUPPARSER; tokenize_start: switch (NEXTBYTE) { case '{': r = tokenize_object (parser); break; case '[': r = tokenize_array (parser); break; case WHITESPACE: goto tokenize_start; default: BADCHAR; } check_end (parser); return r; } static void tokenize_free (json_token_t * token) { json_token_t * next; next = token->child; if (next) { if (! next->blessed) { tokenize_free (next); } token->child = 0; } next = token->next; if (next) { if (! next->blessed) { tokenize_free (next); } token->next = 0; } if (! token->blessed) { Safefree (token); } } /* This is the entry point for validation. 
*/ static void validate (SV * json, unsigned int flags) { ENTRYDECL; getstring (json, parser); if (parser->length == 0) { fail_empty (parser); } c_validate (& parser_o); } static void check (json_parse_t * parser, SV * json) { getstring (json, parser); c_validate (parser); } static json_token_t * tokenize (SV * json) { ENTRYDECL; getstring (json, parser); /* Mark this parser as being used for tokenizing to bypass the checks for memory leaks when the parser is freed. */ parser_o.tokenizing = 1; return c_tokenize (& parser_o); } /* Make a hash containing a diagnostic error from the parser. */ static SV * error_to_hash (json_parse_t * parser, char * error_as_string) { HV * error; error = newHV (); #ifdef HK #warn "Redefinition of macro HK" #endif /* def HK */ #undef HK #define HK(x, val) (void) hv_store (error, x, strlen (x), val, 0) HK("length", newSViv (parser->length)); HK("bad type", newSVpv (type_names[parser->bad_type], 0)); HK("error", newSVpv (json_errors[parser->error], 0)); HK("error as string", newSVpv (error_as_string, 0)); if (parser->bad_byte) { int position; position = (int) (parser->bad_byte - parser->input) + 1; HK("bad byte position", newSViv (position)); HK("bad byte contents", newSViv (* parser->bad_byte)); } if (parser->bad_beginning) { int bcstart; bcstart = (int) (parser->bad_beginning - parser->input) + 1; HK("start of broken component", newSViv (bcstart)); } if (parser->error == json_error_unexpected_character) { int j; AV * valid_bytes; valid_bytes = newAV (); make_valid_bytes (parser); for (j = 0; j < JSON3MAXBYTE; j++) { av_push (valid_bytes, newSViv (parser->valid_bytes[j])); } HK("valid bytes", newRV_inc ((SV *) valid_bytes)); } return newRV_inc ((SV *) error); #undef HK } #endif /* ndef NOPERL */ JSON-Parse-0.61/Makefile.PL000644 001751 001751 00000002400 14003431361 014576 0ustar00benben000000 000000 use strict; use warnings; use ExtUtils::MakeMaker; my $pm = 'lib/JSON/Parse.pm'; my $pod = 'lib/JSON/Parse.pod'; my $github = 
'github.com/benkasminbullock/JSON-Parse'; my $repo = "https://$github"; WriteMakefile ( NAME => 'JSON::Parse', VERSION_FROM => $pm, ABSTRACT_FROM => $pod, AUTHOR => 'Ben Bullock ', LICENSE => 'perl', PREREQ_PM => { 'Carp' => '0', }, META_MERGE => { 'meta-spec' => { version => 2, }, resources => { repository => { type => 'git', url => "git://$github.git", web => $repo, }, bugtracker => { web => "$repo/issues", }, }, no_index => { file => [ 'build/JPB.pm', 't/JPT.pm', 'xt/JPXT.pm', ], }, }, # All the C files are actually #included into Parse.xs so there is # only one object file. OBJECT => 'Parse.o', # Currently the oldest version to pass at CPAN testers. # http://matrix.cpantesters.org/?dist=JSON-Parse+0.24 MIN_PERL_VERSION => '5.008009', EXE_FILES => [ 'script/validjson', ], # Uncomment the following to get "author tests" # OPTIMIZE => '-Wall -g', # OPTIMIZE => '-Wall -O', # This achieves only small speedups with GCC. # OPTIMIZE => '-O3', # CC => 'gcc', ); JSON-Parse-0.61/json-common.c000644 001751 001751 00000107153 14010625307 015245 0ustar00benben000000 000000 /* These things are common between the validation and the parsing routines. This is #included into "Json3.xs". */ /* The following matches bytes which are not allowed in JSON strings. "All Unicode characters may be placed within the quotation marks except for the characters that must be escaped: quotation mark, reverse solidus, and the control characters (U+0000 through U+001F)." - from section 2.5 of RFC 4627 */ #define BADBYTES \ '\0':case 0x01:case 0x02:case 0x03: \ case 0x04:case 0x05:case 0x06:case 0x07: \ case 0x08:case 0x09:case 0x0A:case 0x0B: \ case 0x0C:case 0x0D:case 0x0E:case 0x0F: \ case 0x10:case 0x11:case 0x12:case 0x13: \ case 0x14:case 0x15:case 0x16:case 0x17: \ case 0x18:case 0x19:case 0x1A:case 0x1B: \ case 0x1C:case 0x1D:case 0x1E:case 0x1F /* Match whitespace. Whitespace is as defined by the JSON standard, not by Perl. 
"Insignificant whitespace is allowed before or after any of the six structural characters. ws = *( %x20 / ; Space %x09 / ; Horizontal tab %x0A / ; Line feed or New line %x0D ; Carriage return )" From JSON RFC. */ #define WHITESPACE \ '\n': \ parser->line++; \ /* Fallthrough. */ \ case ' ': \ case '\t': \ case '\r' /* Match digits. */ #define DIGIT \ '0': \ case '1': \ case '2': \ case '3': \ case '4': \ case '5': \ case '6': \ case '7': \ case '8': \ case '9' /* Match digits from 1-9. This is handled differently because JSON disallows leading zeros in numbers. */ #define DIGIT19 \ '1': \ case '2': \ case '3': \ case '4': \ case '5': \ case '6': \ case '7': \ case '8': \ case '9' /* Hexadecimal, in upper and lower case. */ #define UHEX 'A': case 'B': case 'C': case 'D': case 'E': case 'F' #define LHEX 'a': case 'b': case 'c': case 'd': case 'e': case 'f' /* As of version 0.45 of JSON::Parse, most of the UTF-8 switches are now in "unicode.c", but the following one is JSON-specific. */ /* This excludes '"' and '\'. 
*/ #define BYTE_20_7F \ 0x20: case 0x21:\ case 0x23: case 0x24: case 0x25: case 0x26: case 0x27: case 0x28: case 0x29:\ case 0x2A: case 0x2B: case 0x2C: case 0x2D: case 0x2E: case 0x2F: case 0x30:\ case 0x31: case 0x32: case 0x33: case 0x34: case 0x35: case 0x36: case 0x37:\ case 0x38: case 0x39: case 0x3A: case 0x3B: case 0x3C: case 0x3D: case 0x3E:\ case 0x3F: case 0x40: case 0x41: case 0x42: case 0x43: case 0x44: case 0x45:\ case 0x46: case 0x47: case 0x48: case 0x49: case 0x4A: case 0x4B: case 0x4C:\ case 0x4D: case 0x4E: case 0x4F: case 0x50: case 0x51: case 0x52: case 0x53:\ case 0x54: case 0x55: case 0x56: case 0x57: case 0x58: case 0x59: case 0x5A:\ case 0x5B: case 0x5D: case 0x5E: case 0x5F: case 0x60: case 0x61:\ case 0x62: case 0x63: case 0x64: case 0x65: case 0x66: case 0x67: case 0x68:\ case 0x69: case 0x6A: case 0x6B: case 0x6C: case 0x6D: case 0x6E: case 0x6F:\ case 0x70: case 0x71: case 0x72: case 0x73: case 0x74: case 0x75: case 0x76:\ case 0x77: case 0x78: case 0x79: case 0x7A: case 0x7B: case 0x7C: case 0x7D:\ case 0x7E: case 0x7F /* A "string_t" is a pointer into the input, which lives in "parser->input". The "string_t" structure is used for copying strings when the string does not contain any escapes. When a string contains escapes, it is copied into "parser->buffer". */ typedef struct string { unsigned char * start; #ifdef NOPERL int length; #else /* def NOPERL */ STRLEN length; #endif /* def NOPERL */ /* The "contains_escapes" flag is set if there are backslash escapes in the string like "\r", so that it needs to be cleaned up before using it. That means we use "parser->buffer". This is to speed things up, by not doing the cleanup when it isn't necessary. 
*/ unsigned contains_escapes : 1; } string_t; typedef enum { json_invalid, json_initial_state, json_string, json_number, json_literal, json_object, json_array, json_unicode_escape, json_overflow } json_type_t; const char * type_names[json_overflow] = { "invalid", "initial state", "string", "number", "literal", "object", "array", "unicode escape" }; /* The maximum value of bytes to check for. */ #define JSON3MAXBYTE 0x100 // uncomment this when running random test to terminal otherwise the // random characters will mess up the terminal. //#define JSON3MAXBYTE 0x80 #include "errors.c" /* Anything which could be the start of a value. */ #define VALUE_START (XARRAYOBJECTSTART|XSTRING_START|XDIGIT|XMINUS|XLITERAL) typedef struct parser { /* The length of "input". */ unsigned int length; /* The input. This is fixed at the beginning throughout parsing. */ unsigned char * input; /* The end-point of the parsing, the last parsed thing. This increments through "input". */ unsigned char * end; /* The last byte of "input", "parser->input + parser->length". This is used to detect overflows. */ unsigned char * last_byte; /* Allocated size of "buffer". */ int buffer_size; /* Buffer to stick strings into temporarily. */ unsigned char * buffer; /* Line number. */ int line; /* Where the beginning of the series of unfortunate events was. For example if we are parsing an array, this points to the "[" at the start of the array, or if we are parsing a string, this points to the byte after the '"' at the start of the string. */ unsigned char * bad_beginning; /* The bad type itself. */ json_type_t bad_type; /* What we were expecting to see when the error occurred. */ int expected; /* The byte which caused the parser to fail. */ unsigned char * bad_byte; unsigned bad_length; /* The type of error encountered. */ json_error_t error; /* If we were parsing a literal and found a bad character, what were we expecting? */ unsigned char literal_char; /* The end expected. 
*/ int end_expected; /* Number of mallocs. */ int n_mallocs; /* Bytes we accept. */ int valid_bytes[JSON3MAXBYTE]; /* Current depth into arrays or objects. */ int depth; /* Maximum depth we accept. */ int max_depth; /* Perl SV * pointers to copy for our true, false, and null values. */ void * user_true; void * user_false; void * user_null; /* If this is 1, we copy the literals into new SVs. */ unsigned int copy_literals : 1; /* If this is 1, we don't die on errors. */ unsigned int warn_only : 1; /* If this is 1, we check for hash collisions before inserting values. */ unsigned int detect_collisions : 1; /* Don't warn the user about non-false false and untrue true values, etc. */ unsigned int no_warn_literals : 1; /* Are we tokenizing the input? */ unsigned int tokenizing : 1; #ifdef TESTRANDOM /* Return point for longjmp. */ jmp_buf biscuit; char * last_error; #endif /* Unicode? */ unsigned int unicode : 1; /* Force unicode. This happens when we hit "\uxyzy". */ unsigned int force_unicode : 1; /* Upgrade the input from bytes to characters. */ unsigned int upgrade_utf8 : 1; /* Top-level value? We need to know this for the case when we are parsing a number and suddenly meet a '\0' byte. If it's a top level value then we can assume that is just the end of the JSON, but if it's not a top-level value then that is an error, since the end array or end object at least are missing. */ unsigned int top_level_value : 1; /* Produce diagnostics as a hash rather than a string. */ unsigned int diagnostics_hash : 1; #ifdef TESTRANDOM /* This is true if we are testing with random bytes. */ unsigned randomtest : 1; #endif /* def TESTRANDOM */ } json_parse_t; /* Maximum depth of parsing. */ #define JSON_PARSE_DEFAULT_MAX_DEPTH 10000 static void json_parse_init (json_parse_t * parser) { parser->max_depth = JSON_PARSE_DEFAULT_MAX_DEPTH; } /* Check if the user has set something different from the default, and don't croak if we are still OK. */ /* Increment the parsing depth, with check. 
*/ //#define DEBUG_DEPTH #ifdef DEBUG_DEPTH #define PRINT_DEPTH \ printf ("%s:%d: %d\n", __FILE__, __LINE__, parser->depth); #else #define PRINT_DEPTH #endif #define INCDEPTH \ PRINT_DEPTH; \ parser->depth++; \ if (parser->depth > parser->max_depth) { \ croak ("error: too many [ or {, maximum is %d", \ parser->max_depth); \ } #define DECDEPTH \ parser->depth--; \ PRINT_DEPTH; #ifndef NOPERL static SV * error_to_hash (json_parse_t * parser, char * error_as_string); #endif /* ndef NOPERL */ #ifdef __GNUC__ #define INLINE inline #else #define INLINE #endif /* def __GNUC__ */ /* The size of the buffer for printing errors. */ #define ERRORMSGBUFFERSIZE 0x1000 /* Declare all bad inputs as non-returning. */ #ifdef __GNUC__ #if 0 static void failbadinput_json (json_parse_t * parser) __attribute__ ((noreturn)); #endif /* 0 */ static void failbadinput (json_parse_t * parser) __attribute__ ((noreturn)); static INLINE void failbug (char * file, int line, json_parse_t * parser, const char * format, ...) __attribute__ ((noreturn)); #endif /* Assert failure handler. Coming here means there is a bug in the code rather than in the JSON input. We still send it to Perl via "croak". */ static INLINE void failbug (char * file, int line, json_parse_t * parser, const char * format, ...) { char buffer[ERRORMSGBUFFERSIZE]; va_list a; va_start (a, format); vsnprintf (buffer, ERRORMSGBUFFERSIZE, format, a); va_end (a); croak ("JSON::Parse: %s:%d: Internal error at line %d: %s", file, line, parser->line, buffer); } /* This is a test for whether the string has ended, which we use when we catch a zero byte in an unexpected part of the input. Here we use ">" rather than ">=" because "parser->end" is incremented by one after each access. See the NEXTBYTE macro. */ #define STRINGEND (parser->end > parser->last_byte) /* One of the types which demands a specific next byte. */ #define SPECIFIC(c) (((c) & XIN_LITERAL) || ((c) & XIN_SURROGATE_PAIR)) /* Make the list of valid bytes. 
*/ static void make_valid_bytes (json_parse_t * parser) { int i; for (i = 0; i < JSON3MAXBYTE; i++) { parser->valid_bytes[i] = 0; } for (i = 0; i < n_expectations; i++) { int X; X = 1<expected & X) { int j; for (j = 0; j < JSON3MAXBYTE; j++) { parser->valid_bytes[j] |= allowed[i][j]; } } } if (SPECIFIC (parser->expected)) { parser->valid_bytes[parser->literal_char] = 1; } } /* Repeated arguments to snprintf. */ #define SNEND buffer + string_end #define SNSIZE ERRORMSGBUFFERSIZE - string_end /* Disabled due to clash with Darwin compiler: http://www.cpantesters.org/cpan/report/7c69e0f0-70c0-11e3-95aa-bcf4d95af652 http://www.cpantesters.org/cpan/report/6cde36da-6fd1-11e3-946f-2b87da5af652 #define SNEND, SNSIZE buffer + string_end, ERRORMSGBUFFERSIZE - string_end */ #define EROVERFLOW \ if (string_end >= ERRORMSGBUFFERSIZE - 0x100) { \ failbug (__FILE__, __LINE__, parser, \ "Error string length is %d" \ " of maximum %d. Bailing out.", \ string_end, ERRORMSGBUFFERSIZE); \ } #if 0 /* Coming in to this routine, we have checked the error for validity and converted at failbadinput. If this is called directly the bug traps won't work. 
*/ static void failbadinput_json (json_parse_t * parser) { char buffer[ERRORMSGBUFFERSIZE]; int string_end; string_end = 0; string_end += snprintf (SNEND, SNSIZE, "{" "\"input length\":%d" ",\"bad type\":\"%s\"" ",\"error\":\"%s\"", parser->length, type_names[parser->bad_type], json_errors[parser->error]); EROVERFLOW; if (parser->bad_byte) { int position; position = (int) (parser->bad_byte - parser->input) + 1, string_end += snprintf (SNEND, SNSIZE, ",\"bad byte position\":%d" ",\"bad byte contents\":%d", position, * parser->bad_byte); EROVERFLOW; } if (parser->bad_beginning) { int bcstart; bcstart = (int) (parser->bad_beginning - parser->input) + 1; string_end += snprintf (SNEND, SNSIZE, ",\"start of broken component\":%d", bcstart); EROVERFLOW; } if (parser->error == json_error_unexpected_character) { int j; make_valid_bytes (parser); string_end += snprintf (SNEND, SNSIZE, ",\"valid bytes\":[%d", parser->valid_bytes[0]); EROVERFLOW; for (j = 1; j < JSON3MAXBYTE; j++) { string_end += snprintf (SNEND, SNSIZE, ",%d", parser->valid_bytes[j]); } EROVERFLOW; string_end += snprintf (SNEND, SNSIZE, "]"); EROVERFLOW; } string_end += snprintf (SNEND, SNSIZE, "}\n"); EROVERFLOW; croak (buffer); } #endif /* 0 */ static void failbadinput (json_parse_t * parser) { char buffer[ERRORMSGBUFFERSIZE]; int string_end; int i; int l; const char * format; /* If the error is "unexpected character", and we are at the end of the input, change to "unexpected end of input". This is probably triggered by reading a byte with value '\0', but we don't check the value of "* parser->bad_byte" in the following "if" statement, since it's an error to go past the expected end of the string regardless of whether the byte is '\0'. */ if (parser->error == json_error_unexpected_character && STRINGEND) { parser->error = json_error_unexpected_end_of_input; /* We don't care about what byte it was, we went past the end of the string, which is already a failure. 
*/ parser->bad_byte = 0; /* It trips an assertion if "parser->expected" is set for anything other than an "unexpected character" error. */ parser->expected = 0; } /* Array bounds check for error message. */ if (parser->error <= json_error_invalid && parser->error >= json_error_overflow) { failbug (__FILE__, __LINE__, parser, "Bad value for parser->error: %d\n", parser->error); } format = json_errors[parser->error]; l = strlen (format); if (l >= ERRORMSGBUFFERSIZE - 1) { l = ERRORMSGBUFFERSIZE - 1; } for (i = 0; i < l; i++) { buffer[i] = format[i]; } buffer[l] = '\0'; string_end = l; /* If we got an unexpected character somewhere, append the exact value of the character to the error message. */ if (parser->error == json_error_unexpected_character) { /* This contains the unexpected character itself, from the "parser->bad_byte" pointer. */ unsigned char bb; /* Make sure that we were told where the unexpected character was. Unlocated unexpected characters are a bug. */ if (! parser->bad_byte) { failbug (__FILE__, __LINE__, parser, "unexpected character error but " "parser->bad_byte is invalid"); } bb = * parser->bad_byte; /* We have to check what kind of character. For example printing '\0' with %c will just give a message which suddenly ends when printed to the terminal, and other control characters will be invisible. So display the character in a different way depending on whether it's printable or not. */ /* Don't use "isprint" because on Windows it seems to think that 0x80 is printable: http://www.cpantesters.org/cpan/report/d6438b68-6bf4-1014-8647-737bdb05e747 */ if (bb >= 0x20 && bb < 0x7F) { /* Printable character, print the character itself. */ string_end += snprintf (SNEND, SNSIZE, " '%c'", bb); EROVERFLOW; } else { /* Unprintable character, print its hexadecimal value. 
*/ string_end += snprintf (SNEND, SNSIZE, " 0x%02x", bb); EROVERFLOW; } } else if (parser->error == json_error_name_is_not_unique) { string_end += snprintf (SNEND, SNSIZE, ": \"%.*s\"", parser->bad_length, parser->bad_byte); } /* "parser->bad_type" contains what was being parsed when the error occurred. This should never be undefined. */ if (parser->bad_type <= json_invalid || parser->bad_type >= json_overflow) { failbug (__FILE__, __LINE__, parser, "parsing type set to invalid value %d in error message", parser->bad_type); } string_end += snprintf (SNEND, SNSIZE, " parsing %s", type_names[parser->bad_type]); EROVERFLOW; if (parser->bad_beginning) { int bad_byte; bad_byte = (parser->bad_beginning - parser->input) + 1; string_end += snprintf (SNEND, SNSIZE, " starting from byte %d", bad_byte); EROVERFLOW; } /* "parser->expected" is set for the "unexpected character" error and it tells the user what kind of input was expected. It contains various flags or'd together, so this goes through each possible flag and prints a message for it. */ if (parser->expected) { if (parser->error == json_error_unexpected_character) { int i; int joined; unsigned char bb; bb = * parser->bad_byte; string_end += snprintf (SNEND, SNSIZE, ": expecting "); EROVERFLOW; joined = 0; if (SPECIFIC (parser->expected)) { if (! parser->literal_char) { failbug (__FILE__, __LINE__, parser, "expected literal character unset"); } string_end += snprintf (SNEND, SNSIZE, "'%c'", parser->literal_char); EROVERFLOW; } for (i = 0; i < n_expectations; i++) { int X; X = 1<expected & X) { /* Check that this really is disallowed. 
*/ if (allowed[i][bb]) { failbug (__FILE__, __LINE__, parser, "mismatch parsing %s: got %X " "but it's allowed by %s (%d)", type_names[parser->bad_type], bb, input_expectation[i], i); } if (joined) { string_end += snprintf (SNEND, SNSIZE, " or "); EROVERFLOW; } string_end += snprintf (SNEND, SNSIZE, "%s", input_expectation[i]); EROVERFLOW; joined = 1; } } } else { failbug (__FILE__, __LINE__, parser, "'expected' is set but error %s != unexp. char", json_errors[parser->error]); } } else if (parser->error == json_error_unexpected_character) { failbug (__FILE__, __LINE__, parser, "unexpected character error for 0X%02X at byte %d " "with no expected value set", * parser->bad_byte, parser->bad_byte - parser->input); } #undef SNEND #undef SNSIZE #ifdef TESTRANDOM /* Go back to where we came from. */ if (parser->randomtest) { parser->last_error = buffer; make_valid_bytes (parser); longjmp (parser->biscuit, 1); } #endif /* def TESTRANDOM */ #ifndef NOPERL if (parser->diagnostics_hash) { #if PERL_VERSION > 12 croak_sv (error_to_hash (parser, buffer)); #endif /* PERL_VERSION > 12 */ } #endif /* ndef NOPERL */ if (parser->length > 0) { if (parser->end - parser->input > parser->length) { croak ("JSON error at line %d: %s", parser->line, buffer); } else if (parser->bad_byte) { croak ("JSON error at line %d, byte %d/%d: %s", parser->line, (int) (parser->bad_byte - parser->input + 1), parser->length, buffer); } else { croak ("JSON error at line %d: %s", parser->line, buffer); } } else { croak ("JSON error: %s", buffer); } } #undef SPECIFIC /* This is for failures not due to errors in the input or to bugs but to exhaustion of resources, i.e. out of memory, or file errors would go here if there were any C file opening things anywhere. */ static INLINE void failresources (json_parse_t * parser, const char * format, ...) 
{ char buffer[ERRORMSGBUFFERSIZE]; va_list a; va_start (a, format); vsnprintf (buffer, ERRORMSGBUFFERSIZE, format, a); va_end (a); croak ("Parsing failed at line %d, byte %d/%d: %s", parser->line, (int) (parser->end - parser->input), parser->length, buffer); } #undef ERRORMSGBUFFERSIZE /* Get more memory for "parser->buffer". */ static void expand_buffer (json_parse_t * parser, int length) { if (parser->buffer_size < 2 * length + 0x100) { parser->buffer_size = 2 * length + 0x100; if (parser->buffer) { Renew (parser->buffer, parser->buffer_size, unsigned char); } else { Newx (parser->buffer, parser->buffer_size, unsigned char); parser->n_mallocs++; } if (! parser->buffer) { failresources (parser, "out of memory"); } } } #define UNIFAIL(err) \ parser->bad_type = json_unicode_escape; \ parser->error = json_error_ ## err; \ failbadinput (parser) /* Parse the hex bit of a \uXYZA escape. */ static INLINE int parse_hex_bytes (json_parse_t * parser, unsigned char * p) { int k; int unicode; unicode = 0; for (k = 0; k < strlen ("ABCD"); k++) { unsigned char c; c = p[k]; switch (c) { case DIGIT: unicode = unicode * 16 + c - '0'; break; case UHEX: unicode = unicode * 16 + c - 'A' + 10; break; case LHEX: unicode = unicode * 16 + c - 'a' + 10; break; case '\0': if (p + k - parser->input >= parser->length) { UNIFAIL (unexpected_end_of_input); } break; default: parser->bad_byte = p + k; parser->expected = XHEXADECIMAL_CHARACTER; UNIFAIL (unexpected_character); } } return unicode; } /* STRINGFAIL applies for any kind of failure within a string, not just unexpected character errors. 
*/ #define STRINGFAIL(err) \ parser->error = json_error_ ## err; \ parser->bad_type = json_string; \ failbadinput (parser) #define FAILSURROGATEPAIR(c) \ parser->expected = XIN_SURROGATE_PAIR; \ parser->literal_char = c; \ parser->bad_beginning = start - 2; \ parser->error = json_error_unexpected_character; \ parser->bad_type = json_unicode_escape; \ parser->bad_byte = p - 1; \ failbadinput (parser) static INLINE unsigned char * do_unicode_escape (json_parse_t * parser, unsigned char * p, unsigned char ** b_ptr) { int unicode; unsigned int plus; unsigned char * start; start = p; unicode = parse_hex_bytes (parser, p); p += 4; plus = ucs2_to_utf8 (unicode, *b_ptr); if (plus == UTF8_BAD_LEADING_BYTE || plus == UTF8_BAD_CONTINUATION_BYTE) { failbug (__FILE__, __LINE__, parser, "Failed to parse unicode input %.4s", start); } else if (plus == UNICODE_SURROGATE_PAIR) { int unicode2; int plus2; if (parser->last_byte - p < 6) { parser->bad_beginning = start - 2; parser->bad_type = json_unicode_escape; parser->error = json_error_unexpected_end_of_input; failbadinput (parser); } if (*p++ == '\\') { if (*p++ == 'u') { unicode2 = parse_hex_bytes (parser, p); p += 4; plus2 = surrogate_to_utf8 (unicode, unicode2, * b_ptr); if (plus2 <= 0) { if (plus2 == UNICODE_NOT_SURROGATE_PAIR) { parser->bad_byte = 0; parser->bad_beginning = p - 4; UNIFAIL (not_surrogate_pair); } else { failbug (__FILE__, __LINE__, parser, "unhandled error %d from surrogate_to_utf8", plus2); } } * b_ptr += plus2; goto end; } else { FAILSURROGATEPAIR ('u'); } } else { FAILSURROGATEPAIR ('\\'); } } else if (plus <= 0) { failbug (__FILE__, __LINE__, parser, "unhandled error code %d while decoding unicode escape", plus); } * b_ptr += plus; end: if (unicode >= 0x80 && ! parser->unicode) { /* Force the UTF-8 flag on for this string. */ parser->force_unicode = 1; } return p; } /* Handle backslash escapes. We can't use the NEXTBYTE macro here for the reasons outlined below. 
*/ #if 0 /* I expected a switch statement to compile to faster code, but it doesn't seem to. */ #define HANDLE_ESCAPES(p,start) \ switch (c = * ((p)++)) { \ \ case '\\': \ case '/': \ case '"': \ *b++ = c; \ break; \ \ case 'b': \ *b++ = '\b'; \ break; \ \ case 'f': \ *b++ = '\f'; \ break; \ \ case 'n': \ *b++ = '\n'; \ break; \ \ case 'r': \ *b++ = '\r'; \ break; \ \ case 't': \ *b++ = '\t'; \ break; \ \ case 'u': \ p = do_unicode_escape (parser, p, & b); \ break; \ \ default: \ parser->bad_beginning = start; \ parser->bad_byte = p - 1; \ parser->expected = XESCAPE; \ STRINGFAIL (unexpected_character); \ } #else /* This is identical to the above macro, but it uses if statements rather than a switch statement. Using the Clang compiler, this results in about 2.5% faster code, for some reason or another. */ #define HANDLE_ESCAPES(p,start) \ c = * ((p)++); \ if (c == '\\' || c == '/' || c == '"') { \ *b++ = c; \ } \ else if (c == 'b') { \ *b++ = '\b'; \ } \ else if (c == 'f') { \ *b++ = '\f'; \ } \ else if (c == 'n') { \ *b++ = '\n'; \ } \ else if (c == 'r') { \ *b++ = '\r'; \ } \ else if (c == 't') { \ *b++ = '\t'; \ } \ else if (c == 'u') { \ p = do_unicode_escape (parser, p, & b); \ } \ else { \ parser->bad_beginning = start; \ parser->bad_byte = p - 1; \ parser->expected = XESCAPE; \ STRINGFAIL (unexpected_character); \ } #endif /* Resolve "s" by converting escapes into the appropriate things. Put the result into "parser->buffer". The return value is the length of the string. */ static INLINE int resolve_string (json_parse_t * parser, string_t * s) { /* The pointer where we copy the string. This points into "parser->buffer". */ unsigned char * b; /* "p" is the pointer into "parser->input", using "s->start" to get the start point. We don't use "parser->end" for this job because "resolve_string" is called only after the value of the object is resolved. E.g. 
if the object goes like {"hot":{"potatoes":"tomatoes"}} then this routine is called first for "potatoes" and then for "hot" as each sub-element of the hashes is resolved. We don't want to mess around with the value of "parser->end", which is always pointing to one after the last byte viewed. */ unsigned char * p; p = s->start; /* Ensure we have enough memory to fit the string. */ expand_buffer (parser, s->length); b = parser->buffer; while (p - s->start < s->length) { unsigned char c; c = *p++; if (c == '\\') { HANDLE_ESCAPES (p, s->start - 1); } else { *b++ = c; } } /* This is the length of the string in bytes. */ return b - parser->buffer; } #define NEXTBYTE (c = *parser->end++) /* Get an object key value and put it into "key". Check for escapes. */ static INLINE void get_key_string (json_parse_t * parser, string_t * key) { unsigned char c; int i; key->start = parser->end; key->contains_escapes = 0; key_string_next: switch (NEXTBYTE) { case '"': /* Go on eating bytes until we find a ". */ break; case '\\': /* Mark this string as containing escapes. */ key->contains_escapes = 1; switch (NEXTBYTE) { case '\\': case '/': case '"': case 'b': case 'f': case 'n': case 'r': case 't': /* Eat another byte. */ goto key_string_next; case 'u': /* i counts the bytes, from 0 to 3. */ i = 0; unitunes: switch (NEXTBYTE) { case DIGIT: case UHEX: case LHEX: i++; if (i >= strlen ("ABCD")) { goto key_string_next; } else { goto unitunes; } /* not a fall through, we always "goto" above. */ default: parser->bad_beginning = parser->end - 1 - i; parser->expected = XHEXADECIMAL_CHARACTER; parser->bad_byte = parser->end - 1; UNIFAIL (unexpected_character); } /* not a fall through, we either UNIFAIL or goto above. */ default: parser->bad_beginning = key->start - 1; parser->expected = XESCAPE; parser->bad_byte = parser->end - 1; STRINGFAIL (unexpected_character); } /* Not a fall through, we never arrive here. 
*/ case BADBYTES: parser->bad_beginning = key->start - 1; parser->expected = XSTRINGCHAR; parser->bad_byte = parser->end - 1; STRINGFAIL (unexpected_character); /* Not a fall through, STRINGFAIL does not return. */ #define ADDBYTE #define string_start key_string_next #define startofutf8string (key->start) #include "utf8-byte-one.c" /* Not a fall through. */ default: parser->bad_beginning = key->start - 1; parser->expected = XSTRINGCHAR; parser->bad_byte = parser->end - 1; STRINGFAIL (unexpected_character); } key->length = parser->end - key->start - 1; return; #include "utf8-next-byte.c" #undef startofutf8string #undef string_start #undef ADDBYTE } /* "start - 1" puts the start on the " rather than after it. "start" is usually after the quote because the quote is eaten on the way here. */ #define ILLEGALBYTE \ parser->bad_beginning = start - 1; \ parser->bad_byte = parser->end - 1; \ parser->expected = XSTRINGCHAR; \ STRINGFAIL (unexpected_character) /* Resolve the string pointed to by "parser->end" into "parser->buffer". The return value is the length of the string. This is only called if the string has \ escapes in it. */ static INLINE int get_string (json_parse_t * parser) { unsigned char * b; unsigned char c; unsigned char * start; start = parser->end; if (! parser->buffer) { expand_buffer (parser, 0x1000); } b = parser->buffer; string_start: if (b - parser->buffer >= parser->buffer_size - 0x100) { /* Save our offset in parser->buffer, because "realloc" is called by "expand_buffer", and "b" may no longer point to a meaningful location. */ int size = b - parser->buffer; expand_buffer (parser, 2 * parser->buffer_size); b = parser->buffer + size; } switch (NEXTBYTE) { case '"': goto string_end; break; case '\\': HANDLE_ESCAPES (parser->end, start - 1); goto string_start; #define ADDBYTE (* b++ = c) #define startofutf8string start #include "utf8-byte-one.c" /* Not a fall through. 
*/ default: /* fall through */ case BADBYTES: ILLEGALBYTE; } if (STRINGEND) { STRINGFAIL (unexpected_end_of_input); } string_end: return b - parser->buffer; #include "utf8-next-byte.c" #undef ADDBYTE goto string_end; } static void parser_free (json_parse_t * parser) { if (parser->buffer) { Safefree (parser->buffer); parser->n_mallocs--; } /* There is a discrepancy between the number of things used and the number freed. */ if (parser->n_mallocs != 0) { /* The tokenizing parser is freed before the tokens themselves are freed. Whether or not the tokens are freed correctly can be checked in "tokenize_free" in "json-entry-points.c". */ if (! parser->tokenizing) { fprintf (stderr, "%s:%d: %d pieces of unfreed memory remain.\n", __FILE__, __LINE__, parser->n_mallocs); } } parser->buffer = 0; parser->buffer_size = 0; } typedef enum json_token_type { json_token_invalid, json_token_number, json_token_string, json_token_key, json_token_literal, json_token_comma, json_token_colon, json_token_object, json_token_array, n_json_tokens } json_token_type_t; const char * token_names[n_json_tokens] = { "invalid", "number", "string", "key", "literal", "comma", "colon", "object", "array" }; typedef struct json_token json_token_t; struct json_token { json_token_t * child; json_token_t * next; unsigned int start; unsigned int end; json_token_type_t type; unsigned int parent; unsigned blessed : 1; }; #define JSON_TOKEN_PARENT_INVALID 0 /* "start" is the first character of the thing. "end" is the last character of the thing. If the thing only takes one character then "start == end" should be true. */ static json_token_t * json_token_new (json_parse_t * parser, unsigned char * start, unsigned char * end, json_token_type_t type) { json_token_t * new; /* Check the token in various ways. 
*/ switch (type) { case json_token_string: case json_token_key: if (* start != '"') { if (end) { failbug (__FILE__, __LINE__, parser, "no quotes at start of string '%.*s'", end - start, start); } else { failbug (__FILE__, __LINE__, parser, "no quotes at start of string '%.10s'", start); } } if (end && * end != '"') { failbug (__FILE__, __LINE__, parser, "'%c' is not a quote at end of string '%.*s'", * end, end - start, start); } break; case json_token_number: if (* start - '0' > 9 && * start != '-') { failbug (__FILE__, __LINE__, parser, "bad character %c at start of number", * start); } if (* end - '0' > 9) { failbug (__FILE__, __LINE__, parser, "bad character %c at end of number", * end); } break; case json_token_object: if (* start != '{' || (end && * end != '}')) { failbug (__FILE__, __LINE__, parser, "no { or } in object %.*s: char %X", end ? end - start : strlen ((char *) start), start, * start); } break; case json_token_array: if (* start != '[' || (end && * end != ']')) { failbug (__FILE__, __LINE__, parser, "no [ or ] in array"); } break; case json_token_comma: if (end - start != 0 || * start != ',') { failbug (__FILE__, __LINE__, parser, "not a comma %.*s", end - start); } break; case json_token_colon: if (end - start != 0 || * start != ':') { failbug (__FILE__, __LINE__, parser, "not a colon %.*s", end - start); } break; case json_token_literal: break; default: croak ("%s:%d: bad type %d\n", __FILE__, __LINE__, type); } Newx (new, 1, json_token_t); // static int nnew; // nnew++; // fprintf (stderr, "New %d %p\n", nnew, new); parser->n_mallocs++; #if 0 fprintf (stderr, "%s:%d: parser->n_mallocs = %d\n", __FILE__, __LINE__, parser->n_mallocs); #endif /* 0 */ new->start = start - parser->input; if (end) { new->end = end - parser->input + 1; } else { new->end = 0; } new->type = type; new->parent = JSON_TOKEN_PARENT_INVALID; new->child = 0; new->next = 0; return new; } static void json_token_set_end (json_parse_t * parser, json_token_t * jt, unsigned char * 
end) { if (jt->end != 0) { int offset = (int) (end - parser->input); failbug (__FILE__, __LINE__, parser, "attempt to set end as %d is now %d\n", offset, jt->end); } switch (jt->type) { case json_token_string: case json_token_key: if (* end != '"') { failbug (__FILE__, __LINE__, parser, "no quotes at end of string"); } break; case json_token_object: if (* end != '}') { failbug (__FILE__, __LINE__, parser, "no } at end of object"); } break; case json_token_array: if (* end != ']') { failbug (__FILE__, __LINE__, parser, "no ] at end of array"); } break; default: failbug (__FILE__, __LINE__, parser, "set end for unknown type %d", jt->type); break; } jt->end = end - parser->input + 1; } static json_token_t * json_token_set_child (json_parse_t * parser, json_token_t * parent, json_token_t * child) { switch (parent->type) { case json_token_object: case json_token_array: break; default: failbug (__FILE__, __LINE__, parser, "bad parent type %d\n", parent->type); } parent->child = child; return child; } static json_token_t * json_token_set_next (json_token_t * prev, json_token_t * next) { prev->next = next; return next; } JSON-Parse-0.61/MANIFEST.SKIP000644 001751 001751 00000002552 14003431376 014540 0ustar00benben000000 000000 (build|make-pod|clean|make-errors|run)\.pl blib/.* .gitignore .git/.* Makefile$ pm_to_blib MYMETA..* .*\.bak ^Json3-[0-9.]/$ ^Json3-[0-9.]\.tar\.gz$ ^Parse.(?:c|o|bs)$ ^peek\.pl$ ^test-empty-string\.pl$ ^xt/.* ^lib/JSON/Parse\.pod\.tmpl$ ^Parse\.bs$ # Can download from github if needed. benchmarks # Spurious build guff ^makeitfile$ # Corefiles, a.out sometimes get into the distribution. ^.*\.core$ ^a\.out$ # There aren't any files I plan to distribute with these file types. ^[^/]*\.pl$ ^[^/]*\.txt$ # Don't put the "torture test" code into the CPAN distribution. It # contains bad stuff like "setjmp" which is incompatible with many # people's operating systems. People who want this kind of stuff can # get it from the repository. 
^Json3-random-test\.c$ ^(?:json-)?random-test\.c$ ^random-test$ ^randomjson\.pl$ # Don't add the UTF-8 test code to the repo: ^utf8test/.*$ # Makefile for C random stuff ^randmakefile$ # Not necessary or useful for CPAN ^FILES$ # Design document directory, no use to put on CPAN. ^doc/.*$ # ^examples/.*-out\.txt$ # Template for POD ^lib/JSON/Tokenize\.pod\.tmpl$ ^lib/JSON/Whitespace\.pod\.tmpl$ # Obsolete files ^obsolete/.*$ # Author version change script ^versionup\.pl$ # CI control files ^\.travis\.yml$ ^build/.*$ # These are used to make the documentation. ^see-also-info\.json$ ^modules\.pl$ # Local variables: # comment-start: "#" # End: JSON-Parse-0.61/json-whitespace.c000644 001751 001751 00000007747 14004651620 016121 0ustar00benben000000 000000 /* Type for adding whitespace. */ typedef struct json_s { SV * sv; char * s; STRLEN sl; } json_s_t; typedef struct json_ws { SV * news; SV * olds; /* Length of original string. */ STRLEN olds_l; /* Length of new string. */ unsigned int news_l; /* Copy point. */ char * q; /* Origin */ char * p; /* Top of token tree. */ json_token_t * t; /* Token under examination now. */ json_token_t * next; /* Whitespace to add before and after. */ char * before[n_json_tokens]; char * after[n_json_tokens]; int array_depth; int object_depth; char * array_indent; char * object_indent; } json_ws_t; static void copy_whitespace (json_ws_t * ws, char * w) { char * q; q = ws->q; while (*w) { *q++ = *w++; } ws->q = q; } static INLINE int whitespace_json (json_ws_t * ws) { /* Copy place. */ char * c; /* Value of q at entry to this routine, used to calculate added length. */ char * qorig; json_token_t * next; char * q; q = ws->q; qorig = q; next = ws->next; while (next) { /* Copy start of string. 
*/ copy_whitespace (ws, ws->before[next->type]); switch (next->type) { case json_token_object: *q++ = '{'; ws->object_depth++; ws->q = q; q += whitespace_json (ws); ws->object_depth--; *q++ = '}'; break; case json_token_array: *q++ = '['; ws->array_depth++; ws->q = q; q += whitespace_json (ws); ws->object_depth--; *q++ = ']'; break; case json_token_string: case json_token_key: case json_token_literal: case json_token_number: for (c = ws->p + next->start; c <= ws->p + next->end; c++) { *q++ = *c; } break; case json_token_comma: *q++ = ','; break; case json_token_colon: *q++ = ':'; break; default: croak ("unhandled token type %d", next->type); } /* Copy end of string. */ c = ws->after[next->type]; while (*c) { *q++ = *c++; } next = next->next; } return q - qorig; } static int copy_json (char * p, char * q, json_token_t * t) { /* Loop variable. */ json_token_t * next; /* Copy place. */ char * c; /* Value of q at entry to this routine, used to calculate added length. */ char * qorig; next = t; qorig = q; while (next) { switch (next->type) { case json_token_object: *q++ = '{'; q += copy_json (p, q, next->child); *q++ = '}'; break; case json_token_array: *q++ = '['; q += copy_json (p, q, next->child); *q++ = ']'; break; case json_token_string: case json_token_key: case json_token_literal: case json_token_number: for (c = p + next->start; c < p + next->end; c++) { *q++ = *c; } break; case json_token_comma: *q++ = ','; break; case json_token_colon: *q++ = ':'; break; default: croak ("unhandled token type %d", next->type); } next = next->next; } return q - qorig; } /* Remove all the whitespace. */ static SV * strip_whitespace (json_token_t * tokens, SV * json) { SV * stripped; char * p; char * q; /* Original length. */ STRLEN l; /* Length of output. */ unsigned int m; p = SvPV (json, l); stripped = newSV (l); /* Tell Perl it's a string. */ SvPOK_on (stripped); /* Set UTF-8 if necessary. 
*/ if (SvUTF8 (json)) { SvUTF8_on (stripped); } /* Get a pointer to the string inside "stripped". */ q = SvPVX (stripped); m = copy_json (p, q, tokens); /* Set the length. */ SvCUR_set (stripped, m); return stripped; } static SV * indent (json_token_t * tokens, SV * json) { int i; json_ws_t j = {0}; j.olds = json; j.p = SvPV (j.olds, j.olds_l); j.t = tokens; j.next = tokens; for (i = 0; i < n_json_tokens; i++) { j.before[i] = ""; j.after[i] = ""; } j.after[json_token_comma] = "\n"; j.after[json_token_object] = "\n"; j.after[json_token_array] = "\n"; return &PL_sv_undef; } JSON-Parse-0.61/MANIFEST000644 001751 001751 00000002512 14011073361 013762 0ustar00benben000000 000000 Changes CONTRIBUTING.md errors.c examples/array.pl examples/assert.pl examples/bad-utf8.pl examples/bad-utf8.txt examples/chr.pl examples/collide.pl examples/ebi.pl examples/first-bit.pl examples/hash.pl examples/json-tiny-round-trip-demo.pl examples/kani.pl examples/key-collision.pl examples/long-number.pl examples/sasori.pl examples/synopsis.pl examples/tokenize-synopsis.pl examples/true-subs.pl examples/unicode-details.pl examples/whitespace-synopsis.pl json-common.c json-entry-points.c json-perl.c json-whitespace.c lib/JSON/Parse.pm lib/JSON/Parse.pod lib/JSON/Tokenize.pm lib/JSON/Tokenize.pod lib/JSON/Whitespace.pm lib/JSON/Whitespace.pod Makefile.PL MANIFEST This list of files MANIFEST.SKIP Parse.xs README script/validjson t/array.t t/bugzilla-2049.t t/collision.t t/JPT.pm t/JSON-Parse.t t/json-tokenize.t t/Json3.t t/kolmorogov42-1.t t/max-depth.t t/minify.t t/numbers.t t/object.t t/perl-monks-1165399.t t/read-file.t t/rfc7159.t t/string-bug-44.json t/string-bug-44.t t/syntax-error-1.json t/syntax.t t/test-empty-string.t t/test.json t/unicode.t t/upgrade-utf8.t t/utf8.t t/valid-json.t t/whitespace.t typemap unicode.c unicode.h utf8-byte-one.c utf8-next-byte.c META.yml Module YAML meta-data (added by MakeMaker) META.json Module JSON meta-data (added by MakeMaker) 
JSON-Parse-0.61/utf8-byte-one.c000644 001751 001751 00000000734 13772601034 015416 0ustar00benben000000 000000 case BYTE_20_7F: ADDBYTE; goto string_start; case BYTE_C2_DF: ADDBYTE; goto byte_last_80_bf; case 0xE0: ADDBYTE; goto byte23_a0_bf; case BYTE_E1_EC: ADDBYTE; goto byte_penultimate_80_bf; case 0xED: ADDBYTE; goto byte23_80_9f; case 0xEE: case 0xEF: ADDBYTE; goto byte_penultimate_80_bf; case 0xF0: ADDBYTE; goto byte24_90_bf; case BYTE_F1_F3: ADDBYTE; goto byte24_80_bf; case 0xF4: ADDBYTE; goto byte24_80_8f; JSON-Parse-0.61/utf8-next-byte.c000644 001751 001751 00000002423 13012464667 015616 0ustar00benben000000 000000 #define FAILUTF8(want) \ parser->bad_beginning = startofutf8string - 1; \ parser->bad_type = json_string; \ parser->bad_byte = parser->end - 1; \ parser->expected = want; \ parser->error = json_error_unexpected_character; \ failbadinput (parser) byte_last_80_bf: switch (NEXTBYTE) { case BYTE_80_BF: ADDBYTE; goto string_start; default: FAILUTF8 (XBYTES_80_BF); } byte_penultimate_80_bf: switch (NEXTBYTE) { case BYTE_80_BF: ADDBYTE; goto byte_last_80_bf; default: FAILUTF8 (XBYTES_80_BF); } byte24_90_bf: switch (NEXTBYTE) { case BYTE_90_BF: ADDBYTE; goto byte_penultimate_80_bf; default: FAILUTF8 (XBYTES_90_BF); } byte23_80_9f: switch (NEXTBYTE) { case BYTE_80_9F: ADDBYTE; goto byte_last_80_bf; default: FAILUTF8 (XBYTES_80_9F); } byte23_a0_bf: switch (NEXTBYTE) { case BYTE_A0_BF: ADDBYTE; goto byte_last_80_bf; default: FAILUTF8 (XBYTES_A0_BF); } byte24_80_bf: switch (NEXTBYTE) { case BYTE_80_BF: ADDBYTE; goto byte_penultimate_80_bf; default: FAILUTF8 (XBYTES_80_BF); } byte24_80_8f: switch (NEXTBYTE) { case BYTE_80_8F: ADDBYTE; goto byte_penultimate_80_bf; default: FAILUTF8 (XBYTES_80_8F); } JSON-Parse-0.61/errors.c000444 001751 001751 00000035561 14011073353 014322 0ustar00benben000000 000000 typedef enum { json_error_invalid, json_error_unexpected_character, json_error_unexpected_end_of_input, json_error_not_surrogate_pair, json_error_empty_input, 
json_error_name_is_not_unique, json_error_overflow } json_error_t; const char * json_errors[json_error_overflow] = { "Invalid", "Unexpected character", "Unexpected end of input", "Not surrogate pair", "Empty input", "Name is not unique", }; enum expectation { xwhitespace, xcomma, xvalue_separator, xobject_end, xarray_end, xhexadecimal_character, xstring_start, xdigit, xdot, xminus, xplus, xexponential, xarrayobjectstart, xescape, xstringchar, xliteral, xin_literal, xin_surrogate_pair, xbytes_80_8f, xbytes_80_9f, xbytes_80_bf, xbytes_90_bf, xbytes_a0_bf, n_expectations }; #define XWHITESPACE (1<' build_requires: ExtUtils::MakeMaker: '0' configure_requires: ExtUtils::MakeMaker: '0' dynamic_config: 1 generated_by: 'ExtUtils::MakeMaker version 7.44, CPAN::Meta::Converter version 2.150010' license: perl meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html version: '1.4' name: JSON-Parse no_index: directory: - t - inc file: - build/JPB.pm - t/JPT.pm - xt/JPXT.pm requires: Carp: '0' perl: '5.008009' resources: bugtracker: https://github.com/benkasminbullock/JSON-Parse/issues repository: git://github.com/benkasminbullock/JSON-Parse.git version: '0.61' x_serialization_backend: 'CPAN::Meta::YAML version 0.018' JSON-Parse-0.61/META.json000644 001751 001751 00000002502 14011073361 014251 0ustar00benben000000 000000 { "abstract" : "Parse JSON", "author" : [ "Ben Bullock " ], "dynamic_config" : 1, "generated_by" : "ExtUtils::MakeMaker version 7.44, CPAN::Meta::Converter version 2.150010", "license" : [ "perl_5" ], "meta-spec" : { "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", "version" : 2 }, "name" : "JSON-Parse", "no_index" : { "directory" : [ "t", "inc" ], "file" : [ "build/JPB.pm", "t/JPT.pm", "xt/JPXT.pm" ] }, "prereqs" : { "build" : { "requires" : { "ExtUtils::MakeMaker" : "0" } }, "configure" : { "requires" : { "ExtUtils::MakeMaker" : "0" } }, "runtime" : { "requires" : { "Carp" : "0", "perl" : "5.008009" } } }, "release_status" : "stable", 
"resources" : { "bugtracker" : { "web" : "https://github.com/benkasminbullock/JSON-Parse/issues" }, "repository" : { "type" : "git", "url" : "git://github.com/benkasminbullock/JSON-Parse.git", "web" : "https://github.com/benkasminbullock/JSON-Parse" } }, "version" : "0.61", "x_serialization_backend" : "JSON::PP version 4.04" } JSON-Parse-0.61/script/validjson000755 001751 001751 00000002167 13016170166 016067 0ustar00benben000000 000000 #!/home/ben/software/install/bin/perl use warnings; use strict; use lib '/home/ben/projects/Json3/blib/lib'; use lib '/home/ben/projects/Json3/blib/arch'; use JSON::Parse 'assert_valid_json'; use Getopt::Long; my $ok = GetOptions ( "verbose" => \my $verbose, "help" => \my $help, ); if (! $ok || $help) { usage (); exit; } for my $file (@ARGV) { eval { open my $in, "<:raw", $file or die "Can't open '$file': $!"; my $text = ''; while (my $line = <$in>) { $text .= $line; } close $in or die $!; assert_valid_json ($text); }; if ($@) { my $error = $@; $error =~ s/\n+$//; if ($error !~ /\Q$file/) { $error = "$file: $error"; } if ($error =~ /validjson line [0-9]+\.$/) { $error =~ s/\sat\s\S+\sline.*$/\./; } print "$error\n"; } else { if ($verbose) { print "'$file' is valid JSON.\n"; } } } sub usage { print <{骪}, '骪', "Unicode \\uXXXX parsed correctly"); note ("keys = ", keys %$ar); # Here the second unicode piece of the string is added to switch on # the UTF-8 flag inside Perl and get the required invalidity. my $badunicode = '["\uD800", "バター"]'; ok (! valid_json ($badunicode), "$badunicode is invalid"); # This is what the documentation says will happen. However, I'm not # sure this is correct or what the user expects to happen. my $okunicode = '["\uD800"]'; ok (! valid_json ($okunicode), "$okunicode is valid"); my $surpair = '["\uD834\uDD1E"]'; my $spo; eval { $spo = parse_json ($surpair); }; ok (! 
$@, "parsed surrogate pairs"); is (ord ($spo->[0]), 0x1D11E, "g-clef surrogate pair"); use utf8; my $surpair_force_utf8 = '["\uD834\uDD1E麻婆茄子"]'; my $spo_force_utf8; eval { $spo_force_utf8 = parse_json ($surpair); }; ok (! $@, "parsed surrogate pairs"); is (ord ($spo_force_utf8->[0]), 0x1D11E, "g-clef surrogate pair"); use utf8; my $scorpion = '["蠍"]'; my $p1 = parse_json ($scorpion); ok (utf8::is_utf8 ($p1->[0]), "UTF-8 survives"); no utf8; my $ebi = '["蠍"]'; my $p2 = parse_json ($ebi); ok (! utf8::is_utf8 ($p2->[0]), "Not UTF-8 not marked as UTF-8"); no utf8; # 蟹 my $kani = '["\u87f9", "蟹", "\u87f9猿"]'; my $p = parse_json ($kani); ok (utf8::is_utf8 ($p->[0]), "kani upgraded regardless"); ok (! utf8::is_utf8 ($p->[1]), "input string not upgraded, even though it's UTF-8"); ok (utf8::is_utf8 ($p->[2]), "upgrade this too"); is (length ($p->[2]), 2, "length is two by magic"); ok (! valid_json ('["\uDE8C "]'), "invalid \uDE8C + space"); # Test of "surrogate pairs". my $jc = JSON::Parse->new (); my $wikipedia_1 = '"\ud801\udc37"'; my $out_1 = $jc->run ($wikipedia_1); is ($out_1, "\x{10437}"); my $wikipedia_2 = '"\ud852\udf62"'; my $out_2 = $jc->run ($wikipedia_2); is ($out_2, "\x{24b62}"); my $json_spec = '"\ud834\udd1e"'; my $out_3 = $jc->run ($json_spec); is ($out_3, "\x{1D11E}"); done_testing (); JSON-Parse-0.61/t/array.t000644 001751 001751 00000001560 14003422404 014376 0ustar00benben000000 000000 use FindBin '$Bin'; use lib "$Bin"; use JPT; # This was a bug with uninitialized memory. 
my $array = '[0,0,0,0,0,0,0,0,0,1,1,0,0,1]'; my $parray = eval ($array); my $jarray = parse_json ($array); is_deeply ($jarray, $parray); my $array2 = '[0,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]'; my $parray2 = eval ($array2); my $jarray2 = parse_json ($array2); is_deeply ($jarray2, $parray2); done_testing (); exit; JSON-Parse-0.61/t/whitespace.t000644 001751 001751 00000000404 14003422404 015410 0ustar00benben000000 000000 use FindBin '$Bin'; use lib "$Bin"; use JPT; my $json = <new(ngram_width => 3); # use trigrams # feed in text $ng->add_text($text1); # analyze $text1 $ng->add_text($text2); # analyze $text2 # feed in arbitrary sequence of tokens $ng->add_start_token; $ng->add_tokens(qw/token1 token2 token3/); $ng->add_end_token; my $output = $ng->generate_text; DESCRIPTION This is a module for analyzing token sequences with n-grams. You can use it to parse a block of text, or feed in your own tokens. It can generate new sequences of tokens from what has been fed in. EXPORT None. METHODS new Create a new n-gram analyzer instance. Options: ngram_width This is the \"window size\" of how many tokens the analyzer will keep track of. A ngram_width of two will make a bigram, a ngram_width of three will make a trigram, etc... ngram_width Returns token window size (e.g. the \"n\" in n-gram) token_table Returns n-gram table add_text Splits a block of text up by whitespace and processes each word as a token. 
Automatically calls \"add_start_token()\" at the beginning of the text and \"add_end_token()\" at the end. add_tokens Adds an arbitrary list of tokens. add_start_token Adds the \"start token.\" This is useful because you often will want to mark the beginnings and ends of a token sequence so that when generating your output the generator will know what tokens start a sequence and when to end. add_end_token Adds the \"end token.\" See \"add_start_token()\". analyze Generates an n-gram frequency table. Returns a hashref of *N => tokens => count*, where N is the number of tokens (will be from 2 to ngram_width). You will not normally need to call this unless you want to get the n-gram frequency table. generate_text After feeding in text tokens, this will return a new block of text based on whatever text was added. generate Generates a new sequence of tokens based on whatever tokens have previously been fed in. next_tok Given a list of tokens, will pick a possible token to come next. token_lookup Returns a hashref of the counts of tokens that follow a sequence of tokens. token_key Serializes a sequence of tokens for use as a key into the n-gram table. You will not normally need to call this. serialize Returns the tokens and n-gram (if one has been generated) in a string deserialize($string) Deserializes a string and returns an \"Algorithm::NGram\" instance SEE ALSO Text::Ngram, Text::Ngrams AUTHOR Mischa Spiegelmock, COPYRIGHT AND LICENSE Copyright 2007 by Mischa Spiegelmock This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. 
", "date" : "2011-09-23T05:08:06", "status" : "latest", "author" : "REVMISCHA", "directory" : false, "maturity" : "released", "indexed" : true, "documentation" : "Algorithm::NGram", "id" : "rEbfZ9plgUX_e6ySf4u3d5ZGrhE", "module" : [ { "indexed" : true, "authorized" : true, "version" : "0.9", "name" : "Algorithm::NGram", "version_numified" : 0.9 } ], "authorized" : true, "pod_lines" : [ [ 19, 51 ], [ 92, 5 ], [ 98, 5 ], [ 104, 7 ], [ 130, 5 ], [ 143, 8 ], [ 157, 5 ], [ 168, 8 ], [ 211, 6 ], [ 225, 6 ], [ 256, 5 ], [ 283, 5 ], [ 301, 6 ], [ 313, 5 ], [ 331, 5 ], [ 363, 18 ] ], "version" : "0.9", "binary" : false, "name" : "NGram.pm", "version_numified" : 0.9, "path" : "lib/Algorithm/NGram.pm", "release" : "Algorithm-NGram-0.9", "description" : "This is a module for analyzing token sequences with n-grams. You can use it to parse a block of text, or feed in your own tokens. It can generate new sequences of tokens from what has been fed in.", "distribution" : "Algorithm-NGram", "stat" : { "uid" : 500, "mtime" : 1316754239, "mode" : 33188, "size" : 7715, "gid" : 500 }, "level" : 2, "sloc" : 143, "slop" : 94, "mime" : "text/x-script.perl-module" } JSON-Parse-0.61/t/string-bug-44.json000644 001751 001751 00000007772 14003422404 016307 0ustar00benben000000 000000 
{"x":"\naaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"} JSON-Parse-0.61/t/numbers.t000644 001751 001751 00000003064 14003422404 014734 0ustar00benben000000 000000 use FindBin '$Bin'; use lib "$Bin"; use JPT; my $p; # This was causing some problems with the new grammar / lexer. my $jeplus = '[1.9e+9]'; eval { $p = parse_json ($jeplus); }; ok (! $@, "Parsed $jeplus OK"); cmp_ok ($p->[0], '==', 1.9e9, "Got a valid value"); # Various kinds of numbers are valid as JSON. my $j = <{integer}, 100), "Got 100 for integer"); ok (compare ($p->{decimal} , 1.5), "Got 1.5 for decimal"); ok (compare ($p->{exponent} , 100), "Got 100 for exponent"); ok (compare ($p->{"exponent-"} , 19/1000), "got 19/1000 for exponent-"); ok (compare ($p->{"exponent+"} , 1_900_000_000), "got 1_900_000_000 for exponent+"); ok (compare ($p->{fraction} , 0.01), "got 0.01 for fraction"); my $q = @{parse_json ('[0.12345]')}[0]; ok (compare ($q, '0.12345'), "Got 0.12345"); # Illegal numbers eval { parse_json ('[0...111]'); }; ok ($@, "Don't accept 0...111"); eval { parse_json ('[0111]'); }; like ($@, qr/unexpected character/i, "Error for leading zero"); my $long_number = '12345678901234567890123456789012345678901234567890'; my $out = parse_json ("[$long_number]"); is ($out->[0], $long_number); done_testing; exit; # Compare floating point numbers. sub compare { my ($x, $y) = @_; my $error = 0.00001; if (abs ($x - $y) < $error) { return 1; } return; } JSON-Parse-0.61/t/Json3.t000644 001751 001751 00000006067 14003422404 014263 0ustar00benben000000 000000 # These are basic tests for JSON::Parse. use FindBin '$Bin'; use lib "$Bin"; use JPT; # Empty array. my $p = parse_json ('[]'); ok ($p); is (ref $p, 'ARRAY'); is (scalar @$p, 0); # Empty hash. my $o = parse_json ('{}'); ok ($o); is (ref $o, 'HASH'); is (scalar keys %$o, 0); # Array with one element. 
my $a1 = parse_json ('[1]'); ok ($a1); is (ref $a1, 'ARRAY'); is (scalar @$a1, 1); is ($a1->[0], 1, "Got value 1"); # Array of integer numbers. my $ai = parse_json ('[1,12,123,1234,12345,123456,1234567,12345678]'); ok ($ai); is (ref $ai, 'ARRAY'); is (scalar @$ai, 8); is_deeply ($ai, [1,12,123,1234,12345,123456,1234567,12345678]); # Object with one pair of elements, a number as value. my $o1 = parse_json ('{"a":1}'); ok ($o1); is (ref $o1, 'HASH', "Got a hash"); is (scalar keys %$o1, 1); ok (defined ($o1->{a}), "Key for 'a' is defined"); is ($o1->{a}, 1, "Value for 'a' is one"); # Object with one pair of elements, a string as value. my $o2 = parse_json ('{"william":"shakespeare"}'); ok ($o2, "got a value"); is (ref $o2, 'HASH', "Got a hash"); is (scalar keys %$o2, 1, "Right no of keys"); ok (defined ($o2->{william}), "Got key william"); is ($o2->{william}, 'shakespeare', "Got right value for william"); # Object with a lot of whitespace. my $w = <{Kash})); is ($ow->{Funky}, 'Gibbon'); # Array of floating point numbers my $af = parse_json ('[0.001, 2.5e4, 3e-12]'); ok ($af); is (ref $af, 'ARRAY'); is (scalar @$af, 3); my $eps = 1e-3; cmp_ok (abs ($af->[0] - 0.001), '<', 0.001 * $eps); cmp_ok (abs ($af->[1] - 2.5e4), '<', 2.5e4 * $eps); cmp_ok (abs ($af->[2] - 3e-12), '<', 3e-12 * $eps); # Nested hash my $on2 = parse_json ('{"gust":{"breeze":"wind"}}'); ok ($on2); is (ref $on2, 'HASH'); is (scalar keys %$on2, 1); is_deeply ($on2, {gust => {breeze => 'wind'}}, "Nested hash depth 2"); # Nested hash my $on4 = parse_json ('{"gusty":{"breezy":{"monkey":{"flat":"hog"}},"miserable":"dawson"}}'); ok ($on4); is (ref $on4, 'HASH'); is (scalar keys %$on4, 1); is_deeply ($on4, {gusty => {breezy => {monkey => {flat => 'hog'}}, miserable => 'dawson'}}, "Nested hash depth 4"); # Array of things with escapes my $escjson = '["\\t", "bubbles\n", "\u1234", "\nmonkey\n", "milky\tmoggy", "mocha\tmoggy\n"]'; my $aesc = parse_json ($escjson); # Test one by one. 
is ($aesc->[0], "\t"); is ($aesc->[1], "bubbles\n"); ok (utf8::is_utf8 ($aesc->[2]), "Unicode switched on for character escapes"); is ($aesc->[3], "\nmonkey\n"); is ($aesc->[4], "milky\tmoggy"); is ($aesc->[5], "mocha\tmoggy\n"); my $ao = parse_json ('[{"baby":"chops"}, {"starsky":"hutch"}]'); ok ($ao, "Got JSON"); is (ref $ao, 'ARRAY'); is_deeply ($ao, [{baby => 'chops'}, {starsky => 'hutch'}]); # Literals my $at = parse_json ('[true]'); ok ($at); is ($at->[0], 1); my $afalse = parse_json ('[false]'); ok ($afalse, "got false value"); is ($afalse->[0], '', "is empty string"); done_testing (); # Local variables: # mode: perl # End: JSON-Parse-0.61/t/json-tokenize.t000644 001751 001751 00000001664 14003422404 016064 0ustar00benben000000 000000 # Test for JSON::Tokenize use FindBin '$Bin'; use lib "$Bin"; use JPT; use JSON::Tokenize ':all'; my $input = '{"tuttie":["fruity", true, 100]}'; ok (valid_json ($input)); my $token = tokenize_json ($input); is (tokenize_type ($token), 'object'); my $child = tokenize_child ($token); is (tokenize_type ($child), "string"); is (tokenize_text ($input, $child), '"tuttie"'); my $next = tokenize_next ($child); is (tokenize_type ($next), "colon"); is (tokenize_start ($next), 9, "start at 9"); is (tokenize_text ($input, $next), ":"); my $nnext = tokenize_next ($next); is (tokenize_text ($input, $nnext), '["fruity", true, 100]'); use utf8; my $utf8input = '{"くそ":"くらえ"}'; ok (valid_json ($utf8input), "valid input"); my $tokenutf8 = tokenize_json ($utf8input); my $childutf8 = tokenize_child ($tokenutf8); is (tokenize_type ($childutf8), "string", "is a string"); is (tokenize_text ($utf8input, $childutf8), '"くそ"'); done_testing (); JSON-Parse-0.61/t/read-file.t000644 001751 001751 00000000474 14003422404 015113 0ustar00benben000000 000000 # This tests reading a file using the two different names of the # routine. 
use FindBin '$Bin'; use lib "$Bin"; use JPT; my $p = json_file_to_perl ("$Bin/test.json"); ok ($p->{distribution} eq 'Algorithm-NGram'); my $q = read_json ("$Bin/test.json"); ok ($q->{distribution} eq 'Algorithm-NGram'); done_testing (); JSON-Parse-0.61/t/syntax-error-1.json000644 001751 001751 00000000050 12253504436 016605 0ustar00benben000000 000000 { "version_numified" : 8e-06 } JSON-Parse-0.61/t/perl-monks-1165399.t000644 001751 001751 00000001163 14003422404 016205 0ustar00benben000000 000000 # This is a test for the fix of the following bug: # https://github.com/benkasminbullock/JSON-Parse/issues/34 # There is also a discussion here: # http://perlmonks.org/?node_id=1165399 use FindBin '$Bin'; use lib "$Bin"; use JPT; my $j = JSON::Parse->new(); # no complain, no effect: $j->warn_only(1); # legal json: eval { my $pl = $j->run('{"k":"v"}'); }; ok (! $@); # illegal json, the following statement dies: my $warning; $SIG{__WARN__} = sub { $warning = "@_" }; eval { my $pl = $j->run('illegal json'); }; ok (! $@, "No fatal error"); ok ($warning, "Got warning"); undef $SIG{__WARN__}; done_testing (); JSON-Parse-0.61/t/rfc7159.t000644 001751 001751 00000003126 14003422404 014360 0ustar00benben000000 000000 # This tests for the new behaviour of the JSON specification as of RFC # 7159 where a single item without braces {} or square brackets [] is # also valid as JSON. use FindBin '$Bin'; use lib "$Bin"; use JPT; my $stringonly = '"this"'; my $j; eval { $j = parse_json ($stringonly); }; ok (! $@, "no errors parsing rfc7159 json"); is ($j, 'this', "Got correct value as well"); ok (valid_json ($stringonly), "And it's valid json too"); my $numberonly = '3.14'; my $j2; eval { $j2 = parse_json ($numberonly); }; ok (! $@, "no errors parsing rfc7159 json"); cmp_ok (abs ($j2 - $numberonly), '<', 0.0001, "got number back"); ok (valid_json ($numberonly), "And it's valid JSON too"); my $numberonly2 = '0.14'; my $jx; eval { $jx = parse_json ($numberonly2); }; ok (! 
$@, "no errors parsing rfc7159 json $numberonly2"); cmp_ok (abs ($jx - ($numberonly2 + 0.0)), '<', 0.0001, "got number back $numberonly2"); ok (valid_json ($numberonly2), "And it's valid JSON too"); my $numberws = ' 5.55e10 '; ok (valid_json ($numberws), "$numberws validated"); my $literalws = ' true '; ok (valid_json ($literalws), "'$literalws' validates"); my $j3; eval { $j3 = parse_json ($literalws); }; ok (! $@, "no errors parsing '$literalws'"); ok ($j3, "'$literalws' gives a true value"); is ($j3, 1, "'$literalws' is equal to one"); my $literal = 'null'; ok (valid_json ($literal), "'$literal' validates"); my $j4; eval { $j4 = parse_json ($literal); }; ok (! $@, "no errors parsing '$literal'"); ok (! $j4, "bare literal null is false value"); ok (! defined ($j4), "bare literal null is undefined"); done_testing (); JSON-Parse-0.61/t/upgrade-utf8.t000644 001751 001751 00000000575 14010627301 015601 0ustar00benben000000 000000 use FindBin '$Bin'; use lib "$Bin"; use JPT; my $jp = JSON::Parse->new (); $jp->upgrade_utf8 (1); no utf8; my $json = '{"場":"部"}'; my $out = $jp->parse ($json); use utf8; use Data::Dumper; print Dumper ($out); my @keys = keys %$out; ok (utf8::is_utf8 ($keys[0]), "Upgraded UTF-8 to character encoding"); cmp_ok (length ($out->{"場"}), '==', 1, "Got utf8"); done_testing (); JSON-Parse-0.61/t/max-depth.t000644 001751 001751 00000001307 14003422404 015146 0ustar00benben000000 000000 # This tests the "max depth" feature added so that the JSON Test Suite # silly test with 100,000 open { and [ doesn't cause an error. use FindBin '$Bin'; use lib "$Bin"; use JPT; my $jp = JSON::Parse->new (); # Test setting to "one" so that two [[ will cause an error. $jp->set_max_depth (1); my $ok = eval { $jp->run ('[[[["should fail due to depth"]]]]'); 1; }; ok (! $ok, "fails to parse array when max depth is set to 1"); my $md = $jp->get_max_depth (); cmp_ok ($md, '==', 1, "got back the max depth"); # Test setting back to default using zero argument. 
$jp->set_max_depth (0); my $mdd = $jp->get_max_depth (); cmp_ok ($mdd, '==', 10000, "got back the default max depth"); done_testing (); JSON-Parse-0.61/t/bugzilla-2049.t000644 001751 001751 00000000245 14003422404 015464 0ustar00benben000000 000000 use FindBin '$Bin'; use lib "$Bin"; use JPT; eval { my $type = ''; my $tri2file = read_json ('$type-tri2file.txt'); }; ok ($@); note ($@); done_testing (); JSON-Parse-0.61/t/JSON-Parse.t000644 001751 001751 00000006705 14003422404 015107 0ustar00benben000000 000000 # This is a basic test of parsing JSON. See also Json3.t. use FindBin '$Bin'; use lib "$Bin"; use JPT; my $jason = <<'EOF'; {"bog":"log","frog":[1,2,3],"guff":{"x":"y","z":"monkey","t":[0,1,2.3,4,59999]}} EOF my $x = parse_json ($jason); note ($x->{guff}->{t}->[2]); cmp_ok (abs ($x->{guff}->{t}->[2] - 2.3), '<', 0.00001, "Two point three"); my $xs = parse_json_safe ($jason); note ($xs->{guff}->{t}->[2]); cmp_ok (abs ($xs->{guff}->{t}->[2] - 2.3), '<', 0.00001, "Two point three"); my $fleece = '{"凄い":"技", "tickle":"baby"}'; my $y = parse_json ($fleece); ok ($y->{tickle} eq 'baby', "Parse hash"); my $ys = parse_json_safe ($fleece); ok ($ys->{tickle} eq 'baby', "Parse hash"); ok (valid_json ($fleece), "Valid OK JSON"); my $argonauts = '{"medea":{"magic":true,"nice":false}}'; my $z = parse_json ($argonauts); ok ($z->{medea}->{magic}, "Parse true literal."); ok (! ($z->{medea}->{nice}), "Parse false literal."); my $zs = parse_json_safe ($argonauts); ok ($zs->{medea}->{magic}, "Parse true literal."); ok (! ($zs->{medea}->{nice}), "Parse false literal."); ok (valid_json ($argonauts), "Valid OK JSON"); # Test that empty inputs result in an error message. eval { my $Q = parse_json (''); }; ok ($@, "Empty string makes error"); ok ($@ =~ /empty input/i, "Empty input error for empty input"); eval { # Switch off uninitialized value warning for this test. 
no warnings; my $R = parse_json (undef); }; ok ($@, "Empty string makes error"); ok ($@ =~ /empty input/i, "Empty input error for empty input"); eval { my $S = parse_json (' '); }; ok ($@, "Empty string makes error"); ok ($@ =~ /empty input/i, "Empty input error for empty input"); # Test that errors are produced if we are missing the final brace. my $n; eval { $n = '{"骪":"\u9aaa"'; my $nar = parse_json ($n); }; ok ($@, "found error"); { my $warning; local $SIG{__WARN__} = sub { $warning = $_[0]; }; eval { $n = '{"骪":"\u9aaa"'; my $nar = parse_json_safe ($n); }; ok (! $@, "no exception with parse_json_safe"); unlike ($warning, qr/\n.+/, "no newlines in middle of error"); like ($warning, qr/JSON-Parse\.t/, "right file name for error"); } ok (! valid_json ($n), "! Not valid missing end }"); # Test that errors are produced if we are missing the initial brace {. my $bad1 = '"bad":"city"}'; $@ = undef; eval { parse_json ($bad1); }; ok ($@, "found error in '$bad1'"); my $notjson = 'this is not lexable'; $@ = undef; eval { parse_json ($notjson); }; ok ($@, "Got error message"); ok (! valid_json ($notjson), "Not valid bad json"); # This is the example from either the JSON RFC or from Douglas # Crockford's web page. 
my $wi =<{address}->{postalCode} eq '10021', "Test a value $xi->{address}->{postalCode}"); ok (valid_json ($wi), "Validate"); my $perl_a = parse_json ('["a", "b", "c"]'); ok (ref $perl_a eq 'ARRAY', "json array to perl array"); my $perl_b = parse_json ('{"a":1, "b":2}'); ok (ref $perl_b eq 'HASH', "json object to perl hash"); done_testing (); exit; JSON-Parse-0.61/t/JPT.pm000644 001751 001751 00000001314 14003422404 014063 0ustar00benben000000 000000 package JPT; use warnings; use strict; use utf8; use Test::More; use JSON::Parse ':all'; require Exporter; our @ISA = qw(Exporter); our @EXPORT = (qw/daft_test/, @Test::More::EXPORT, @JSON::Parse::EXPORT_OK); # Set all the file handles to UTF-8 my $builder = Test::More->builder; binmode $builder->output, ":encoding(utf8)"; binmode $builder->failure_output, ":encoding(utf8)"; binmode $builder->todo_output, ":encoding(utf8)"; binmode STDOUT, ":encoding(utf8)"; binmode STDERR, ":encoding(utf8)"; sub import { my ($class) = @_; strict->import (); utf8->import (); warnings->import (); Test::More->import (); JSON::Parse->import (':all'); JPT->export_to_level (1); } 1; JSON-Parse-0.61/t/object.t000644 001751 001751 00000015560 14003422404 014533 0ustar00benben000000 000000 # Test the new "object" behaviour. # This tests: # * Copy literals, don't use read-only scalars. # * User-defined booleans # ** Correct object name in user-defined booleans # ** Copy literals and user-defined booleans interplay # ** Deletion of user-defined booleans # * Detect hash collisions use FindBin '$Bin'; use lib "$Bin"; use JPT; # ____ _ _ _ _ # / ___|___ _ __ _ _ | (_) |_ ___ _ __ __ _| |___ # | | / _ \| '_ \| | | | | | | __/ _ \ '__/ _` | / __| # | |__| (_) | |_) | |_| | | | | || __/ | | (_| | \__ \ # \____\___/| .__/ \__, | |_|_|\__\___|_| \__,_|_|___/ # |_| |___/ # my $jp = JSON::Parse->new (); $jp->copy_literals (1); my $stuff = '{"hocus":true,"pocus":false,"focus":null}'; my $out = $jp->run ($stuff); eval { $out->{pocus} = "bad city"; }; ok (! 
$@, "Can modify literals without error"); $jp->copy_literals (0); my $stuff2 = '{"hocus":true,"pocus":false,"focus":null}'; my $out2 = $jp->run ($stuff); eval { $out2->{pocus} = "bad city"; }; ok ($@, "Can't modify literals without error"); note ($@); # User-defined booleans package Ba::Bi::Bu::Be::Bo; # https://metacpan.org/source/MAKAMAKA/JSON-PP-2.27300/lib/JSON/PP.pm#L1390 $Ba::Bi::Bu::Be::Bo::true = do { bless \(my $dummy = 1), "JSON::PP::Boolean" }; $Ba::Bi::Bu::Be::Bo::false = do { bless \(my $dummy = 0), "JSON::PP::Boolean" }; $Ba::Bi::Bu::Be::Bo::null = do { bless \(my $dummy), "JSON::PP::Boolean" }; sub true {$Ba::Bi::Bu::Be::Bo::true;} sub false {$Ba::Bi::Bu::Be::Bo::false;} sub null {$Ba::Bi::Bu::Be::Bo::null;} 1; package main; # $jpub = j-son p-arser with u-ser b-ooleans my $jpub = JSON::Parse->new (); my $jpub1 = $jpub->run ($stuff); eval { $jpub1->{hocus} = "bad city"; }; ok ($@, "got error altering literals with default JSON::Parse object"); # Use the same things all the people on CPAN do, switching off the # warnings. $jpub->set_true ($Ba::Bi::Bu::Be::Bo::true); $jpub->no_warn_literals (1); $jpub->set_false ($Ba::Bi::Bu::Be::Bo::false); $jpub->no_warn_literals (0); $jpub->set_null ($Ba::Bi::Bu::Be::Bo::null); my $jpub2 = $jpub->run ($stuff); eval { $jpub2->{hocus} = "bad city"; }; ok (! $@, "Values are not read-only with user-defined true/false values"); my $jpub3 = $jpub->run ($stuff); like (ref $jpub3->{hocus}, qr/JSON::PP::Boolean/, "true value correct type"); like (ref $jpub3->{pocus}, qr/JSON::PP::Boolean/, "false value correct type"); like (ref $jpub3->{focus}, qr/JSON::PP::Boolean/, "null value correct type"); # Now test the same thing after switching on copy_literals. 
$jpub->no_warn_literals (1); $jpub->copy_literals (1); $jpub->no_warn_literals (0); my $jpub4 = $jpub->run ($stuff); like (ref $jpub4->{hocus}, qr/JSON::PP::Boolean/, "true value correct type even with copy-literals"); like (ref $jpub4->{pocus}, qr/JSON::PP::Boolean/, "false value correct type even with copy-literals"); like (ref $jpub4->{focus}, qr/JSON::PP::Boolean/, "null value correct type even with copy-literals"); # Now delete all our user-defined booleans $jpub->delete_true (); $jpub->delete_false (); $jpub->delete_null (); # Test the objects have gone. my $jpub5 = $jpub->run ($stuff); unlike (ref $jpub5->{hocus}, qr/JSON::PP::Boolean/, "User true deleted"); unlike (ref $jpub5->{pocus}, qr/JSON::PP::Boolean/, "User false deleted"); unlike (ref $jpub5->{focus}, qr/JSON::PP::Boolean/, "User null deleted"); # Now test that copy-literals is still working. my $jpub6 = $jpub->run ($stuff); eval { $jpub6->{hocus} = "bad city"; }; ok (! $@, "Values are not read-only, copy literals still works"); # Finally switch off copy-literals and check that things are back to # the default behaviour. $jpub->copy_literals (0); my $jpub7 = $jpub->run ($stuff); unlike (ref $jpub7->{hocus}, qr/JSON::PP::Boolean/, "User true deleted"); unlike (ref $jpub7->{pocus}, qr/JSON::PP::Boolean/, "User false deleted"); unlike (ref $jpub7->{focus}, qr/JSON::PP::Boolean/, "User null deleted"); eval { $jpub7->{hocus} = "bad city"; }; ok ($@, "Values are read-only again"); # Check it's the right error, "Modification of a readonly value". like ($@, qr/Modification/, "Error message looks good"); note ($@); # Check that this doesn't make a warning, we want the user to be able # to set "null" to "undef". 
my $warning; $SIG{__WARN__} = sub { $warning = "@_"; }; $jpub->set_true (undef); ok ($warning, "Warning on setting true to non-true value"); $jpub->set_true (0); ok ($warning, "Warning on setting true to non-true value"); $jpub->set_true (''); ok ($warning, "Warning on setting true to non-true value"); $warning = undef; $jpub->set_false (undef); ok (! $warning, "no warning when setting user-defined false"); $warning = undef; $jpub->set_false (0); ok (! $warning, "no warning when setting user-defined false"); $warning = undef; $jpub->set_false (''); ok (! $warning, "no warning when setting user-defined false"); $warning = undef; # https://www.youtube.com/watch?v=g4ouPGGLI6Q $jpub->set_false ('Yodeadodoyodeadodoyodeadodoyodeadodoyodeadodoyodeadodoyo-bab-baaaaa Ahhhhhh-aaahhhh-aaaaaa-aaaaAAA! Ohhhhhh-ooohhh-oooooo-oooOOO!'); ok ($warning, "warning when setting user-defined false to a true value"); note ($warning); $warning = undef; $jpub->set_null (undef); $jpub->set_null (0); $jpub->set_null (''); ok (! 
$warning, "no warning when setting user-defined null"); # ____ _ _ _ _ _ _ # | _ \ ___| |_ ___ ___| |_ ___ ___ | | (_)___(_) ___ _ __ ___ # | | | |/ _ \ __/ _ \/ __| __| / __/ _ \| | | / __| |/ _ \| '_ \/ __| # | |_| | __/ || __/ (__| |_ | (_| (_) | | | \__ \ | (_) | | | \__ \ # |____/ \___|\__\___|\___|\__| \___\___/|_|_|_|___/_|\___/|_| |_|___/ # # my $stuff3 = '{"hocus":1,"pocus":2,"hocus":3,"focus":4}'; my $jp3 = JSON::Parse->new (); eval { $jp3->run ($stuff3); }; ok (!$@, "Did not detect collision in default setting"); $jp3->detect_collisions (1); eval { $jp3->run ($stuff3); }; ok ($@, "Detected collision"); note ($@); $jp3->detect_collisions (0); eval { $jp3->run ($stuff3); }; ok (!$@, "Did not detect collision after reset to 0"); SKIP: { eval "require 5.14;"; if ($@) { skip "diagnostics_hash requires perl 5.14 or later", 1; } # ____ _ _ _ _ _ # | _ \(_) __ _ __ _ _ __ ___ ___| |_(_) ___ | |__ __ _ ___| |__ # | | | | |/ _` |/ _` | '_ \ / _ \/ __| __| |/ __| | '_ \ / _` / __| '_ \ # | |_| | | (_| | (_| | | | | (_) \__ \ |_| | (__ | | | | (_| \__ \ | | | # |____/|_|\__,_|\__, |_| |_|\___/|___/\__|_|\___| |_| |_|\__,_|___/_| |_| # |___/ my $jp4 = JSON::Parse->new (); $jp4->diagnostics_hash (1); eval { $jp4->run ("{{{{{"); }; ok (ref $@ eq 'HASH', "Got hash diagnostics"); }; done_testing (); JSON-Parse-0.61/t/kolmorogov42-1.t000644 001751 001751 00000010524 14003422404 015762 0ustar00benben000000 000000 # This tests behaviour against # https://github.com/benkasminbullock/JSON-Parse/issues/35 # "Module crashes if field larger than just below 4 kB contains \n sequence" use FindBin '$Bin'; use lib "$Bin"; use JPT; my $input = <<'EOF'; 
{"date":1468338282,"text":"asasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert
432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdae\nrt432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasa
sddsasasd555sdaert432fghyuiopasasddsasasd555sdaert432fghyuiopasasddsasasasdasdasdasdasdasdasdasdassdfsdfsdfs"} EOF my $p; eval { $p = parse_json ($input); }; ok (! $@); ok ($p); done_testing (); JSON-Parse-0.61/t/test-empty-string.t000644 001751 001751 00000000462 14003422404 016677 0ustar00benben000000 000000 # This tests for an old bug where empty strings didn't work properly. use FindBin '$Bin'; use lib "$Bin"; use JPT; my $json = parse_json ('{"buggles":"","bibbles":""}'); is ($json->{buggles}, ''); is ($json->{bibbles}, ''); $json->{buggles} .= "chuggles"; is ($json->{bibbles}, ''); done_testing (); exit; JSON-Parse-0.61/t/collision.t000644 001751 001751 00000002167 14003422404 015257 0ustar00benben000000 000000 # This tests what happens when a JSON object has two keys with the # same string, as in {"a":1,"a":2}. These are called "collisions" # because the entries for the two bits of the JSON in the storing # object "collide". use FindBin '$Bin'; use lib "$Bin"; use JPT; # We need to do some work with Unicode. This is a core module so it's # always available. use Encode 'decode_utf8'; my $j = '{"a":1, "a":2}'; my $p = parse_json ($j); cmp_ok ($p->{a}, '==', 2, "Test documented hash key collision behaviour"); my $j2 = '{"a":1, "a":2, "a":3, "a":4, "a":5, "a":6, "a":7, "a":8, "a":9, "a":10}'; my $p2 = parse_json ($j2); cmp_ok ($p2->{a}, '==', 10, "Test documented hash key collision behaviour"); my $focus = '{"hocus":10,"pocus":20,"hocus":30,"focus":40}'; my $jp = JSON::Parse->new (); $jp->detect_collisions (1); eval { $jp->run ($focus); }; ok ($@); like ($@, qr/"hocus"/); # Test functioning with Unicode strings. 
my $yodi = '{"ほかす":10,"ぽかす":20,"ほかす":30,"ふぉかす":40}'; eval { $jp->run ($yodi); }; ok ($@); my $error = decode_utf8 ($@); like ($error, qr/"ほかす"/); note ($error); done_testing (); JSON-Parse-0.61/t/minify.t000644 001751 001751 00000002217 14003437336 014565 0ustar00benben000000 000000 use FindBin '$Bin'; use lib "$Bin"; use JPT; use JSON::Whitespace ':all'; my $in = <{x}), '==', 4080, "Length as expected"); done_testing (); JSON-Parse-0.61/t/utf8.t000644 001751 001751 00000037426 14003422404 014160 0ustar00benben000000 000000 # Test UTF-8 processing. use FindBin '$Bin'; use lib "$Bin"; use JPT; # Test for valid and invalid JSON with Perl's flag switched off. no utf8; ok (valid_json ('["蟹"]')); ok (valid_json ('{"動物":"像"}')); my $bad_cont = sprintf ('["%c"]', 0x80); ok (! valid_json ($bad_cont)); eval { assert_valid_json ($bad_cont); }; like ($@, qr/Unexpected character 0x80 parsing string/); # Test with the flag switched back on. use utf8; ok (valid_json ('["蟹"]')); ok (valid_json ('{"動物":"像"}')); # From UTF-8 SAMPLER ok (valid_json ('["¥ · £ · € · $ · ¢ · ₡ · ₢ · ₣ · ₤ · ₥ · ₦ · ₧ · ₨ · ₩ · ₪ · ₫ · ₭ · ₮ · ₯ · ₹"]')); ok (valid_json ('["ᚠᛇᚻ᛫ᛒᛦᚦ᛫ᚠᚱᚩᚠᚢᚱ᛫ᚠᛁᚱᚪ᛫ᚷᛖᚻᚹᛦᛚᚳᚢᛗ\nᛋᚳᛖᚪᛚ᛫ᚦᛖᚪᚻ᛫ᛗᚪᚾᚾᚪ᛫ᚷᛖᚻᚹᛦᛚᚳ᛫ᛗᛁᚳᛚᚢᚾ᛫ᚻᛦᛏ᛫ᛞᚫᛚᚪᚾ\nᚷᛁᚠ᛫ᚻᛖ᛫ᚹᛁᛚᛖ᛫ᚠᚩᚱ᛫ᛞᚱᛁᚻᛏᚾᛖ᛫ᛞᚩᛗᛖᛋ᛫ᚻᛚᛇᛏᚪᚾ᛬"]')); ok (valid_json ('["Τη γλώσσα μου έδωσαν ελληνική\nτο σπίτι φτωχικό στις αμμουδιές του Ομήρου.\nΜονάχη έγνοια η γλώσσα μου στις αμμουδιές του Ομήρου.\nαπό το Άξιον Εστί\nτου Οδυσσέα Ελύτη"]')); ok (valid_json ('["Τὴ γλῶσσα μοῦ ἔδωσαν ἑλληνικὴ\nτὸ σπίτι φτωχικὸ στὶς ἀμμουδιὲς τοῦ Ὁμήρου.\nΜονάχη ἔγνοια ἡ γλῶσσα μου στὶς ἀμμουδιὲς τοῦ Ὁμήρου.\nἀπὸ τὸ Ἄξιον ἐστί\nτοῦ Ὀδυσσέα Ἐλύτη"]')); ok (valid_json ('["ვეპხის ტყაოსანი შოთა რუსთაველი\nღმერთსი შემვედრე, ნუთუ კვლა დამხსნას სოფლისა შრომასა, ცეცხლს, წყალსა და მიწასა, ჰაერთა თანა მრომასა; მომცნეს ფრთენი და აღვფრინდე, მივჰხვდე მას ჩემსა ნდომასა, დღისით და ღამით ვჰხედვიდე მზისა ელვათა კრთომაასა."]')); my $glassjson = <<'EOF'; {"Sanskrit":"काचं शक्नोम्यत्तुम् । नोपहिनस्ति माम् 
॥", "Sanskrit (standard transcription)":"kācaṃ śaknomyattum; nopahinasti mām.", "Classical Greek":"ὕαλον ϕαγεῖν δύναμαι· τοῦτο οὔ με βλάπτει.", "Greek (monotonic)":"Μπορώ να φάω σπασμένα γυαλιά χωρίς να πάθω τίποτα.", "Greek (polytonic)":"Μπορῶ νὰ φάω σπασμένα γυαλιὰ χωρὶς νὰ πάθω τίποτα. ", "Etruscan":"(NEEDED)", "Latin":"Vitrum edere possum; mihi non nocet.", "Old French":"Je puis mangier del voirre. Ne me nuit.", "French":"Je peux manger du verre, ça ne me fait pas mal.", "Provençal / Occitan":"Pòdi manjar de veire, me nafrariá pas.", "Québécois":"J'peux manger d'la vitre, ça m'fa pas mal.", "Walloon":"Dji pou magnî do vêre, çoula m' freut nén må. ", "Champenois":"(NEEDED) ", "Lorrain":"(NEEDED)", "Picard":"Ch'peux mingi du verre, cha m'foé mie n'ma. ", "Corsican/Corsu":"(NEEDED) ", "Jèrriais":"(NEEDED)", "Kreyòl Ayisyen (Haitï)":"Mwen kap manje vè, li pa blese'm.", "Basque":"Kristala jan dezaket, ez dit minik ematen.", "Catalan / Català":"Puc menjar vidre, que no em fa mal.", "Spanish":"Puedo comer vidrio, no me hace daño.", "Aragonés":"Puedo minchar beire, no me'n fa mal . ", "Aranés":"(NEEDED) ", "Mallorquín":"(NEEDED)", "Galician":"Eu podo xantar cristais e non cortarme.", "European Portuguese":"Posso comer vidro, não me faz mal.", "Brazilian Portuguese (8)":"Posso comer vidro, não me machuca.", "Caboverdiano/Kabuverdianu (Cape Verde)":"M' podê cumê vidru, ca ta maguâ-m'.", "Papiamentu":"Ami por kome glas anto e no ta hasimi daño.", "Italian":"Posso mangiare il vetro e non mi fa male.", "Milanese":"Sôn bôn de magnà el véder, el me fa minga mal.", "Roman":"Me posso magna' er vetro, e nun me fa male.", "Napoletano":"M' pozz magna' o'vetr, e nun m' fa mal.", "Venetian":"Mi posso magnare el vetro, no'l me fa mae.", "Zeneise (Genovese)":"Pòsso mangiâ o veddro e o no me fà mâ.", "Sicilian":"Puotsu mangiari u vitru, nun mi fa mali. 
", "Campinadese (Sardinia)":"(NEEDED) ", "Lugudorese (Sardinia)":"(NEEDED)", "Romansch (Grischun)":"Jau sai mangiar vaider, senza che quai fa donn a mai. ", "Romany / Tsigane":"(NEEDED)", "Romanian":"Pot să mănânc sticlă și ea nu mă rănește.", "Esperanto":"Mi povas manĝi vitron, ĝi ne damaĝas min. ", "Pictish":"(NEEDED) ", "Breton":"(NEEDED)", "Cornish":"Mý a yl dybry gwéder hag éf ny wra ow ankenya.", "Welsh":"Dw i'n gallu bwyta gwydr, 'dyw e ddim yn gwneud dolur i mi.", "Manx Gaelic":"Foddym gee glonney agh cha jean eh gortaghey mee.", "Old Irish (Ogham)":"᚛᚛ᚉᚑᚅᚔᚉᚉᚔᚋ ᚔᚈᚔ ᚍᚂᚐᚅᚑ ᚅᚔᚋᚌᚓᚅᚐ᚜", "Old Irish (Latin)":"Con·iccim ithi nglano. Ním·géna.", "Irish":"Is féidir liom gloinne a ithe. Ní dhéanann sí dochar ar bith dom.", "Ulster Gaelic":"Ithim-sa gloine agus ní miste damh é.", "Scottish Gaelic":"S urrainn dhomh gloinne ithe; cha ghoirtich i mi.", "Anglo-Saxon (Runes)":"ᛁᚳ᛫ᛗᚨᚷ᛫ᚷᛚᚨᛋ᛫ᛖᚩᛏᚪᚾ᛫ᚩᚾᛞ᛫ᚻᛁᛏ᛫ᚾᛖ᛫ᚻᛖᚪᚱᛗᛁᚪᚧ᛫ᛗᛖ᛬", "Anglo-Saxon (Latin)":"Ic mæg glæs eotan ond hit ne hearmiað me.", "Middle English":"Ich canne glas eten and hit hirtiþ me nouȝt.", "English":"I can eat glass and it doesn't hurt me.", "English (IPA)":"[aɪ kæn iːt glɑːs ænd ɪt dɐz nɒt hɜːt miː] (Received Pronunciation)", "English (Braille)":"⠊⠀⠉⠁⠝⠀⠑⠁⠞⠀⠛⠇⠁⠎⠎⠀⠁⠝⠙⠀⠊⠞⠀⠙⠕⠑⠎⠝⠞⠀⠓⠥⠗⠞⠀⠍⠑", "Jamaican":"Mi kian niam glas han i neba hot mi.", "Lalland Scots / Doric":"Ah can eat gless, it disnae hurt us. 
", "Glaswegian":"(NEEDED)", "Gothic (4)":"𐌼𐌰𐌲 𐌲𐌻𐌴𐍃 𐌹̈𐍄𐌰𐌽, 𐌽𐌹 𐌼𐌹𐍃 𐍅𐌿 𐌽𐌳𐌰𐌽 𐌱𐍂𐌹𐌲𐌲𐌹𐌸.", "Old Norse (Runes)":"ᛖᚴ ᚷᛖᛏ ᛖᛏᛁ ᚧ ᚷᛚᛖᚱ ᛘᚾ ᚦᛖᛋᛋ ᚨᚧ ᚡᛖ ᚱᚧᚨ ᛋᚨᚱ", "Old Norse (Latin)":"Ek get etið gler án þess að verða sár.", "Norsk / Norwegian (Nynorsk)":"Eg kan eta glas utan å skada meg.", "Norsk / Norwegian (Bokmål)":"Jeg kan spise glass uten å skade meg.", "Føroyskt / Faroese":"Eg kann eta glas, skaðaleysur.", "Íslenska / Icelandic":"Ég get etið gler án þess að meiða mig.", "Svenska / Swedish":"Jag kan äta glas utan att skada mig.", "Dansk / Danish":"Jeg kan spise glas, det gør ikke ondt på mig.", "Sønderjysk":"Æ ka æe glass uhen at det go mæ naue.", "Frysk / Frisian":"Ik kin glês ite, it docht me net sear.", "Nederlands / Dutch":"Ik kan glas eten, het doet mij geen kwaad.", "Kirchröadsj/Bôchesserplat":"Iech ken glaas èèse, mer 't deet miech jing pieng.", "Afrikaans":"Ek kan glas eet, maar dit doen my nie skade nie.", "Lëtzebuergescht / Luxemburgish":"Ech kan Glas iessen, daat deet mir nët wei.", "Deutsch / German":"Ich kann Glas essen, ohne mir zu schaden.", "Ruhrdeutsch":"Ich kann Glas verkasematuckeln, ohne dattet mich wat jucken tut.", "Langenfelder Platt":"Isch kann Jlaas kimmeln, uuhne datt mich datt weh dääd.", "Lausitzer Mundart (\"Lusatian\")":"Ich koann Gloos assn und doas dudd merr ni wii.", "Odenwälderisch":"Iech konn glaasch voschbachteln ohne dass es mir ebbs daun doun dud.", "Sächsisch / Saxon":"'sch kann Glos essn, ohne dass'sch mer wehtue.", "Pfälzisch":"Isch konn Glass fresse ohne dasses mer ebbes ausmache dud.", "Schwäbisch / Swabian":"I kå Glas frässa, ond des macht mr nix!", "Deutsch (Voralberg)":"I ka glas eassa, ohne dass mar weh tuat.", "Bayrisch / Bavarian":"I koh Glos esa, und es duard ma ned wei.", "Allemannisch":"I kaun Gloos essen, es tuat ma ned weh.", "Schwyzerdütsch (Zürich)":"Ich chan Glaas ässe, das schadt mir nöd.", "Schwyzerdütsch (Luzern)":"Ech cha Glâs ässe, das schadt mer ned. 
", "Plautdietsch":"(NEEDED)", "Hungarian":"Meg tudom enni az üveget, nem lesz tőle bajom.", "Suomi / Finnish":"Voin syödä lasia, se ei vahingoita minua.", "Sami (Northern)":"Sáhtán borrat lása, dat ii leat bávččas.", "Erzian":"Мон ярсан суликадо, ды зыян эйстэнзэ а ули.", "Northern Karelian":"Mie voin syvvä lasie ta minla ei ole kipie.", "Southern Karelian":"Minä voin syvvä st'oklua dai minule ei ole kibie. ", "Vepsian":"(NEEDED) ", "Votian":"(NEEDED) ", "Livonian":"(NEEDED)", "Estonian":"Ma võin klaasi süüa, see ei tee mulle midagi.", "Latvian":"Es varu ēst stiklu, tas man nekaitē.", "Lithuanian":"Aš galiu valgyti stiklą ir jis manęs nežeidžia ", "Old Prussian":"(NEEDED) ", "Sorbian (Wendish)":"(NEEDED)", "Czech":"Mohu jíst sklo, neublíží mi.", "Slovak":"Môžem jesť sklo. Nezraní ma.", "Polska / Polish":"Mogę jeść szkło i mi nie szkodzi.", "Slovenian":"Lahko jem steklo, ne da bi mi škodovalo.", "Croatian":"Ja mogu jesti staklo i ne boli me.", "Serbian (Latin)":"Ja mogu da jedem staklo.", "Serbian (Cyrillic)":"Ја могу да једем стакло.", "Macedonian":"Можам да јадам стакло, а не ме штета.", "Russian":"Я могу есть стекло, оно мне не вредит.", "Belarusian (Cyrillic)":"Я магу есці шкло, яно мне не шкодзіць.", "Belarusian (Lacinka)":"Ja mahu jeści škło, jano mne ne škodzić.", "Ukrainian":"Я можу їсти скло, і воно мені не зашкодить.", "Bulgarian":"Мога да ям стъкло, то не ми вреди.", "Georgian":"მინას ვჭამ და არა მტკივა.", "Armenian":"Կրնամ ապակի ուտել և ինծի անհանգիստ չըներ։", "Albanian":"Unë mund të ha qelq dhe nuk më gjen gjë.", "Turkish":"Cam yiyebilirim, bana zararı dokunmaz.", "Turkish (Ottoman)":"جام ييه بلورم بڭا ضررى طوقونمز", "Bangla / Bengali":"আমি কাঁচ খেতে পারি, তাতে আমার কোনো ক্ষতি হয় না।", "Marathi":"मी काच खाऊ शकतो, मला ते दुखत नाही.", "Kannada":"ನನಗೆ ಹಾನಿ ಆಗದೆ, ನಾನು ಗಜನ್ನು ತಿನಬಹುದು", "Hindi":"मैं काँच खा सकता हूँ और मुझे उससे कोई चोट नहीं पहुंचती.", "Tamil":"நான் கண்ணாடி சாப்பிடுவேன், அதனால் எனக்கு ஒரு கேடும் வராது.", "Telugu":"నేను గాజు తినగలను మరియు 
అలా చేసినా నాకు ఏమి ఇబ్బంది లేదు", "Sinhalese":"මට වීදුරු කෑමට හැකියි. එයින් මට කිසි හානියක් සිදු නොවේ.", "Urdu(3)":"میں کانچ کھا سکتا ہوں اور مجھے تکلیف نہیں ہوتی ۔", "Pashto(3)":"زه شيشه خوړلې شم، هغه ما نه خوږوي", "Farsi / Persian(3)":".من می توانم بدونِ احساس درد شيشه بخورم", "Arabic(3)":"أنا قادر على أكل الزجاج و هذا لا يؤلمني. ", "Aramaic":"(NEEDED)", "Maltese":"Nista' niekol il-ħġieġ u ma jagħmilli xejn.", "Hebrew(3)":"אני יכול לאכול זכוכית וזה לא מזיק לי.", "Yiddish(3)":"איך קען עסן גלאָז און עס טוט מיר נישט װײ. ", "Judeo-Arabic":"(NEEDED) ", "Ladino":"(NEEDED) ", "Gǝʼǝz":"(NEEDED) ", "Amharic":"(NEEDED)", "Twi":"Metumi awe tumpan, ɜnyɜ me hwee.", "Hausa (Latin)":"Inā iya taunar gilāshi kuma in gamā lāfiyā.", "Hausa (Ajami) (2)":"إِنا إِىَ تَونَر غِلَاشِ كُمَ إِن غَمَا لَافِىَا", "Yoruba(4)":"Mo lè je̩ dígí, kò ní pa mí lára.", "Lingala":"Nakokí kolíya biténi bya milungi, ekosála ngáí mabé tɛ́.", "(Ki)Swahili":"Naweza kula bilauri na sikunyui.", "Malay":"Saya boleh makan kaca dan ia tidak mencederakan saya.", "Tagalog":"Kaya kong kumain nang bubog at hindi ako masaktan.", "Chamorro":"Siña yo' chumocho krestat, ti ha na'lalamen yo'.", "Fijian":"Au rawa ni kana iloilo, ia au sega ni vakacacani kina.", "Javanese":"Aku isa mangan beling tanpa lara.", "Burmese":"က္ယ္ဝန္‌တော္‌၊က္ယ္ဝန္‌မ မ္ယက္‌စားနုိင္‌သည္‌။ ၎က္ရောင္‌့ ထိခုိက္‌မ္ဟု မရ္ဟိပာ။ (9)", "Vietnamese (quốc ngữ)":"Tôi có thể ăn thủy tinh mà không hại gì.", "Vietnamese (nôm) (4)":"些 𣎏 世 咹 水 晶 𦓡 空 𣎏 害 咦", "Khmer":"ខ្ញុំអាចញុំកញ្ចក់បាន ដោយគ្មានបញ្ហារ", "Lao":"ຂອ້ຍກິນແກ້ວໄດ້ໂດຍທີ່ມັນບໍ່ໄດ້ເຮັດໃຫ້ຂອ້ຍເຈັບ.", "Thai":"ฉันกินกระจกได้ แต่มันไม่ทำให้ฉันเจ็บ", "Mongolian (Cyrillic)":"Би шил идэй чадна, надад хортой биш", "Mongolian (Classic) (5)":"ᠪᠢ ᠰᠢᠯᠢ ᠢᠳᠡᠶᠦ ᠴᠢᠳᠠᠨᠠ ᠂ ᠨᠠᠳᠤᠷ ᠬᠣᠤᠷᠠᠳᠠᠢ ᠪᠢᠰᠢ ", "Dzongkha":"(NEEDED)", "Nepali":"म काँच खान सक्छू र मलाई केहि नी हुन्‍न् ।", "Tibetan":"ཤེལ་སྒོ་ཟ་ནས་ང་ན་གི་མ་རེད།", "Chinese":"我能吞下玻璃而不伤身体。", "Chinese (Traditional)":"我能吞下玻璃而不傷身體。", "Taiwanese(6)":"Góa ē-tàng chia̍h po-lê, mā bē 
tio̍h-siong.", "Japanese":"私はガラスを食べられます。それは私を傷つけません。", "Korean":"나는 유리를 먹을 수 있어요. 그래도 아프지 않아요", "Bislama":"Mi save kakae glas, hemi no save katem mi.", "Hawaiian":"Hiki iaʻu ke ʻai i ke aniani; ʻaʻole nō lā au e ʻeha.", "Marquesan":"E koʻana e kai i te karahi, mea ʻā, ʻaʻe hauhau.", "Inuktitut (10)":"ᐊᓕᒍᖅ ᓂᕆᔭᕌᖓᒃᑯ ᓱᕋᙱᑦᑐᓐᓇᖅᑐᖓ", "Chinook Jargon":"Naika məkmək kakshət labutay, pi weyk ukuk munk-sik nay.", "Navajo":"Tsésǫʼ yishą́ągo bííníshghah dóó doo shił neezgai da. ", "Cherokee (and Cree, Chickasaw, Cree, Micmac, Ojibwa, Lakota, Náhuatl, Quechua, Aymara, and other American languages)":"(NEEDED) ", "Garifuna":"(NEEDED) ", "Gullah":"(NEEDED)", "Lojban":"mi kakne le nu citka le blaci .iku'i le se go'i na xrani mi", "Nórdicg":"Ljœr ye caudran créneþ ý jor cẃran." } EOF assert_valid_json ($glassjson); ok (valid_json ($glassjson)); no utf8; # Markus Kuhn validation file for my $c (0xc0..0xf4) { my $badinitial = sprintf ("%c ", $c); ok (! valid_json ($badinitial), "first byte $c, second byte space invalid"); } my @overlong = split /\n/, (qq/ # 4.1 Examples of an overlong ASCII character c0 af e0 80 af f0 80 80 af # 4.2 Maximum overlong sequences c1 bf e0 9f bf f0 8f bf bf # 4.3 Overlong representation of the NUL character c0 80 e0 80 80 f0 80 80 80 # 5.1 Single UTF-16 surrogates ed a0 80 ed ad bf ed ae 80 ed af bf ed b0 80 ed be 80 ed bf bf # 5.2 Paired UTF-16 surrogates ed a0 80 ed b0 80 ed a0 80 ed bf bf ed ad bf ed b0 80 ed ad bf ed bf bf ed ae 80 ed b0 80 ed ae 80 ed bf bf ed af bf ed b0 80 ed af bf ed bf bf /); for my $overlong (@overlong) { if ($overlong =~ /^#/) { next; } my @bytes = split / /, $overlong; my $bad = join ('', map {sprintf "%c", hex ($_)} @bytes); ok (! valid_json ($bad), "$overlong invalid"); } done_testing (); exit; JSON-Parse-0.61/t/syntax.t000644 001751 001751 00000000360 14003422404 014603 0ustar00benben000000 000000 # This is a test for a false syntax error produced by this module on # legitimate input. 
use FindBin '$Bin'; use lib "$Bin"; use JPT; eval { my $json = read_json ("$Bin/syntax-error-1.json"); }; note ($@); ok (! $@); done_testing (); JSON-Parse-0.61/lib/JSON/000755 001751 001751 00000000000 14011073360 014147 5ustar00benben000000 000000 JSON-Parse-0.61/lib/JSON/Parse.pod000644 001751 001751 00000214210 14011073353 015727 0ustar00benben000000 000000 =pod =encoding UTF-8 =head1 NAME JSON::Parse - Parse JSON =head1 SYNOPSIS use JSON::Parse 'parse_json'; my $json = '["golden", "fleece"]'; my $perl = parse_json ($json); # Same effect as $perl = ['golden', 'fleece']; Convert JSON into Perl. =head1 VERSION This documents version 0.61 of JSON::Parse corresponding to L released on Thu Feb 11 09:14:04 2021 +0900. =head1 DESCRIPTION A module for parsing JSON. (JSON means "JavaScript Object Notation" and it is specified in L.) JSON::Parse offers the function L, which takes a string containing JSON, and returns an equivalent Perl structure. It also offers validation of JSON via L, which returns true or false depending on whether the JSON is correct or not, and L, which produces a descriptive fatal error if the JSON is invalid. A function L reads JSON from a file, and there is a safer version of L called L which doesn't throw exceptions. For special cases of parsing, there are also methods L and L, which create a JSON parsing object and run it on text. See L. JSON::Parse accepts only UTF-8 as input. See L and L. =head1 FUNCTIONS =head2 assert_valid_json use JSON::Parse 'assert_valid_json'; eval { assert_valid_json ('["xyz":"b"]'); }; if ($@) { print "Your JSON was invalid: $@\n"; } # Prints "Unexpected character ':' parsing array" produces output Your JSON was invalid: JSON error at line 1, byte 7/11: Unexpected character ':' parsing array starting from byte 1: expecting whitespace: 'n', '\r', '\t', ' ' or comma: ',' or end of array: ']' at /usr/home/ben/projects/json-parse/examples/assert.pl line 6. 
(This example is included as L|https://fastapi.metacpan.org/source/BKB/JSON-Parse-0.61/examples/assert.pl> in the distribution.) This is the underlying function for L. It runs at the same speed, but it throws an error if the JSON is wrong, rather than returning 1 or 0. See L for the error format, which is identical to L. This cannot detect key collisions in the JSON since it does not store values. See L for more on this module's handling of non-unique names in the JSON. The method equivalent to this is L. The behaviour of disallowing empty inputs was changed in version 0.49. =head2 parse_json use JSON::Parse 'parse_json'; my $perl = parse_json ('{"x":1, "y":2}'); This function converts JSON into a Perl structure, either an array reference, a hash reference, or a scalar. If the first argument does not contain a complete valid JSON text, is the undefined value, an empty string, or a string containing only whitespace C throws a fatal error ("dies"). If the argument contains valid JSON, the return value is either a hash reference, an array reference, or a scalar. If the input JSON text is a serialized object, a hash reference is returned: use JSON::Parse ':all'; my $perl = parse_json ('{"a":1, "b":2}'); print ref $perl, "\n"; produces output HASH (This example is included as L|https://fastapi.metacpan.org/source/BKB/JSON-Parse-0.61/examples/hash.pl> in the distribution.) If the input JSON text is a serialized array, an array reference is returned: use JSON::Parse ':all'; my $perl = parse_json ('["a", "b", "c"]'); print ref $perl, "\n"; produces output ARRAY (This example is included as L|https://fastapi.metacpan.org/source/BKB/JSON-Parse-0.61/examples/array.pl> in the distribution.) Otherwise a Perl scalar is returned. The behaviour of allowing a scalar was added in version 0.32 of this module. This brings it into line with the new specification for JSON. The behaviour of disallowing empty inputs was changed in version 0.49. 
The function L offers a version of this function with various safety features enabled. The method L is equivalent to this. =head2 parse_json_safe This is almost the same thing as L, but has the following differences: =over =item Does not throw exceptions If the JSON is invalid, a warning is printed and the undefined value is returned, as if calling L like this: eval { $out = parse_json ($json); }; if ($@) { carp $@; $out = undef; } =item Detects key collisions This switches on L, so that if the JSON contains non-unique names, a warning is printed and the undefined value is returned. See L for an explanation of what a key collision is. =item Booleans are not read-only This switches on L so that JSON true, false and null values are copied. These values can be modified, but they will not be converted back into C and C by L. =item Errors are reported by carp Parsing errors are reported by L, so the error line number refers to the caller's line. =back As the name implies, this is meant to be a "safety-first" version of L. 🎲 This function was added in version 0.38. =head2 read_json use JSON::Parse 'read_json'; my $p = read_json ('filename'); This is exactly the same as L except that it reads the JSON from the specified file rather than a scalar. The file must be in the UTF-8 encoding, and is opened as a character file using C<:encoding(utf8)> (see L and L). The output is marked as character strings. The method equivalent is L. This is a convenience function written in Perl. You may prefer to read the file yourself using another module if you need faster performance. This was renamed from L in version 0.59. The old name will also continue to work indefinitely. =head2 valid_json use JSON::Parse 'valid_json'; if (valid_json ($json)) { # do something } C returns I<1> if its argument is valid JSON and I<0> if not. It runs several times faster than L. This gain in speed is obtained because it discards the input data after reading it, rather than storing it into Perl variables. 
This does not supply the actual errors which caused invalidity. Use L to get error messages when the JSON is invalid. This cannot detect duplicate keys in JSON objects because it does not store values. See L for more on this module's handling of non-unique names in the JSON. =head1 METHODS If you need to parse JSON and you are not satisfied with the parsing options offered by L and L, you can create a JSON parsing object with L and set various options on the object, then use it with L or L. There are options to copy JSON literals (C, C, C) with L, switch off fatal errors with L, detect duplicate keys in objects with L, set the maximum depth of nested objects and arrays with L, produce machine-readable parsing errors with L, and set the JSON literals to user defined values with the methods described under L. These methods only affect the object created with L; they do not globally affect the behaviour of L or L. =head2 check eval { $jp->check ($json); }; This does the same thing as L, except its behaviour can be modified using the L method. 🎲 This method was added in version 0.48. This is for the benefit of L. =head2 copy_literals $jp->copy_literals (1); With a true value, copy JSON literal values (C, C, and C) into new Perl scalar values, and don't put read-only values into the output. With a false value, use read-only scalars: $jp->copy_literals (0); The C behaviour is the behaviour of L. The C behaviour is the behaviour of L. If the user also sets user-defined literals with L, L and L, that takes precedence over this. 🎲 This method was added in version 0.38. =head2 detect_collisions $jp->detect_collisions (1); This switches on a check for hash key collisions (non-unique names in JSON objects). 
If a collision is found, an error message L is printed, which also gives the non-unique name and the byte position where the start of the colliding string was found: use JSON::Parse; my $jp = JSON::Parse->new (); $jp->detect_collisions (1); eval { $jp->parse ('{"animals":{"cat":"moggy","cat":"feline","cat":"neko"}}'); }; print "$@\n" if $@; produces output JSON error at line 1, byte 28/55: Name is not unique: "cat" parsing object starting from byte 12 at /usr/home/ben/projects/json-parse/examples/../blib/lib/JSON/Parse.pm line 131. (This example is included as L|https://fastapi.metacpan.org/source/BKB/JSON-Parse-0.61/examples/collide.pl> in the distribution.) The C behaviour is the behaviour of L. The C behaviour is the behaviour of L. 🎲 This method was added in version 0.38. =head2 diagnostics_hash $jp->diagnostics_hash (1); This changes diagnostics produced by errors from a simple string into a hash reference containing various fields. This is incompatible with L. This replaces the previous experimental global variable C<$json_diagnostics>, which was removed from the module. The hash keys and values are identical to those provided in the object returned by C<$json_diagnostics>, with the addition of a key C which returns the usual error. This requires Perl version 5.14 or later. An example of the use of this method to "repair" broken JSON is in the module L. 🎲 This method was added in version 0.46. =head2 get_max_depth my $max_depth = $jp->get_max_depth (); This returns the maximum nesting depth of objects or arrays in the input JSON. The default value is 10,000. 🎲 This method was added in version 0.58. =head2 new my $jp = JSON::Parse->new (); Create a new JSON::Parse object. 🎲 This method was added in version 0.38. =head2 parse my $out = $jp->parse ($json); This does the same thing as L, except its behaviour can be modified using object methods. 🎲 This method was added in version 0.38. This was renamed from C in version 0.60. 
=head2 read my $json = $jp->read ($file); Read a file, parse the contained JSON, and return the output. This method is equivalent to the function L. 🎲 This method was added in version 0.60. =head2 set_max_depth $jp->set_max_depth (42); Set the maximum nesting depth of objects or arrays in the input JSON. The default value is 10,000. 🎲 This method was added in version 0.58. =head2 upgrade_utf8 $jp->upgrade_utf8 (1); Upgrade input from bytes to characters automatically. This can be switched off again using any false value: $jp->upgrade_utf8 (0); 🎲 This method was added in version 0.61. =head2 warn_only $jp->warn_only (1); Warn, don't die, on error. Failed parsing returns the undefined value, C, and prints a warning. This can be switched off again using any false value: $jp->warn_only (''); 🎲 This method was added in version 0.41. =head2 Methods for manipulating literals These methods alter what is written into the Perl structure when the parser sees a literal value, C, C or C in the input JSON. This number of methods is needed because of the possibility that a user wants to set the output for C to be C: $jp->set_false (undef); Thus, we cannot use a single function C<< $jp->false (undef) >> to cover both setting and deleting of values. 🎲 This facility was added in version 0.38. =head3 set_true $jp->set_true ("Yes, that is so true"); Supply a scalar to be used in place of the JSON C literal. This example puts the string "Yes, that is so true" into the hash or array when we hit a "true" literal, rather than the default read-only scalar: use JSON::Parse; my $json = '{"yes":true,"no":false}'; my $jp = JSON::Parse->new (); $jp->set_true ('Yes, that is so true'); my $out = $jp->parse ($json); print $out->{yes}, "\n"; prints Yes, that is so true To override the previous value, call it again with a new value. To delete the value and revert to the default behaviour, use L. 
If you give this a value which is not "true", as in Perl will evaluate it as a false in an if statement, it prints a warning L. You can switch this warning off with L. 🎲 This method was added in version 0.38. =head3 delete_true $jp->delete_true (); Delete the user-defined true value. See L. This method is "safe" in that it has absolutely no effect if no user-defined value is in place. It does not return a value. 🎲 This method was added in version 0.38. =head3 set_false $jp->set_false (JSON::PP::Boolean::false); Supply a scalar to be used in place of the JSON C literal. In the above example, when we hit a "false" literal, we put C in the output, similar to L and other CPAN modules like L or L. To override the previous value, call it again with a new value. To delete the value and revert to the default behaviour, use L. If you give this a value which is not "false", as in Perl will evaluate it as a false in an if statement, it prints a warning L. You can switch this warning off with L. 🎲 This method was added in version 0.38. =head3 delete_false $jp->delete_false (); Delete the user-defined false value. See L. This method is "safe" in that it has absolutely no effect if no user-defined value is in place. It does not return a value. 🎲 This method was added in version 0.38. =head3 set_null $jp->set_null (0); Supply a scalar to be used in place of the JSON C literal. To override the previous value, call it again with a new value. To delete the value and revert to the default behaviour, use L. 🎲 This method was added in version 0.38. =head3 delete_null $jp->delete_null (); Delete the user-defined null value. See L. This method is "safe" in that it has absolutely no effect if no user-defined value is in place. It does not return a value. 🎲 This method was added in version 0.38. =head3 no_warn_literals $jp->no_warn_literals (1); Use a true value to switch off warnings about setting boolean values to contradictory things. 
For example if you want to set the JSON C literal to turn into the string "false", $jp->no_warn_literals (1); $jp->set_false ("false"); See also L. This also switches off the warning L. 🎲 This method was added in version 0.38. =head1 OLD INTERFACE The following alternative function names are accepted. These are the names used for the functions in old versions of this module. These names are not deprecated and will never be removed from the module. The names ending in "_to_perl" seem quite silly in retrospect since surely it is obvious that one is programming in Perl. =head2 json_to_perl This is exactly the same function as L. =head2 json_file_to_perl This is exactly the same function as L. The function was renamed in version 0.59, after the same function in L. =head2 run This is the old name for L. =head2 validate_json This is exactly the same function as L. =head1 Mapping from JSON to Perl JSON elements are mapped to Perl as follows: =head2 JSON numbers JSON numbers become Perl numbers, either integers or double-precision floating point numbers, or possibly strings containing the number if parsing of a number by the usual methods fails somehow. JSON does not allow leading zeros, like I<0123>, or leading plus signs, like I<+100>, in numbers, so these cause an L error. JSON also does not allow numbers of the form I<1.>, but it does allow things like I<0e0> or I<1E999999>. As far as possible these are accepted by JSON::Parse. =head2 JSON strings JSON strings become Perl strings. The JSON escape characters such as C<\t> for the tab character (see section 2.5 of L) are mapped to the equivalent ASCII character. =head3 Handling of Unicode Inputs must be in the UTF-8 format. See L. In addition, JSON::Parse rejects UTF-8 which encodes non-characters such as C and ill-formed characters such as incomplete halves of surrogate pairs. Unicode encoding points in the input of the form C<\u3000> are converted into the equivalent UTF-8 bytes. 
Surrogate pairs in the form C<\uD834\uDD1E> are also handled. If the second half of the surrogate pair is missing, an L or L error is thrown. If the second half of the surrogate pair is present but contains an impossible value, a L error is thrown. If the input to L is marked as Unicode characters, the output strings will be marked as Unicode characters. If the input is not marked as Unicode characters, the output strings will not be marked as Unicode characters. Thus, use JSON::Parse ':all'; # The scalar $sasori looks like Unicode to Perl use utf8; my $sasori = '["蠍"]'; my $p = parse_json ($sasori); print utf8::is_utf8 ($p->[0]); # Prints 1. but use JSON::Parse ':all'; # The scalar $ebi does not look like Unicode to Perl no utf8; my $ebi = '["海老"]'; my $p = parse_json ($ebi); print utf8::is_utf8 ($p->[0]); # Prints nothing. Escapes of the form \uXXXX (see page three of L) are mapped to ASCII if XXXX is less than 0x80, or to UTF-8 if XXXX is greater than or equal to 0x80. Strings containing \uXXXX escapes greater than 0x80 are also upgraded to character strings, regardless of whether the input is a character string or a byte string, thus regardless of whether Perl thinks the input string is Unicode, escapes like \u87f9 are converted into the equivalent UTF-8 bytes and the particular string in which they occur is marked as a character string: use JSON::Parse ':all'; no utf8; # 蟹 my $kani = '["\u87f9"]'; my $p = parse_json ($kani); print "It's marked as a character string" if utf8::is_utf8 ($p->[0]); # Prints "It's marked as a character string" because it's upgraded # regardless of the input string's flags. 
This is modelled on the behaviour of Perl's C: no utf8; my $kani = '87f9'; print "hex is character string\n" if utf8::is_utf8 ($kani); # prints nothing $kani = chr (hex ($kani)); print "chr makes it a character string\n" if utf8::is_utf8 ($kani); # prints "chr makes it a character string" However, JSON::Parse also upgrades the remaining part of the string into a character string, even when it's not marked as a character string. For example, use JSON::Parse ':all'; use Unicode::UTF8 'decode_utf8'; no utf8; my $highbytes = "か"; my $not_utf8 = "$highbytes\\u3042"; my $test = "{\"a\":\"$not_utf8\"}"; my $out = parse_json ($test); # JSON::Parse does something unusual here in promoting the first part # of the string into UTF-8. print "JSON::Parse gives this: ", $out->{a}, "\n"; # Perl cannot assume that $highbytes is in UTF-8, so it has to just # turn the initial characters into garbage. my $add_chr = $highbytes . chr (0x3042); print "Perl's output is like this: ", $add_chr, "\n"; # In fact JSON::Parse's behaviour is equivalent to this: my $equiv = decode_utf8 ($highbytes) . chr (0x3042); print "JSON::Parse did something like this: ", $equiv, "\n"; # With character strings switched on, Perl and JSON::Parse do the same # thing. use utf8; my $is_utf8 = "か"; my $test2 = "{\"a\":\"$is_utf8\\u3042\"}"; my $out2 = parse_json ($test2); print "JSON::Parse: ", $out2->{a}, "\n"; my $add_chr2 = $is_utf8 . chr (0x3042); print "Native Perl: ", $add_chr2, "\n"; produces output JSON::Parse gives this: かあ Perl's output is like this: かあ JSON::Parse did something like this: かあ JSON::Parse: かあ Native Perl: かあ (This example is included as L|https://fastapi.metacpan.org/source/BKB/JSON-Parse-0.61/examples/unicode-details.pl> in the distribution.) Although in general the above would be an unsafe practice, JSON::Parse can do things this way because JSON is a text-only, Unicode-only format. 
To ensure that invalid inputs are never upgraded, JSON::Parse checks each input byte to make sure that it forms UTF-8. See also L. Doing things this way, rather than the way that Perl does it, was one of the original motivations for writing this module. =head2 JSON arrays JSON arrays become Perl array references. The elements of the Perl array are in the same order as they appear in the JSON. Thus my $p = parse_json ('["monday", "tuesday", "wednesday"]'); has the same result as a Perl declaration of the form my $p = [ 'monday', 'tuesday', 'wednesday' ]; =head2 JSON objects JSON objects become Perl hashes. The members of the JSON object become key and value pairs in the Perl hash. The string part of each object member becomes the key of the Perl hash. The value part of each member is mapped to the value of the Perl hash. Thus my $j = <<'EOF'; {"monday":["blue","black"],"tuesday":["grey","heart attack"],"friday":"Gotta get down on Friday"} EOF my $p = parse_json ($j); has the same result as a Perl declaration of the form my $p = { monday => ['blue', 'black'], tuesday => ['grey', 'heart attack'], friday => 'Gotta get down on Friday', }; =head3 Key collisions A key collision is something like the following. use JSON::Parse qw/parse_json parse_json_safe/; my $j = '{"a":1, "a":2}'; my $p = parse_json ($j); print "Ambiguous key 'a' is ", $p->{a}, "\n"; my $q = parse_json_safe ($j); produces output JSON::Parse::parse_json_safe: Name is not unique: "a" parsing object starting from byte 1 at /usr/home/ben/projects/json-parse/examples/key-collision.pl line 8. Ambiguous key 'a' is 2 (This example is included as L|https://fastapi.metacpan.org/source/BKB/JSON-Parse-0.61/examples/key-collision.pl> in the distribution.) Here the key "a" could be either 1 or 2. As seen in the example, L overwrites the first value with the second value. L halts and prints a warning. If you use L you can switch key collision on and off with the L method. The rationale for L not to give warnings is that Perl doesn't give information about collisions when storing into hash values, and checking for collisions for every key will degrade performance for the sake of an unlikely occurrence. 
The JSON specification says "The names within an object SHOULD be unique." (see L, page 5), although it's not a requirement. For performance, L and L do not store hash keys, thus they cannot detect this variety of problem. =head2 Literals =head3 false L maps the JSON false literal to a readonly scalar which evaluates to the empty string, or to zero in a numeric context. (This behaviour changed from version 0.36 to 0.37. In versions up to 0.36, the false literal was mapped to a readonly scalar which evaluated to 0 only.) L maps the JSON literal to a similar scalar without the readonly constraints. If you use a parser created with L, you can choose either of these behaviours with L, or you can tell JSON::Parse to put your own value in place of falses using the L method. =head3 null L maps the JSON null literal to a readonly scalar C<$JSON::Parse::null> which evaluates to C. L maps the JSON literal to the undefined value. If you use a parser created with L, you can choose either of these behaviours with L, or you can tell JSON::Parse to put your own value in place of nulls using the L method. =head3 true L maps the JSON true literal to a readonly scalar which evaluates to C<1>. L maps the JSON literal to the value 1. If you use a parser created with L, you can choose either of these behaviours with L, or you can tell JSON::Parse to put your own value in place of trues using the L method. =head3 Round trips and compatibility The Perl versions of literals produced by L will be converted back to JSON literals if you use L. However, JSON::Parse's literals are incompatible with the other CPAN JSON modules. For compatibility with other CPAN modules, create a JSON::Parse object with L, and set JSON::Parse's literals with L, L, and L. 
=head3 A round trip with JSON::Tiny This example demonstrates round-trip compatibility using L, version 0.58: use utf8; use JSON::Tiny '0.58', qw(decode_json encode_json); use JSON::Parse; use JSON::Create; my $cream = '{"clapton":true,"hendrix":false}'; my $jp = JSON::Parse->new (); my $jc = JSON::Create->new (sort => 1); print "First do a round-trip of our modules:\n\n"; print $jc->create ($jp->parse ($cream)), "\n\n"; print "Now do a round-trip of JSON::Tiny:\n\n"; print encode_json (decode_json ($cream)), "\n\n"; print "🥴 First, incompatible mode:\n\n"; print 'tiny(parse): ', encode_json ($jp->parse ($cream)), "\n"; print 'create(tiny): ', $jc->create (decode_json ($cream)), "\n\n"; # Set our parser to produce these things as literals: $jp->set_true (JSON::Tiny::true); $jp->set_false (JSON::Tiny::false); print "🔄 Compatibility with JSON::Parse:\n\n"; print 'tiny(parse):', encode_json ($jp->parse ($cream)), "\n\n"; $jc->bool ('JSON::Tiny::_Bool'); print "🔄 Compatibility with JSON::Create:\n\n"; print 'create(tiny):', $jc->create (decode_json ($cream)), "\n\n"; print "🔄 JSON::Parse and JSON::Create are still compatible too:\n\n"; print $jc->create ($jp->parse ($cream)), "\n"; produces output First do a round-trip of our modules: {"clapton":true,"hendrix":false} Now do a round-trip of JSON::Tiny: {"clapton":true,"hendrix":false} 🥴 First, incompatible mode: tiny(parse): {"clapton":1,"hendrix":""} create(tiny): {"clapton":1,"hendrix":0} 🔄 Compatibility with JSON::Parse: tiny(parse):{"clapton":true,"hendrix":false} 🔄 Compatibility with JSON::Create: create(tiny):{"clapton":true,"hendrix":false} 🔄 JSON::Parse and JSON::Create are still compatible too: {"clapton":true,"hendrix":false} (This example is included as L|https://fastapi.metacpan.org/source/BKB/JSON-Parse-0.61/examples/json-tiny-round-trip-demo.pl> in the distribution.) Most of the other CPAN modules use similar methods to L, so the above example can easily be adapted. See also L for various examples. 
=head3 Modifying the values L maps all the literals to read-only values. Because of this, attempting to modifying the boolean values in the hash reference returned by L will cause "Modification of a read-only value attempted" errors: my $in = '{"hocus":true,"pocus":false,"focus":null}'; my $p = json_parse ($in); $p->{hocus} = 99; # "Modification of a read-only value attempted" error occurs Since the hash values are read-only scalars, C<< $p->{hocus} = 99 >> is like this: undef = 99; If you need to modify the returned hash reference, then delete the value first: my $in = '{"hocus":true,"pocus":false,"focus":null}'; my $p = json_parse ($in); delete $p->{pocus}; $p->{pocus} = 99; # OK Similarly with array references, delete the value before altering: my $in = '[true,false,null]'; my $q = json_parse ($in); delete $q->[1]; $q->[1] = 'magic'; Note that the return values from parsing bare literals are not read-only scalars, so my $true = JSON::Parse::json_parse ('true'); $true = 99; produces no error. This is because Perl copies the scalar. =head1 RESTRICTIONS This module imposes the following restrictions on its input. =over =item JSON only JSON::Parse is a strict parser. It only accepts input which exactly meets the criteria of L. That means, for example, JSON::Parse does not accept single quotes (') instead of double quotes ("), or numbers with leading zeros, like 0123. JSON::Parse does not accept control characters (0x00 - 0x1F) in strings, missing commas between array or hash elements like C<["a" "b"]>, or trailing commas like C<["a","b","c",]>. It also does not accept trailing non-whitespace, like the second "]" in C<["a"]]>. You may find L by the same authors as JSON::Parse useful if you need to process JSON-like text with tolerance for errors. =item No incremental parsing JSON::Parse does not parse incrementally. It only parses fully-formed JSON strings which include all opening and closing brackets. This is an inherent part of the design of the module. 
Incremental parsing in the style of L would require some kind of callback structure to deal with the elements of the partially digested structures, but JSON::Parse was never designed to do this; it merely converts what it sees into a Perl structure. Claims to offer incremental JSON parsing in other modules' documentation should be diligently verified. =item UTF-8 only JSON::Parse only parses the UTF-8 format. If input is in a different Unicode encoding than UTF-8, convert the input before handing it to this module. For example, for the UTF-16 format, use Encode 'decode'; my $input_utf8 = decode ('UTF-16', $input); my $perl = parse_json ($input_utf8); or, for a file, use C<:encoding> (see L and L): open my $input, "<:encoding(UTF-16)", 'some-json-file'; JSON::Parse does not try to determine the nature of the octet stream using BOM markers. A BOM marker in the input consists of bytes C<0xFE> and C<0xFF>, both of which are invalid as UTF-8, and thus will cause a fatal error. This restriction to UTF-8 applies regardless of whether Perl thinks that the input string is a character string or a byte string. Non-UTF-8 input will cause an L error. The latest specification for JSON, L, specifies it to be a UTF-8 only format. JSON::Parse does not accept Unicode non-characters (U+FFFF, U+FDD0, etc.), UTF-8 representing surrogate pair code points, or bytes outside the range of Unicode code points as UTF-8 bytes. =back =head1 DIAGNOSTICS L does not produce error messages. L and L die on encountering invalid input. L uses L to pass error messages as warnings. Error messages have the line number, and the byte number where appropriate, of the input which caused the problem. The line number is formed simply by counting the number of "\n" (linefeed, ASCII 0x0A) characters in the whitespace part of the JSON. 
In L and L, parsing errors are fatal, so to continue after an error occurs, put the parsing into an C block: my $p; eval { $p = parse_json ($j); }; if ($@) { # handle error } The following error messages are produced: =over =item Unexpected character An unexpected character (byte) was encountered in the input. For example, when looking at the beginning of a string supposedly containing JSON, if the module encounters a plus sign, it will give an error like this: assert_valid_json ('+'); gives output JSON error at line 1, byte 1/1: Unexpected character '+' parsing initial state: expecting whitespace: '\n', '\r', '\t', ' ' or start of string: '"' or digit: '0-9' or minus: '-' or start of an array or object: '{', '[' or start of literal: 't', 'f', 'n' The message always includes a list of what characters are allowed. If there is some recognizable structure being parsed, the error message will include its starting point in the form "starting from byte n": assert_valid_json ('{"this":"\a"}'); gives output JSON error at line 1, byte 11/13: Unexpected character 'a' parsing string starting from byte 9: expecting escape: '\', '/', '"', 'b', 'f', 'n', 'r', 't', 'u' A feature of JSON is that parsing it requires only one byte to be examined at a time. Thus almost all parsing problems can be handled using the "Unexpected character" error type, including spelling errors in literals: assert_valid_json ('[true,folse]'); gives output JSON error at line 1, byte 8/12: Unexpected character 'o' parsing literal starting from byte 7: expecting 'a' and the missing second half of a surrogate pair: assert_valid_json ('["\udc00? <-- should be a second half here"]'); gives output JSON error at line 1, byte 9/44: Unexpected character '?' 
parsing unicode escape starting from byte 3: expecting '\' All kinds of errors can occur parsing numbers, for example a missing fraction, assert_valid_json ('[1.e9]'); gives output JSON error at line 1, byte 4/6: Unexpected character 'e' parsing number starting from byte 2: expecting digit: '0-9' and a leading zero, assert_valid_json ('[0123]'); gives output JSON error at line 1, byte 3/6: Unexpected character '1' parsing number starting from byte 2: expecting whitespace: '\n', '\r', '\t', ' ' or comma: ',' or end of array: ']' or dot: '.' or exponential sign: 'e', 'E' The error message is this complicated because all of the following are valid here: whitespace: C<[0 ]>; comma: C<[0,1]>, end of array: C<[0]>, dot: C<[0.1]>, or exponential: C<[0e0]>. These are all handled by this error. Thus the error messages are a little confusing as diagnostics. Versions of this module prior to 0.29 gave more informative messages like "leading zero in number". (The messages weren't documented.) The reason to change over to the single message was because it makes the parsing code simpler, and because the testing code described in L makes use of the internals of this error to check that the error messages produced actually do correspond to the invalid and valid bytes allowed by the parser, at the exact byte given. This is a bytewise error, thus for example if a miscoded UTF-8 appears in the input, an error message saying what bytes would be valid at that point will be printed. no utf8; use JSON::Parse 'assert_valid_json'; # Error in first byte: my $bad_utf8_1 = chr (hex ("81")); eval { assert_valid_json ("[\"$bad_utf8_1\"]"); }; print "$@\n"; # Error in third byte: my $bad_utf8_2 = chr (hex ('e2')) . chr (hex ('9C')) . 
'b'; eval { assert_valid_json ("[\"$bad_utf8_2\"]"); }; print "$@\n"; prints JSON error at line 1, byte 3/5: Unexpected character 0x81 parsing string starting from byte 2: expecting printable ASCII or first byte of UTF-8: '\x20-\x7f', '\xC2-\xF4' at examples/bad-utf8.pl line 10. JSON error at line 1, byte 5/7: Unexpected character 'b' parsing string starting from byte 2: expecting bytes in range 80-bf: '\x80-\xbf' at examples/bad-utf8.pl line 16. =item Unexpected end of input The end of the string was encountered before the end of whatever was being parsed was. For example, if a quote is missing from the end of the string, it will give an error like this: assert_valid_json ('{"first":"Suzuki","second":"Murakami","third":"Asada}'); gives output JSON error at line 1: Unexpected end of input parsing string starting from byte 47 =item Not surrogate pair While parsing a string, a surrogate pair was encountered. While trying to turn this into UTF-8, the second half of the surrogate pair turned out to be an invalid value. assert_valid_json ('["\uDC00\uABCD"]'); gives output JSON error at line 1: Not surrogate pair parsing unicode escape starting from byte 11 =item Empty input This error occurs for an input which is an empty (no length or whitespace only) or an undefined value. assert_valid_json (''); gives output JSON error: Empty input parsing initial state Prior to version 0.49, this error was produced by L only, but it is now also produced by L. =item Name is not unique This error occurs when parsing JSON when the user has chosen L. For example an input like my $p = JSON::Parse->new (); $p->detect_collisions (1); $p->run ('{"hocus":1,"pocus":2,"hocus":3}'); gives output JSON error at line 1, byte 23/31: Name is not unique: "hocus" parsing object starting from byte 1 at blib/lib/JSON/Parse.pm line 131. where the JSON object has two keys with the same name, C. The terminology "name is not unique" is from the JSON specification. 
=item Contradictory values for "true" and "false" =over =item User-defined value for JSON false evaluates as true This happens if you set JSON false to map to a true value: $jp->set_false (1); To switch off this warning, use L. 🎲 This warning was added in version 0.38. =item User-defined value for JSON true evaluates as false This happens if you set JSON true to map to a false value: $jp->set_true (undef); To switch off this warning, use L. 🎲 This warning was added in version 0.38. =item User-defined value overrules copy_literals This warning is given if you set up literals with L then you also set up your own true, false, or null values with L, L, or L. 🎲 This warning was added in version 0.38. =back =back =head1 PERFORMANCE On the author's computer, the module's speed of parsing is approximately the same as L, with small variations depending on the type of input. For validation, L is faster than any other module known to the author, and up to ten times faster than JSON::XS. Some special types of input, such as floating point numbers containing an exponential part, like "1e09", seem to be about two or three times faster to parse with this module than with L. In JSON::Parse, parsing of exponentials is done by the system's C function, but JSON::XS contains its own parser for exponentials, so these results may be system-dependent. At the moment the main place JSON::XS wins over JSON::Parse is in strings containing escape characters, where JSON::XS is about 10% faster on the module author's computer and compiler. As of version 0.33, despite some progress in improving JSON::Parse, I haven't been able to fully work out the reason behind the better speed. There is some benchmarking code in the github repository under the directory "benchmarks" for those wishing to test these claims. The script L|https://github.com/benkasminbullock/JSON-Parse/033269fa8972fdce8626aa65cd11a5394ab50492/benchmarks/bench> is an adaptation of the similar script in the L distribution. 
The script L|https://github.com/benkasminbullock/JSON-Parse/033269fa8972fdce8626aa65cd11a5394ab50492/benchmarks/pub-bench.pl> runs the benchmarks and prints them out as POD. The following benchmark tests used version 0.58_01 of JSON::Parse, version 4.03 of L, and version 4.25 of L on Perl version v5.32.0 compiled with Clang version FreeBSD clang version 10.0.1 on FreeBSD 12.2. The files in the "benchmarks" directory of JSON::Parse. F and F are the benchmarks used by L. =over =item short.json Repetitions: 10 x 100 = 1000 --------------+------------+------------+ module | 1/min | min | --------------|------------|------------| Cpanel | 313007.761 | 0.0000319 | JP::valid | 838860.800 | 0.0000119 | JSON::Parse | 310689.185 | 0.0000322 | JSON::XS | 303935.072 | 0.0000329 | --------------+------------+------------+ =item long.json Repetitions: 10 x 100 = 1000 --------------+------------+------------+ module | 1/min | min | --------------|------------|------------| Cpanel | 5611.860 | 0.0017819 | JP::valid | 13586.991 | 0.0007360 | JSON::Parse | 4924.048 | 0.0020308 | JSON::XS | 6406.452 | 0.0015609 | --------------+------------+------------+ =item words-array.json Repetitions: 10 x 100 = 1000 --------------+------------+------------+ module | 1/min | min | --------------|------------|------------| Cpanel | 34749.826 | 0.0002878 | JP::valid | 270600.258 | 0.0000370 | JSON::Parse | 34017.064 | 0.0002940 | JSON::XS | 35726.610 | 0.0002799 | --------------+------------+------------+ =item exp.json Repetitions: 10 x 100 = 1000 --------------+------------+------------+ module | 1/min | min | --------------|------------|------------| Cpanel | 46759.242 | 0.0002139 | JP::valid | 117817.528 | 0.0000849 | JSON::Parse | 46759.242 | 0.0002139 | JSON::XS | 19195.899 | 0.0005209 | --------------+------------+------------+ =item literals.json Repetitions: 10 x 100 = 1000 --------------+------------+------------+ module | 1/min | min | --------------|------------|------------| Cpanel | 
33026.016 | 0.0003028 | JP::valid | 384798.532 | 0.0000260 | JSON::Parse | 40840.351 | 0.0002449 | JSON::XS | 33689.189 | 0.0002968 | --------------+------------+------------+ =item cpantesters.json Repetitions: 10 x 100 = 1000 --------------+------------+------------+ module | 1/min | min | --------------|------------|------------| Cpanel | 212.377 | 0.0470860 | JP::valid | 1309.043 | 0.0076392 | JSON::Parse | 207.491 | 0.0481949 | JSON::XS | 226.439 | 0.0441620 | --------------+------------+------------+ =back =head1 SEE ALSO =over =item RFC 8259 JSON is specified in L. =item json.org L is the website for JSON, authored by Douglas Crockford. =back =head2 Other CPAN modules for parsing and producing JSON The ⭐ represents the number of votes this module has received on metacpan, on a logarithmic scale. Modules which we recommend are marked with 👍. Deprecated modules and modules which are definitely buggy (bug reports/pull requests ignored) and abandoned (no releases for several years) are marked with 👎 and/or 🐛. Modules we can't work out are marked with 😕. =over =item Modules by the same author =over =item JSON::Create 👍 L is a companion module to JSON::Parse by the same author. =item JSON::Repair L is an example module which demonstrates using JSON::Parse to apply some kinds of heuristics to repair "relaxed JSON" or otherwise broken JSON into compliant JSON. =item JSON::Server L is a module which offers a JSON-only, UTF-8 only server using C and L. =item JSON::Tokenize L is part of the JSON::Parse distribution, a tokenizer which reduces a JSON string to tokens. This makes the JSON::Parse tokenizer available to people who want to write their own JSON parsers. =item JSON::Whitespace L is for manipulating the "insignificant whitespace" part of JSON. =back =item Reading and writing JSON =over =item L [⭐⭐ Author: L; Date: C<2020-10-28>; Version: C<4.25>] This is a fork of L. Please see the module for details about the reasons for the fork. 
=item L [⭐ Author: L; Date: C<2020-11-18>; Version: C<1.00>] Slurp a JSON file into a data structure, and the reverse. It relies on L. =item L [⭐ Author: L; Date: C<2015-04-19>; Version: C<0.002>] Uses the JSON library from Glib, a library of C functions for the Linux GNOME desktop project, so it is independent of the other CPAN modules. Judging from the fairly sparse documentation, it seems to be a module where you build the JSON on the fly rather than converting a Perl structure wholesale into JSON. =item L [⭐⭐ Author: L; Date: C<2021-01-24>; Version: C<4.03>] This calls on either L or L. =item L [Author: L; Date: C<2010-09-29>; Version: C<0.47>] 👎🐛 This module "Does What I Want", where "I" refers to the module's author. Development seems to have ceased in 2010, there is a long list of unfixed bugs, and some of the module's features seem to predate Unicode support in Perl. It is written in XS, and it claims to accept a wide variety of non-JSON formats such as comments, single-quoted strings, trailing commas, etc. =item L [⭐⭐ Author: L; Date: C<2021-01-23>; Version: C<4.06>] This is part of the Perl core, installed when you install Perl. "PP" stands for "Pure Perl", which means it is in Perl-only without the XS (C-based) parsing. This is slower but may be necessary if you cannot install modules requiring a C compiler. =item L [⭐ Author: L; Date: C<2019-10-30>; Version: C<0.12>] Convenient file slurping and spurting of data using JSON. Uses L or L if available. The basic idea seems to be that it uses context to return arrays or hashes as required, and read and write files without extra stages of opening and closing the file. =item L [⭐⭐ Author: L; Date: C<2020-10-26>; Version: C<1.34>] 👎🐛 Takes advantage of a similarity between YAML (yet another markup language) and JSON to provide a JSON parser/producer using L. 
We have never tried this module, but it seems to be semi-deprecated (the ABSTRACT says "consider using JSON::XS instead!") and L about things like failing to process equals signs. However, the maintainer is fixing some of the bugs and making new releases, so we're not really sure. =item L [⭐⭐ Author: L; Date: C<2017-11-12>; Version: C<0.58>] This is a fork of L. =item L [⭐⭐⭐ Author: L; Date: C<2020-10-27>; Version: C<4.03>] This is an all-purpose JSON module in XS, which means it requires a C compiler to install. =item L [⭐ Author: L; Date: C<2011-08-05>; Version: C<0.10>] 👎🐛 Wraps a C library called yajl. The module has been abandoned since ten years ago. Bug reports include serious errors, and pull requests have been ignored. =item L [⭐⭐⭐ Author: L; Date: C<2021-01-17>; Version: C<8.71>] Part of the L standalone web framework, "pure Perl" JSON reader/writer. As of version 8.70 of Mojolicious, this actually depends on L but will load L if it is available. =back =item Combination modules These modules rely on more than one back-end module to process JSON for you. =over =item L [⭐ Author: L; Date: C<2015-06-10>; Version: C<1.39>] 👎 This now-deprecated module combines L, L versions one and two, and L. =item L [⭐⭐ Author: L; Date: C<2020-11-13>; Version: C<1.004003>] A module which combines L, L, and L. The original L combines L and L, but this prioritizes L over L. =item L [Author: L; Date: C<2008-02-13>; Version: C<0.31>] 👎 A "combination module" which supports two different interfaces of L. However, JSON::XS is now onto version 4. =item L [⭐ Author: L; Date: C<2019-08-07>; Version: C<1.002>] 👎 This pulls in L instead of L for L users. It seems to have been rendered obsolete by modern versions of Mojolicious due to changes to make that depend on L if available. =back =item Test-related modules =over =item L [Author: L; Date: C<2019-08-07>; Version: C<0.05>] =item L [⭐ Author: L; Date: C<2018-04-24>; Version: C<0.05>] Compare JSON with L. 
As of version 0.05, it relies on L. =item L [⭐ Author: L; Date: C<2009-08-09>; Version: C<0.11>] 👎 This offers a way to compare two different JSON strings to see if they refer to the same object. The most recent version, 0.11, was released in 2009, and it relies on the deprecated L, which makes it essentially abandoned. =item L [Author: L; Date: C<2012-09-14>; Version: C<0.2>] 👎 Test whether one JSON or Perl structure entails/subsumes another. The most recent version is from 2012, and it relies on L, so it is probably abandoned. Also, oddly but not uniquely for CPAN modules with the name JSON in the title, it seems to not actually have that much to do with JSON, which is a data serialisation format, but actually be testing Perl hashes and arrays. =item L [Author: L; Date: C<2016-04-28>; Version: C<0.02>] JSON Test Utility. As of version 0.02, it relies on L but it is able to use L instead, and so probably L would be OK too. According to the documentation, it can test JSON for validity and compare JSON strings with keys in a different order, and presumably with different whitespace. =back =item Type-related modules These untangle numbers, strings, and booleans into JSON types. =over =item L [Author: L; Date: C<2015-10-26>; Version: C] 😕 Virtually undocumented, it's not clear what this does. =item L [⭐ Author: L; Date: C<2012-10-17>; Version: C<0.05>] Change the type of a Perl variable so that it comes out as a number, a string, or a boolean in the output JSON. =item L [Author: L; Date: C<2017-04-01>; Version: C<0.03>] The module is barely documented, but from looking at L, this seems to enable you to change the output type of a number or a string so that you can, for example, make the number C<1> come out as either a number, C<1>, a string C<"1">, or a boolean, C, in the output JSON. 
=item L [⭐ Author: L; Date: C<2019-12-26>; Version: C<0.006>] "Replace mushy strings and numbers with rigidly typed replacements" Since Perl muddles strings and numbers, this enables you to work out whether your input JSON was C<"123"> (a string) or C<123> (a number). =back =item Special-purpose modules =over =item L [⭐ Author: L; Date: C<2015-03-04>; Version: C<1.000>] Convert JSON data to other formats. It reads your JSON file or input and converts it into either YAML or Perl native format using L. =item L [⭐⭐ Author: L; Date: C<2016-07-08>; Version: C<0.46>] 👍 This module offers C and C literals in Perl, so you just have use boolean; my $something = true; This is very useful for dealing with JSON. =item L [Author: L; Date: C<2014-12-25>; Version: C<1.5202>] Configuration files in JSON, with hash comments also allowed. =item L [⭐ Author: L; Date: C<2017-09-03>; Version: C<1.001>] For one-liners. =over If you use this module from the command-line, the last value of your one-liner (-e) code will be serialized as JSON data. =back =item L [Author: L; Date: C<2012-07-27>; Version: C] "Embed JSON data structures directly into your Perl code". Relies on L. =item L [Author: L; Date: C<2015-04-16>; Version: C<0.04>] Create JSON under memory limitations. =item L [⭐ Author: L; Date: C<2020-06-09>; Version: C<0.130>] 🌈 This module generates JSON colorized with ANSI escape sequences. =item L [⭐ Author: L; Date: C<2014-09-11>; Version: C<0.004>] =item L [Author: L; Date: C<2020-01-27>; Version: C<0.006>] C is a special-purpose module for parsing JSON objects which have key collisions (something like C<{"a":1,"a":2}>) within objects. (JSON::Parse's handling of key collisions is discussed in L in this document.) =item L [Author: L; Date: C<2015-02-04>; Version: C] Automatically change a JSON string when a data structure changes using tied scalars. 
=back =item Patch, path, pointer, schema, and transform modules =over =item L [Author: L; Date: C<2017-07-07>; Version: C<0.08>] "Asserts JSONPaths into a JSON data structure for correct values/matches" =item L =item L =item L [⭐ Author: L; Date: C<2016-02-24>; Version: C<0.04>] =item L [Author: L; Date: C<2018-10-25>; Version: C<0.04>] 😕 We don't know what this does, or how it relates to JSON. The example in the synopsis section of the document doesn't show any JSON, it shows an example of altering nested hashes in Perl. =item L [⭐ Author: L; Date: C<2018-05-05>; Version: C<0.420>] Search nested hashref/arrayref structures using JSONPath. =item L [⭐ Author: L; Date: C<2015-08-13>; Version: C<0.07>] Extract parts of a JSON string. =item L "Generate example JSON structures from JSON Schema definitions" =item L [⭐ Author: L; Date: C<2014-09-28>; Version: C<0.104>] Transform JSON using JsonT =item L [⭐ Author: L; Date: C<2020-01-01>; Version: C<0.03>] =item L [⭐⭐ Author: L; Date: C<2021-01-24>; Version: C<4.12>] "Validate data against a JSON schema" - you can decide what the JSON is supposed to contain. =back =item JSON extensions These modules extend JSON with comments and other things. =over =item L [⭐ Author: L; Date: C<2014-12-10>; Version: C<0.000002>] "A relaxed and easy diffable JSON variant" =item L [Author: L; Date: C<2016-04-30>; Version: C<0.05>] "An extension of JSON that allows for better human-readability". =item L [⭐ Author: L; Date: C<2020-04-27>; Version: C] "Relaxed JSON with a little bit of YAML" =back =item Web interactions via JSON =over =item L [⭐⭐ Author: L; Date: C<2021-01-10>; Version: C<0.031>] Module covers JSON Web Tokens, JSON Web Signature, and JSON Web Encryption. =item L [⭐ Author: L; Date: C<2019-07-01>; Version: C] Combines L and L to make a unified module to communicate with a web server via JSON. 
=item L [⭐ Author: L; Date: C<2018-05-11>; Version: C<0.014>] =item L [⭐ Author: L; Date: C<2015-05-27>; Version: C<1.02>] "Make working with JSON Web API's as painless as possible" =back =item Extension modules These modules extend the existing modules with some extra bits. =over =item L [Author: L; Date: C<2015-04-01>; Version: C<1.01>] Provides booleans and number/string forcing for L. =item L [⭐ Author: L; Date: C<2011-09-19>; Version: C<0.29>] Switches on formatting and strict utf8 in a L object. =back =item Demonstration modules These modules provide a JSON parser as a demonstration of another technology. =over =item L [Author: L; Date: C<2014-08-27>; Version: C<0.02>] =item L [Author: L; Date: C<2018-03-25>; Version: C<0.101>] 🐛🦟🦋🐞 JSON parser as a single Perl Regex, originally by Randal Schwartz. This may be ingenious, but it's not remotely a useful JSON parser. For example, looking at the string part, it provides no Unicode validation, L and it L|https://metacpan.org/release/JSON-Decode-Regexp/source/lib/JSON/Decode/Regexp.pm#L137>. =item L [Author: L; Date: C<2019-06-18>; Version: C<1.08>] =item L [Author: L; Date: C<2020-01-22>; Version: C<0.31>] 🐛 Based on L. See L. =back =item Other modules Modules which are parts of bigger distributions have not been included here except by accident. =over =item L [Author: L; Date: C<2016-08-05>; Version: C<0.01>] Undocumented command-line tools for JSON. =item L [⭐ Author: L; Date: C<2011-02-02>; Version: C<1>] 👎🐛 JSON prettification script. For whatever reason the script encapsulates the entirety of an old version of the L module dating from before L was included in the Perl core. If you need this kind of script, there is something called L which comes with L, or equivalently L in the forked module L. 
=item L [⭐ Author: L; Date: C<2013-12-18>; Version: C<0.01>] =item L [Author: L; Date: C<2020-03-04>; Version: C] =item L [Author: L; Date: C<2008-08-30>; Version: C<0.02>] 👎 This is JavaScript code which was uploaded to CPAN. The original JavaScript is now obsolete since the thing it codes is included in all modern web browsers. =item L [Author: L; Date: C<2019-10-27>; Version: C<0.002>] Eval Perl code found in JSON. This module enables one to encode and decode Perl scalar references and code references to JSON. =item L [⭐ Author: L; Date: C<2019-07-13>; Version: C<0.202>] Something about one-liners. =item L [Author: L; Date: C<2016-04-23>; Version: C<0.02>] =item L [⭐ Author: L; Date: C<2020-10-25>; Version: C<0.03>] "JSON Lines is a convenient format for storing structured data that may be processed one record at a time." =item L [⭐ Author: L; Date: C<2015-11-28>; Version: C<1.001007>] 😕 Claims to be "no nonsense JSON encoding/decoding as method calls on data". From the documentation: =over Don't make me think and give me what I want! This module automatically figures out whether you want to encode a Perl data structure to JSON or decode a JSON string to a Perl data structure. =back =item L [Author: L; Date: C<2013-06-26>; Version: C] JavaScript object notation object notator. =item L [⭐ Author: L; Date: C<2017-11-10>; Version: C] =item L [⭐ Author: L; Date: C<2012-11-24>; Version: C<0.06>] =item L [Author: L; Date: C<2012-11-24>; Version: C<0.03>] =item L [Author: L; Date: C<2015-09-03>; Version: C<0.06>] Relies on L and the author's other module L, so that you can put either a file name or a JSON string as the argument and it tries to work out which one you have given it. That is ingenious, but it seems that if you are a programmer who cannot distinguish whether your input string is a file name or JSON, you have a very serious problem. 
=item L [⭐ Author: L; Date: C<2020-04-18>; Version: C<1.004>] 😕 L claims it is a "thin wrapper around JSON::XS", but L, which seems to have partly been copy-pasted from the JSON::XS source code, but internally it doesn't make any reference to JSON::XS. The licence and copyright statement don't mention JSON::XS's original author at all so we're not sure if this is a fork, a wrapper, or a reimplementation. We haven't tried downloading this or installing it, but according to the documentation, this module encodes numbers with quotes around them, so C<< {this => 2} >> turns into C<{"this":"2"}>. =item L [Author: L; Date: C<2021-01-24>; Version: C<1.1>] =item L [Author: L; Date: C<2017-05-02>; Version: C<1.01>] Nibble complete JSON objects from buffers. This seems to be for extracting JSON from the midst of noise. =back =back =head1 SCRIPT A script "validjson" is supplied with the module. This runs L on its inputs, so run it like this. validjson *.json The default behaviour is to just do nothing if the input is valid. For invalid input it prints what the problem is: validjson ids.go ids.go: JSON error at line 1, byte 1/7588: Unexpected character '/' parsing initial state: expecting whitespace: '\n', '\r', '\t', ' ' or start of string: '"' or digit: '0-9' or minus: '-' or start of an array or object: '{', '[' or start of literal: 't', 'f', 'n'. If you need confirmation, use its --verbose option: validjson -v *.json atoms.json is valid JSON. ids.json is valid JSON. kanjidic.json is valid JSON. linedecomps.json is valid JSON. radkfile-radicals.json is valid JSON. =head1 DEPENDENCIES =over =item L =back =head1 EXPORTS The module exports nothing by default. Functions L, L, L, L and L, as well as the old function names L, L, and L, can be exported on request. 
All of the functions can be exported using the tag ':all': use JSON::Parse ':all'; =head1 TESTING =head2 Internal testing code The module incorporates extensive testing related to the production of error messages and validation of input. Some of the testing code is supplied with the module in the F subdirectory of the distribution. More extensive testing code is in the git repository. This is not supplied in the CPAN distribution. A script, L|https://github.com/benkasminbullock/JSON-Parse/033269fa8972fdce8626aa65cd11a5394ab50492/randomjson.pl>, generates a set number of bytes of random JSON and checks that the module's bytewise validation of input is correct. It does this by taking a valid fragment, then adding each possible byte from 0 to 255 to see whether the module correctly identifies it as valid or invalid at that point, then randomly picking one of the valid bytes and adding it to the fragment and continuing the process until a complete valid JSON input is formed. The module has undergone about a billion repetitions of this test. This setup relies on a C file, L|https://github.com/benkasminbullock/JSON-Parse/033269fa8972fdce8626aa65cd11a5394ab50492/json-random-test.c>, which isn't in the CPAN distribution, and it also requires L|https://github.com/benkasminbullock/JSON-Parse/033269fa8972fdce8626aa65cd11a5394ab50492/Json3.xs> to be edited to make the macro C true (uncomment line 7 of the file). The testing code uses C setjmp/longjmp, so it's not guaranteed to work on all operating systems and is commented out for CPAN releases. A pure C version called L|https://github.com/benkasminbullock/JSON-Parse/033269fa8972fdce8626aa65cd11a5394ab50492/random-test.c> also exists. This applies exactly the same tests, and requires no Perl at all. If you're interested in testing your own JSON parser, the outputs generated by L|https://github.com/benkasminbullock/JSON-Parse/033269fa8972fdce8626aa65cd11a5394ab50492/randomjson.pl> are quite a good place to start. 
The default is to produce UTF-8 output, which looks pretty horrible since it tends to produce long strings of UTF-8 garbage. (This is because it chooses randomly from 256 bytes and the end-of-string marker C<"> has only a 1/256 chance of being chosen, so the strings tend to get long and messy). You can mess with the internals of JSON::Parse by setting MAXBYTE in F to 0x80, recompiling (you can ignore the compiler warnings), and running F again to get just ASCII random JSON things. This breaks the UTF-8 functionality of JSON::Parse, so please don't install that version. =head2 JSON Parsing Test Suite JSON::Parse version 0.58 passes most of the JSON Parsing Test Suite, with the exception that JSON::Parse rejects various erroneous UTF-8 inputs, for example JSON::Parse will throw an error for non-character code points like Unicode U+FFFF and U+10FFFF. This parser only accepts valid UTF-8 as input. See L. In our opinion it would be a disservice to users of this module to allow bytes containing useless fragments such as incomplete parts of surrogate pairs, or invalid characters, just because the JSON specification doesn't actually explicitly demand rejecting these kinds of garbage inputs. Please see the function C in the file F for exactly which of these elements of the test suite we do not comply with. We note that this comment from Douglas Crockford, the inventor of JSON, L, dated 2005, agrees with our opinion on this point. JSON::Parse version 0.58 also introduced L and L to prevent the stack overflow errors caused by some very deeply nested inputs such as those of the JSON Parsing Test Suite. =head1 ACKNOWLEDGEMENTS Toby Inkster (TOBYINK) suggested some of the new function names which replaced the L names. Nicolas Immelman and Shlomi Fish (SHLOMIF) reported memory leaks which were fixed in 0.32 and 0.40. Github user kolmogorov42 reported a bug which led to 0.42. Github user SteveGlassman found an error in string copying for long strings, fixed in 0.57. 
Lars Dɪᴇᴄᴋᴏᴡ (DAXIM) pointed out problems with the JSON Parsing Test Suite which led to the addition of stack protection and L and L in 0.58. =head1 AUTHOR Ben Bullock, =head1 COPYRIGHT & LICENCE This package and associated files are copyright (C) 2013-2021 Ben Bullock. You can use, copy, modify and redistribute this package and associated files under the Perl Artistic Licence or the GNU General Public Licence. JSON-Parse-0.61/lib/JSON/Tokenize.pod000644 001751 001751 00000006764 14011073353 016462 0ustar00benben000000 000000 =encoding UTF-8 =head1 NAME JSON::Tokenize - Tokenize JSON =head1 SYNOPSIS use JSON::Tokenize ':all'; my $input = '{"tuttie":["fruity", true, 100]}'; my $token = tokenize_json ($input); print_tokens ($token, 0); sub print_tokens { my ($token, $depth) = @_; while ($token) { my $start = tokenize_start ($token); my $end = tokenize_end ($token); my $type = tokenize_type ($token); print " " x $depth; my $value = substr ($input, $start, $end - $start); print "'$value' has type '$type'.\n"; my $child = tokenize_child ($token); if ($child) { print_tokens ($child, $depth+1); } my $next = tokenize_next ($token); $token = $next; } } This outputs '{"tuttie":["fruity", true, 100]}' has type 'object'. '"tuttie"' has type 'string'. ':' has type 'colon'. '["fruity", true, 100]' has type 'array'. '"fruity"' has type 'string'. ',' has type 'comma'. 'true' has type 'literal'. ',' has type 'comma'. '100' has type 'number'. =head1 VERSION This documents version 0.61 of JSON::Tokenize corresponding to L released on Thu Feb 11 09:14:04 2021 +0900. =head1 DESCRIPTION This is a module for tokenizing a JSON string. "Tokenizing" means breaking the string into individual tokens, without creating any Perl structures. It uses the same underlying code as L. Tokenizing can be used for tasks such as picking out or searching through parts of a large JSON structure without storing each part of the entire structure in memory. 
This module is an experimental part of L and its interface is likely to change. The tokenizing functions are currently written in a very primitive way. =head1 FUNCTIONS =head2 tokenize_child my $child = tokenize_child ($child); Walk the tree of tokens. =head2 tokenize_end my $end = tokenize_end ($token); Get the end of the token as a byte offset from the start of the string. Note this is a byte offset not a character offset. =head2 tokenize_json my $token = tokenize_json ($json); =head2 tokenize_next my $next = tokenize_next ($token); Walk the tree of tokens. =head2 tokenize_start my $start = tokenize_start ($token); Get the start of the token as a byte offset from the start of the string. Note this is a byte offset not a character offset. =head2 tokenize_text my $text = tokenize_text ($json, $token); Given a token C<$token> from this parsing and the JSON in C<$json>, return the text which corresponds to the token. This is a convenience function written in Perl which uses L and L and C to get the string from C<$json>. =head2 tokenize_type my $type = tokenize_type ($token); Get the type of the token as a string. The possible return values are "array", "initial state", "invalid", "literal", "number", "object", "string", "unicode escape" =head1 AUTHOR Ben Bullock, =head1 COPYRIGHT & LICENCE This package and associated files are copyright (C) 2016-2021 Ben Bullock. You can use, copy, modify and redistribute this package and associated files under the Perl Artistic Licence or the GNU General Public Licence. 
JSON-Parse-0.61/lib/JSON/Tokenize.pm000644 001751 001751 00000001735 14011073306 016303 0ustar00benben000000 000000 package JSON::Tokenize; use warnings; use strict; require Exporter; our @ISA = qw(Exporter); use JSON::Parse; our @EXPORT_OK = qw/ tokenize_child tokenize_end tokenize_json tokenize_next tokenize_start tokenize_text tokenize_type /; our %EXPORT_TAGS = ('all' => \@EXPORT_OK); use Carp; our $VERSION = '0.61'; sub tokenize_text { my ($input, $token) = @_; if (! $input || ! $token) { croak "tokenize_text requires input string and JSON::Tokenize object"; } my $start = tokenize_start ($token); my $length = tokenize_end ($token) - $start; my $text; if (utf8::is_utf8 ($input)) { # $start and $length refer to bytes, so we need to convert # $input into bytes. my $copy = $input; utf8::encode ($copy); $text = substr ($copy, $start, $length); # Make the output utf8-flagged. utf8::decode ($text); } else { $text = substr ($input, $start, $length); } return $text; } 1; JSON-Parse-0.61/lib/JSON/Parse.pm000644 001751 001751 00000004370 14011073306 015563 0ustar00benben000000 000000 package JSON::Parse; use warnings; use strict; require Exporter; our @ISA = qw(Exporter); our @EXPORT_OK = qw/ assert_valid_json json_file_to_perl json_to_perl parse_json parse_json_safe read_json valid_json validate_json /; our %EXPORT_TAGS = ( all => \@EXPORT_OK, ); use Carp; our $VERSION = '0.61'; require XSLoader; XSLoader::load (__PACKAGE__, $VERSION); # Experimental, return a string of JSON as the error. our $json_diagnostics; # JSON "null" value. Although we're now using PL_sv_yes and PL_sv_no, # we don't use PL_sv_undef, because perldoc perlguts says it's a bad # idea. 
our $null; sub parse_json_safe { my $p; eval { $p = parse_json_safer (@_); }; if ($@) { my $error = $@; if (ref $error eq 'HASH') { my $error_as_string = $error->{"error as string"}; carp "JSON::Parse::parse_json_safe: $error_as_string"; } else { $error =~ s/at\s\S+\.pm\s+line\s+[0-9]+\.\s*$//; carp "JSON::Parse::parse_json_safe: $error"; } return undef; } return $p; } # Old names of subroutines. sub json_to_perl { goto &parse_json; } sub validate_json { goto &assert_valid_json; } sub read_file { my ($file_name) = @_; if (! -f $file_name) { # Trap possible errors from "open" before getting there. croak "File does not exist: '$file_name'"; } my $json = ''; open my $in, "<:encoding(utf8)", $file_name or croak "Error opening $file_name: $!"; while (<$in>) { $json .= $_; } close $in or croak $!; return $json; } sub JSON::Parse::read { my ($jp, $file_name) = @_; my $json = read_file ($file_name); return $jp->parse ($json); } sub read_json { my ($file_name) = @_; my $json = read_file ($file_name); return parse_json ($json); } sub valid_json { my ($json) = @_; if (! $json) { return 0; } my $ok = eval { assert_valid_json (@_); 1; }; return $ok; } sub json_file_to_perl { goto &read_json; } sub run { my ($parser, $json) = @_; if ($parser->get_warn_only ()) { my $out; eval { $out = $parser->run_internal ($json); }; if ($@) { warn "$@"; } return $out; } else { return $parser->run_internal ($json); } } sub parse { goto &run; } 1; JSON-Parse-0.61/lib/JSON/Whitespace.pod000644 001751 001751 00000004010 14011073353 016744 0ustar00benben000000 000000 =encoding UTF-8 =head1 NAME JSON::Whitespace - Alter the insignificant whitespace of JSON =head1 SYNOPSIS use JSON::Whitespace ':all'; my $in = < released on Thu Feb 11 09:14:04 2021 +0900. =head1 DESCRIPTION This module offers functions to manipulate the "insignificant whitespace" part of a JSON string (the whitespace which is not inside strings). 
According to L "insignificant whitespace" consists of space (C<%x20>), horizontal tab (C<%x09>), line feed or new line (C<%x0A>) and carriage return (C<%x0D>). =head1 FUNCTIONS =head2 json_indent my $indented = json_indent ($json); Add indentation to C<$json>. =head2 json_minify my $minified = json_minify ($json); Remove all whitespace, including trailing newlines, from C<$json>. =head1 SEE ALSO Documentation about JSON is in L. JSON::Whitespace is based on L, which breaks JSON into tokens without putting it into Perl structures. =head1 AUTHOR Ben Bullock, =head1 COPYRIGHT & LICENCE This package and associated files are copyright (C) 2016-2021 Ben Bullock. You can use, copy, modify and redistribute this package and associated files under the Perl Artistic Licence or the GNU General Public Licence. JSON-Parse-0.61/lib/JSON/Whitespace.pm000644 001751 001751 00000000662 14011073306 016605 0ustar00benben000000 000000 package JSON::Whitespace; use warnings; use strict; require Exporter; our @ISA = qw(Exporter); our @EXPORT_OK = qw/json_minify/; our %EXPORT_TAGS = ( all => \@EXPORT_OK, ); use warnings; use strict; use Carp; our $VERSION = '0.61'; use JSON::Tokenize 'tokenize_json'; sub json_minify { my ($json) = @_; my $tokens = tokenize_json ($json); my $nospace = strip_whitespace ($tokens, $json); return $nospace; } 1; JSON-Parse-0.61/examples/long-number.pl000644 001751 001751 00000000334 12253505156 017241 0ustar00benben000000 000000 #!/home/ben/software/install/bin/perl use warnings; use strict; use JSON::Parse ':all'; my $long_number = '[12345678901234567890123456789012345678901234567890]'; my $out = parse_json ($long_number); print "$out->[0]\n"; JSON-Parse-0.61/examples/bad-utf8.pl000644 001751 001751 00000000601 12257216145 016424 0ustar00benben000000 000000 #!/home/ben/software/install/bin/perl use warnings; use strict; no utf8; use JSON::Parse 'assert_valid_json'; # Error in first byte: my $bad_utf8_1 = chr (hex ("81")); eval { assert_valid_json 
("[\"$bad_utf8_1\"]"); }; print "$@\n"; # Error in third byte: my $bad_utf8_2 = chr (hex ('e2')) . chr (hex ('9C')) . 'b'; eval { assert_valid_json ("[\"$bad_utf8_2\"]"); }; print "$@\n"; JSON-Parse-0.61/examples/assert.pl000644 001751 001751 00000000375 12256000620 016307 0ustar00benben000000 000000 #!/home/ben/software/install/bin/perl use warnings; use strict; use JSON::Parse 'assert_valid_json'; eval { assert_valid_json ('["xyz":"b"]'); }; if ($@) { print "Your JSON was invalid: $@\n"; } # Prints "Unexpected character ':' parsing array" JSON-Parse-0.61/examples/bad-utf8.txt000644 001751 001751 00000000570 12257216151 016632 0ustar00benben000000 000000 JSON error at line 1, byte 3/5: Unexpected character 0x81 parsing string starting from byte 2: expecting printable ASCII or first byte of UTF-8: '\x20-\x7f', '\xC2-\xF4' at examples/bad-utf8.pl line 10. JSON error at line 1, byte 5/7: Unexpected character 'b' parsing string starting from byte 2: expecting bytes in range 80-bf: '\x80-\xbf' at examples/bad-utf8.pl line 16. JSON-Parse-0.61/examples/unicode-details.pl000644 001751 001751 00000002144 13016247667 020076 0ustar00benben000000 000000 #!/home/ben/software/install/bin/perl use warnings; use strict; use JSON::Parse ':all'; use Unicode::UTF8 'decode_utf8'; binmode STDOUT, ":encoding(utf8)"; no utf8; my $highbytes = "か"; my $not_utf8 = "$highbytes\\u3042"; my $test = "{\"a\":\"$not_utf8\"}"; my $out = parse_json ($test); # JSON::Parse does something unusual here in promoting the first part # of the string into UTF-8. print "JSON::Parse gives this: ", $out->{a}, "\n"; # Perl cannot assume that $highbytes is in UTF-8, so it has to just # turn the initial characters into garbage. my $add_chr = $highbytes . chr (0x3042); print "Perl's output is like this: ", $add_chr, "\n"; # In fact JSON::Parse's behaviour is equivalent to this: my $equiv = decode_utf8 ($highbytes) . 
chr (0x3042); print "JSON::Parse did something like this: ", $equiv, "\n"; # With character strings switched on, Perl and JSON::Parse do the same # thing. use utf8; my $is_utf8 = "か"; my $test2 = "{\"a\":\"$is_utf8\\u3042\"}"; my $out2 = parse_json ($test2); print "JSON::Parse: ", $out2->{a}, "\n"; my $add_chr2 = $is_utf8 . chr (0x3042); print "Native Perl: ", $add_chr2, "\n"; JSON-Parse-0.61/examples/first-bit.pl000644 001751 001751 00000001504 12440454762 016723 0ustar00benben000000 000000 #!/home/ben/software/install/bin/perl use warnings; use strict; use JSON::Parse 'assert_valid_json'; my $json = <new (); my $jc = JSON::Create->new (sort => 1); print "First do a round-trip of our modules:\n\n"; print $jc->create ($jp->parse ($cream)), "\n\n"; print "Now do a round-trip of JSON::Tiny:\n\n"; print encode_json (decode_json ($cream)), "\n\n"; print "🥴 First, incompatible mode:\n\n"; print 'tiny(parse): ', encode_json ($jp->parse ($cream)), "\n"; print 'create(tiny): ', $jc->create (decode_json ($cream)), "\n\n"; # Set our parser to produce these things as literals: $jp->set_true (JSON::Tiny::true); $jp->set_false (JSON::Tiny::false); print "🔄 Compatibility with JSON::Parse:\n\n"; print 'tiny(parse):', encode_json ($jp->parse ($cream)), "\n\n"; $jc->bool ('JSON::Tiny::_Bool'); print "🔄 Compatibility with JSON::Create:\n\n"; print 'create(tiny):', $jc->create (decode_json ($cream)), "\n\n"; print "🔄 JSON::Parse and JSON::Create are still compatible too:\n\n"; print $jc->create ($jp->parse ($cream)), "\n"; JSON-Parse-0.61/examples/key-collision.pl000644 001751 001751 00000000357 13012717441 017576 0ustar00benben000000 000000 #!/home/ben/software/install/bin/perl use warnings; use strict; use JSON::Parse qw/parse_json parse_json_safe/; my $j = '{"a":1, "a":2}'; my $p = parse_json ($j); print "Ambiguous key 'a' is ", $p->{a}, "\n"; my $q = parse_json_safe ($j); JSON-Parse-0.61/examples/whitespace-synopsis.pl000644 001751 001751 00000000632 14003437357 021040 
0ustar00benben000000 000000 use warnings; use strict; use JSON::Whitespace ':all'; my $in = <new (); $jp->detect_collisions (1); eval { $jp->parse ('{"animals":{"cat":"moggy","cat":"feline","cat":"neko"}}'); }; print "$@\n" if $@; JSON-Parse-0.61/examples/array.pl000644 001751 001751 00000000233 13012716023 016117 0ustar00benben000000 000000 #!/home/ben/software/install/bin/perl use warnings; use strict; use JSON::Parse ':all'; my $perl = parse_json ('["a", "b", "c"]'); print ref $perl, "\n"; JSON-Parse-0.61/examples/kani.pl000644 001751 001751 00000000517 12256175564 015752 0ustar00benben000000 000000 #!/home/ben/software/install/bin/perl use warnings; use strict; use JSON::Parse ':all'; no utf8; # 蟹 my $kani = '["\u87f9"]'; my $p = parse_json ($kani); print "It's marked as a character string" if utf8::is_utf8 ($p->[0]); # Prints "It's marked as a character string" because it's upgraded # regardless of the input string's flags. JSON-Parse-0.61/examples/sasori.pl000644 001751 001751 00000000364 12255464222 016317 0ustar00benben000000 000000 #!/home/ben/software/install/bin/perl use warnings; use strict; use JSON::Parse ':all'; # The scalar $sasori looks like Unicode to Perl use utf8; my $sasori = '["蠍"]'; my $p = parse_json ($sasori); print utf8::is_utf8 ($p->[0]); # Prints 1. 
JSON-Parse-0.61/examples/synopsis.pl000644 001751 001751 00000000320 12255715005 016674 0ustar00benben000000 000000 #!/home/ben/software/install/bin/perl use warnings; use strict; use JSON::Parse 'parse_json'; my $json = '["golden", "fleece"]'; my $perl = parse_json ($json); # Same effect as $perl = ['golden', 'fleece']; JSON-Parse-0.61/examples/true-subs.pl000644 001751 001751 00000000365 14003431367 016746 0ustar00benben000000 000000 #!/home/ben/software/install/bin/perl use warnings; use strict; use JSON::Parse; my $json = '{"yes":true,"no":false}'; my $jp = JSON::Parse->new (); $jp->set_true ('Yes, that is so true'); my $out = $jp->parse ($json); print $out->{yes}, "\n"; JSON-Parse-0.61/examples/tokenize-synopsis.pl000755 001751 001751 00000001213 14003431376 020526 0ustar00benben000000 000000 #!/home/ben/software/install/bin/perl use warnings; use strict; use JSON::Tokenize ':all'; my $input = '{"tuttie":["fruity", true, 100]}'; my $token = tokenize_json ($input); print_tokens ($token, 0); sub print_tokens { my ($token, $depth) = @_; while ($token) { my $start = tokenize_start ($token); my $end = tokenize_end ($token); my $type = tokenize_type ($token); print " " x $depth; my $value = substr ($input, $start, $end - $start); print "'$value' has type '$type'.\n"; my $child = tokenize_child ($token); if ($child) { print_tokens ($child, $depth+1); } my $next = tokenize_next ($token); $token = $next; } } JSON-Parse-0.61/examples/hash.pl000644 001751 001751 00000000232 13012716014 015723 0ustar00benben000000 000000 #!/home/ben/software/install/bin/perl use warnings; use strict; use JSON::Parse ':all'; my $perl = parse_json ('{"a":1, "b":2}'); print ref $perl, "\n"; JSON-Parse-0.61/examples/ebi.pl000644 001751 001751 00000000373 12255464231 015556 0ustar00benben000000 000000 #!/home/ben/software/install/bin/perl use warnings; use strict; use JSON::Parse ':all'; # The scalar $ebi does not look like Unicode to Perl no utf8; my $ebi = '["海老"]'; my $p = parse_json 
($ebi); print utf8::is_utf8 ($p->[0]); # Prints nothing.