#include #include #include #include #include "internal.h" char *json_string_get_mut(struct json *str) { if (str->type != JSON_STRING) return NULL; return str->string; } const char *json_string_get_const(const struct json *string) { return json_string_get_mut((struct json *)string); } enum json_parse_result parse_string(struct json **json_out, struct raw_json *raw) { char *str; enum json_parse_result ret = parse_raw_string(&str, raw); if (ret != JSON_PARSE_OK) { return ret; } struct json *json = json_string_from(str); if (!json) { free(str); return JSON_OOM; } *json_out = json; return ret; } static inline enum json_parse_result parse_octet(uint8_t *out, const char ch) { uint8_t value = ch; if (value >= '0' && value <= '9') { *out = value - '0'; return JSON_PARSE_OK; } value |= 0x20; // set 6th bit to force lowercase. if (value >= 'a' && value <= 'f') { *out = 9 + (value & 0xF); // 'a' & 00001111 == 0001, but 0xA == 10, so +9 return JSON_PARSE_OK; } return JSON_PARSE_STRING_INVALID_UNICODE_ERR; } static inline enum json_parse_result parse_hex_pair(uint32_t *out, struct raw_json *raw) { if (raw->index + 4 > raw->size) { return JSON_PARSE_STRING_INVALID_UNICODE_ERR; } enum json_parse_result ret = JSON_PARSE_OK; uint8_t hihi = 0; uint8_t hilo = 0; ret = parse_octet(&hihi, raw->data[raw->index++]); if (ret != JSON_PARSE_OK) return ret; hihi <<= 4; ret = parse_octet(&hilo, raw->data[raw->index++]); if (ret != JSON_PARSE_OK) return ret; uint8_t hi = hihi | hilo; ret = parse_octet(&hihi, raw->data[raw->index++]); if (ret != JSON_PARSE_OK) return ret; hihi <<= 4; ret = parse_octet(&hilo, raw->data[raw->index++]); if (ret != JSON_PARSE_OK) return ret; uint8_t lo = hihi | hilo; *out = (hi << 8) | lo; return ret; } static inline enum json_parse_result parse_unicode(char *str, size_t len, size_t i, struct raw_json *raw) { enum json_parse_result ret = JSON_PARSE_OK; uint32_t codepoint = 0; ret = parse_hex_pair(&codepoint, raw); if (ret != JSON_PARSE_OK) { return ret; } if (codepoint <= 0x1F || (codepoint >= 0x7F && codepoint <= 0x9F)) { if (i + 6 > len || i + 6 > raw->size) { return JSON_PARSE_STRING_INVALID_ERR; } /* unescaping the same codepoints as we do on ascii */ switch (codepoint) { case 0x8: str[i++] = '\b'; return JSON_PARSE_OK; case 0x9: str[i++] = '\t'; return JSON_PARSE_OK; case 0xa: str[i++] = '\n'; return JSON_PARSE_OK; case 0xc: str[i++] = '\f'; return JSON_PARSE_OK; case 0xd: str[i++] = '\r'; return JSON_PARSE_OK; } /* rolling back index so we can parse the codepoint again */ raw->index -= 6; for (size_t limit = i + 6; i < limit; i++) { str[i] = raw->data[raw->index++]; } /* the calling function expects to do the last advance */ raw->index--; return ret; } /* here we're dealing with a utf-16 surrogate pair*/ if (codepoint >= 0xD800 && codepoint <= 0xDFFF) { if (raw->index + 6 > raw->size || (raw->data[raw->index++] != '\\' || raw->data[raw->index++] != 'u') || codepoint < 0xD800 || codepoint > 0xDBFF) { ret = JSON_PARSE_STRING_INVALID_UNICODE_ERR; return ret; } uint32_t second_codepoint = 0; ret = parse_hex_pair(&second_codepoint, raw); if (ret != JSON_PARSE_OK) { return ret; } if (second_codepoint < 0xDC00 || second_codepoint > 0xDFFF) { return JSON_PARSE_STRING_INVALID_UNICODE_ERR; } /* 0x3FF = 00000011111111 - we mask the lower 10 bits of both, * the first codepoint make up the high 10 bits of the result, * the second makes up the low 10 bits */ codepoint = ((codepoint & 0x3FF) << 10) | (second_codepoint & 0x3FF); codepoint += 0x10000; if (codepoint < 0x110000 && i + 4 < len) { str[i++] = 0xF0 | (codepoint >> 18); str[i++] = 0x80 | ((codepoint >> 12) & 0x3F); str[i++] = 0x80 | ((codepoint >> 6) & 0x3F); str[i] = 0x80 | (codepoint & 0x3F); } else { ret = JSON_PARSE_STRING_INVALID_UNICODE_ERR; return ret; } /* here it's utf-8 */ } else { if (codepoint < 0x80) { str[i] = codepoint; } else if (codepoint < 0x800) { str[i++] = 0xC0 | (codepoint >> 6); str[i] = 0x80 | (codepoint & 0x3F); } else if (codepoint < 0x10000) { str[i++] = 0xE0 | (codepoint >> 12); str[i++] = 0x80 | ((codepoint >> 6) & 0x3F); str[i] = 0x80 | (codepoint & 0x3F); } else { ret = JSON_PARSE_STRING_INVALID_UNICODE_ERR; return ret; } } /* the calling function expects to do the last advance */ raw->index--; return ret; } enum json_parse_result parse_raw_string(char **str_out, struct raw_json *raw) { assert(raw->data[raw->index] == '"'); assert(str_out); assert(raw); char *str = NULL; enum json_parse_result ret = JSON_PARSE_OK; size_t end = raw->index + 1; size_t skipped = 0; for (; end < raw->size && raw->data[end] != '"'; end++) { switch (raw->data[end]) { case '\n': case '\f': case '\b': case '\r': case '\t': case 0: ret = JSON_PARSE_STRING_INVALID_ESCAPE_ERR; goto err; case '\\': /* for unicode escapes, we can't skip bytes, since control * escape codes will not be parsed, and will be stored as is */ if (++end < raw->size && raw->data[end] != 'u') { skipped++; } break; } } if (raw->data[end] != '"') { ret = JSON_PARSE_STRING_INVALID_ERR; goto err; } size_t len = end - raw->index - skipped; str = calloc(len + 1, sizeof(char)); if (!str) { return JSON_OOM; } size_t i; for (i = 0, raw->index++; raw->index < raw->size && raw->data[raw->index] != '"' && i < len; raw->index++, i++) { if (raw->data[raw->index] == '\\' && raw->index + 1 < raw->size) { switch (raw->data[++raw->index]) { case '\\': case '\"': case '/': break; case 'n': str[i] = '\n'; continue; case 'f': str[i] = '\f'; continue; case 'b': str[i] = '\b'; continue; case 'r': str[i] = '\r'; continue; case 't': str[i] = '\t'; continue; case 'u': { raw->index++; ret = parse_unicode(str, len, i, raw); if (ret != JSON_PARSE_OK) goto err; continue; } default: ret = JSON_PARSE_STRING_INVALID_ESCAPE_ERR; goto err; } } str[i] = raw->data[raw->index]; } if (raw->data[raw->index++] != '"') { ret = JSON_PARSE_INVALID_STRING_ERR; goto err; } *str_out = str; goto end; err: free(str); end: return ret; } void json_string_set(struct json *dest, const char *string) { if (dest->type != JSON_STRING) { char *key = dest->key; dest->key = NULL; json_clear(dest); dest->key = key; dest->type = JSON_STRING; dest->string = strdup(string); return; } free(dest->string); dest->string = strdup(string); }