From 05862b802237df8bc61b0b45ef1ba4e70b0773f5 Mon Sep 17 00:00:00 2001 From: pommicket Date: Mon, 5 Dec 2022 19:05:14 -0500 Subject: full json parser --- json.c | 444 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 444 insertions(+) create mode 100644 json.c (limited to 'json.c') diff --git a/json.c b/json.c new file mode 100644 index 0000000..1b74e10 --- /dev/null +++ b/json.c @@ -0,0 +1,444 @@ +// JSON parser for LSP +// provides FAST(ish) parsing but SLOW lookup +// this is especially fast for small objects + +typedef struct { + u32 pos; + u32 len; +} JSONString; + +typedef struct JSONValue JSONValue; + +typedef struct { + u32 len; + // this is an index into the values array + // values[items..items+len] store the names + // values[items+len..items+2*len] store the values + u32 items; +} JSONObject; + +typedef struct { + u32 len; + // this is an index into the values array + // values[elements..elements+len] are the elements + u32 elements; +} JSONArray; + +enum { + JSON_NULL, + JSON_FALSE, + JSON_TRUE, + JSON_NUMBER, + JSON_STRING, + JSON_OBJECT, + JSON_ARRAY +}; + +struct JSONValue { + u8 type; + union { + double number; + JSONString string; + JSONArray array; + JSONObject object; + } val; +}; + + +typedef struct { + char error[64]; + const char *text; + // root = values[0] + JSONValue *values; +} JSON; + +#define SKIP_WHITESPACE while (json_is_space(text[index])) ++index; + + +static bool json_parse_value(JSON *json, u32 *p_index, JSONValue *val); + +// defining this instead of using isspace seems to be faster +// probably because isspace depends on the locale. +static inline bool json_is_space(char c) { + return c == ' ' || c == '\n' || c == '\r' || c == '\t'; +} + +static void json_debug_print_value(const JSON *json, u32 idx) { + JSONValue *value = &json->values[idx]; + switch (value->type) { + case JSON_NULL: printf("null"); break; + case JSON_FALSE: printf("false"); break; + case JSON_TRUE: printf("true"); break; + case JSON_NUMBER: printf("%f", value->val.number); break; + case JSON_STRING: { + JSONString *string = &value->val.string; + printf("\"%.*s\"", + (int)string->len, + json->text + string->pos); + } break; + case JSON_ARRAY: { + JSONArray *array = &value->val.array; + printf("["); + for (u32 i = 0; i < array->len; ++i) { + json_debug_print_value(json, array->elements + i); + printf(", "); + } + printf("]"); + } break; + case JSON_OBJECT: { + JSONObject *obj = &value->val.object; + printf("{"); + for (u32 i = 0; i < obj->len; ++i) { + json_debug_print_value(json, obj->items + i); + printf(": "); + json_debug_print_value(json, obj->items + obj->len + i); + printf(", "); + } + printf("}"); + } break; + } +} + +// count number of comma-separated values until +// closing ] or } +static u32 json_count(JSON *json, u32 index) { + int bracket_depth = 0; + int brace_depth = 0; + u32 count = 1; + const char *text = json->text; + SKIP_WHITESPACE; + // special case: empty object/array + if (text[index] == '}' || text[index] == ']') + return 0; + +// int mark[5000] = {0}; + for (; ; ++index) { + switch (text[index]) { + case '\0': + return 0; // bad no closing bracket + case '[': + ++bracket_depth; + break; + case ']': + --bracket_depth; + if (bracket_depth < 0) + return count; + break; + case '{': + ++brace_depth; +// mark[index] = 1; + break; + case '}': + --brace_depth; +// mark[index] = 1; + if (brace_depth < 0){ + // useful visualization for debugging +// for (int i = 0; text[i]; ++i ){ +// switch (mark[i]){ +// case 1: printf("\x1b[91m"); break; +// case 2: printf("\x1b[92m"); break; +// } +// printf("%c",text[i]); +// if (mark[i]) printf("\x1b[0m"); +// } +// printf("\n"); + + return count; + } + break; + case ',': + if (bracket_depth == 0 && brace_depth == 0) + ++count; + break; + case '"': { + ++index; // skip opening " + int escaped = 0; + for (; ; ++index) { +// mark[index] = 2; + switch (text[index]) { + case '\0': return 0; // bad no closing quote + case '\\': escaped = !escaped; break; + case '"': + if (!escaped) + goto done; + escaped = false; + break; + default: + escaped = false; + break; + } + } + done:; + } break; + } + } +} + +static bool json_parse_object(JSON *json, u32 *p_index, JSONObject *object) { + u32 index = *p_index; + const char *text = json->text; + ++index; // go past { + u32 count = json_count(json, index); + object->len = count; + object->items = arr_len(json->values); + arr_set_len(json->values, arr_len(json->values) + 2 * count); + + for (u32 i = 0; i < count; ++i) { + if (i > 0) { + if (text[index] != ',') { + strbuf_printf(json->error, "stuff after value in object"); + return false; + } + ++index; + } + SKIP_WHITESPACE; + JSONValue name = {0}, value = {0}; + if (!json_parse_value(json, &index, &name)) + return false; + SKIP_WHITESPACE; + if (text[index] != ':') { + strbuf_printf(json->error, "stuff after name in object"); + return false; + } + ++index; // skip : + SKIP_WHITESPACE; + if (!json_parse_value(json, &index, &value)) + return false; + SKIP_WHITESPACE; + json->values[object->items + i] = name; + json->values[object->items + count + i] = value; + } + + if (text[index] != '}') { + strbuf_printf(json->error, "mismatched brackets or quotes."); + return false; + } + ++index; // skip } + *p_index = index; + return true; +} + +static bool json_parse_array(JSON *json, u32 *p_index, JSONArray *array) { + u32 index = *p_index; + const char *text = json->text; + ++index; // go past [ + u32 count = json_count(json, index); + array->len = count; + array->elements = arr_len(json->values); + + arr_set_len(json->values, arr_len(json->values) + count); + + SKIP_WHITESPACE; + + for (u32 i = 0; i < count; ++i) { + if (i > 0) { + if (text[index] != ',') { + strbuf_printf(json->error, "stuff after element in array"); + return false; + } + ++index; + } + SKIP_WHITESPACE; + JSONValue element = {0}; + if (!json_parse_value(json, &index, &element)) + return false; + SKIP_WHITESPACE; + json->values[array->elements + i] = element; + } + + if (text[index] != ']') { + strbuf_printf(json->error, "mismatched brackets or quotes."); + return false; + } + ++index; // skip ] + *p_index = index; + return true; +} + +static bool json_parse_string(JSON *json, u32 *p_index, JSONString *string) { + u32 index = *p_index; + ++index; // skip opening " + string->pos = index; + const char *text = json->text; + bool escaped = false; + for (; ; ++index) { + switch (text[index]) { + case '"': + if (!escaped) + goto done; + escaped = false; + break; + case '\\': + escaped = !escaped; + break; + case '\0': + strbuf_printf(json->error, "string literal goes to end of JSON"); + return false; + default: + escaped = false; + break; + } + } + done: + string->len = index - string->pos; + ++index; // skip closing " + *p_index = index; + return true; +} + +static bool json_parse_number(JSON *json, u32 *p_index, double *number) { + char *endp = NULL; + const char *text = json->text; + u32 index = *p_index; + *number = strtod(text + index, &endp); + if (endp == text + index) { + strbuf_printf(json->error, "bad number"); + return false; + } + index = (u32)(endp - text); + *p_index = index; + return true; +} + +static bool json_parse_value(JSON *json, u32 *p_index, JSONValue *val) { + const char *text = json->text; + u32 index = *p_index; + SKIP_WHITESPACE; + switch (text[index]) { + case '{': + val->type = JSON_OBJECT; + if (!json_parse_object(json, &index, &val->val.object)) + return false; + break; + case '[': + val->type = JSON_ARRAY; + if (!json_parse_array(json, &index, &val->val.array)) + return false; + break; + case '"': + val->type = JSON_STRING; + if (!json_parse_string(json, &index, &val->val.string)) + return false; + break; + case ANY_DIGIT: + val->type = JSON_NUMBER; + if (!json_parse_number(json, &index, &val->val.number)) + return false; + break; + case 'f': + val->type = JSON_FALSE; + if (!str_has_prefix(&text[index], "false")) + return false; + index += 5; + break; + case 't': + val->type = JSON_TRUE; + if (!str_has_prefix(&text[index], "true")) + return false; + index += 4; + break; + case 'n': + val->type = JSON_NULL; + if (!str_has_prefix(&text[index], "null")) + return false; + index += 4; + break; + default: + strbuf_printf(json->error, "bad value"); + return false; + } + *p_index = index; + return true; +} + +// NOTE: text must live as long as json!!! +static bool json_parse(JSON *json, const char *text) { + memset(json, 0, sizeof *json); + json->text = text; + // @TODO: is this a good capacity? + arr_reserve(json->values, strlen(text) / 8); + arr_addp(json->values); // add root + JSONValue val = {0}; + u32 index = 0; + if (!json_parse_value(json, &index, &val)) + return false; + + SKIP_WHITESPACE; + if (text[index]) { + strbuf_printf(json->error, "extra text after end of root object"); + return false; + } + json->values[0] = val; + return true; +} + +static void json_free(JSON *json) { + arr_free(json->values); + memset(json, 0, sizeof *json); +} + +#if __unix__ +static void json_test_time_large(const char *filename) { + struct timespec start={0},end={0}; + FILE *fp = fopen(filename,"rb"); + if (!fp) { + perror(filename); + return; + } + + fseek(fp,0,SEEK_END); + size_t sz = (size_t)ftell(fp); + char *buf = calloc(1,sz+1); + rewind(fp); + fread(buf, 1, sz, fp); + fclose(fp); + for (int trial = 0; trial < 5; ++trial) { + clock_gettime(CLOCK_MONOTONIC, &start); + JSON json={0}; + bool success = json_parse(&json, buf); + if (!success) { + printf("FAIL: %s\n",json.error); + return; + } + + json_free(&json); + clock_gettime(CLOCK_MONOTONIC, &end); + + + + printf("time: %.1fms\n", + ((double)end.tv_sec*1e3+(double)end.tv_nsec*1e-6) + -((double)start.tv_sec*1e3+(double)start.tv_nsec*1e-6)); + } + +} +static void json_test_time_small(void) { + struct timespec start={0},end={0}; + int trials = 50000000; + clock_gettime(CLOCK_MONOTONIC, &start); + for (int trial = 0; trial < trials; ++trial) { + JSON json={0}; + bool success = json_parse(&json, "{\"hello\":\"there\"}"); + if (!success) { + printf("FAIL: %s\n",json.error); + return; + } + + json_free(&json); + } + clock_gettime(CLOCK_MONOTONIC, &end); + printf("time per trial: %.1fns\n", + (((double)end.tv_sec*1e9+(double)end.tv_nsec) + -((double)start.tv_sec*1e9+(double)start.tv_nsec)) + / trials); + +} +#endif + +static void json_debug_print(const JSON *json) { + printf("%u values (capacity %u, text length %zu)\n", + arr_len(json->values), arr_cap(json->values), strlen(json->text)); + json_debug_print_value(json, 0); +} + +#undef SKIP_WHITESPACE -- cgit v1.2.3