From 05862b802237df8bc61b0b45ef1ba4e70b0773f5 Mon Sep 17 00:00:00 2001 From: pommicket Date: Mon, 5 Dec 2022 19:05:14 -0500 Subject: full json parser --- Makefile | 2 +- arr.c | 7 +- base.h | 8 ++ json.c | 444 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ lsp.c | 300 +----------------------------------------- main.c | 1 + 6 files changed, 461 insertions(+), 301 deletions(-) create mode 100644 json.c diff --git a/Makefile b/Makefile index 92123c8..9a0e2fa 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ ALL_CFLAGS=$(CFLAGS) -Wall -Wextra -Wshadow -Wconversion -Wpedantic -pedantic -s LIBS=-lSDL2 -lGL -lm libpcre2-32.a DEBUG_CFLAGS=$(ALL_CFLAGS) -DDEBUG -O0 -g RELEASE_CFLAGS=$(ALL_CFLAGS) -O3 -PROFILE_CFLAGS=$(ALL_CFLAGS) -O3 -DPROFILE=1 +PROFILE_CFLAGS=$(ALL_CFLAGS) -O3 -g -DPROFILE=1 # if you change the directories below, ted won't work. # we don't yet have support for using different data directories GLOBAL_DATA_DIR=/usr/share/ted diff --git a/arr.c b/arr.c index ae25a3c..05117b1 100644 --- a/arr.c +++ b/arr.c @@ -60,6 +60,10 @@ static inline u32 arr_len(void *arr) { return arr ? arr_hdr_(arr)->len : 0; } +static inline u32 arr_cap(void *arr) { + return arr ? arr_hdr_(arr)->cap : 0; +} + static inline unsigned arr_lenu(void *arr) { return (unsigned)arr_len(arr); } @@ -114,6 +118,7 @@ static void arr_reserve_(void **arr, size_t member_size, size_t n) { if (!*arr) { // create a new array with capacity n+1 + // why n+1? i dont know i wrote this a while ago ArrHeader *hdr = calloc(1, sizeof(ArrHeader) + (n+1) * member_size); if (hdr) { hdr->cap = (u32)n+1; @@ -151,7 +156,7 @@ static void arr_set_len_(void **arr, size_t member_size, size_t n) { ArrHeader *hdr = arr_hdr_(*arr); if (n > hdr->len) { // zero new elements - memset((char *)hdr->data + hdr->len, 0, (n - hdr->len) * member_size); + memset((char *)hdr->data + hdr->len * member_size, 0, (n - hdr->len) * member_size); } hdr->len = (u32)n; } diff --git a/base.h b/base.h index ee427fc..2af0820 100644 --- a/base.h +++ b/base.h @@ -115,6 +115,14 @@ typedef unsigned long long ullong; #define arr_count(a) (sizeof (a) / sizeof *(a)) + +// usage: if UNLIKELY (x > 2) ... +#if __GNUC__ +#define UNLIKELY(x) (__builtin_expect(x,0)) +#else +#define UNLIKELY(x) (x) +#endif + #ifdef __GNUC__ #define no_warn_start _Pragma("GCC diagnostic push") \ _Pragma("GCC diagnostic ignored \"-Wpedantic\"") \ diff --git a/json.c b/json.c new file mode 100644 index 0000000..1b74e10 --- /dev/null +++ b/json.c @@ -0,0 +1,444 @@ +// JSON parser for LSP +// provides FAST(ish) parsing but SLOW lookup +// this is especially fast for small objects + +typedef struct { + u32 pos; + u32 len; +} JSONString; + +typedef struct JSONValue JSONValue; + +typedef struct { + u32 len; + // this is an index into the values array + // values[items..items+len] store the names + // values[items+len..items+2*len] store the values + u32 items; +} JSONObject; + +typedef struct { + u32 len; + // this is an index into the values array + // values[elements..elements+len] are the elements + u32 elements; +} JSONArray; + +enum { + JSON_NULL, + JSON_FALSE, + JSON_TRUE, + JSON_NUMBER, + JSON_STRING, + JSON_OBJECT, + JSON_ARRAY +}; + +struct JSONValue { + u8 type; + union { + double number; + JSONString string; + JSONArray array; + JSONObject object; + } val; +}; + + +typedef struct { + char error[64]; + const char *text; + // root = values[0] + JSONValue *values; +} JSON; + +#define SKIP_WHITESPACE while (json_is_space(text[index])) ++index; + + +static bool json_parse_value(JSON *json, u32 *p_index, JSONValue *val); + +// defining this instead of using isspace seems to be faster +// probably because isspace depends on the locale. +static inline bool json_is_space(char c) { + return c == ' ' || c == '\n' || c == '\r' || c == '\t'; +} + +static void json_debug_print_value(const JSON *json, u32 idx) { + JSONValue *value = &json->values[idx]; + switch (value->type) { + case JSON_NULL: printf("null"); break; + case JSON_FALSE: printf("false"); break; + case JSON_TRUE: printf("true"); break; + case JSON_NUMBER: printf("%f", value->val.number); break; + case JSON_STRING: { + JSONString *string = &value->val.string; + printf("\"%.*s\"", + (int)string->len, + json->text + string->pos); + } break; + case JSON_ARRAY: { + JSONArray *array = &value->val.array; + printf("["); + for (u32 i = 0; i < array->len; ++i) { + json_debug_print_value(json, array->elements + i); + printf(", "); + } + printf("]"); + } break; + case JSON_OBJECT: { + JSONObject *obj = &value->val.object; + printf("{"); + for (u32 i = 0; i < obj->len; ++i) { + json_debug_print_value(json, obj->items + i); + printf(": "); + json_debug_print_value(json, obj->items + obj->len + i); + printf(", "); + } + printf("}"); + } break; + } +} + +// count number of comma-separated values until +// closing ] or } +static u32 json_count(JSON *json, u32 index) { + int bracket_depth = 0; + int brace_depth = 0; + u32 count = 1; + const char *text = json->text; + SKIP_WHITESPACE; + // special case: empty object/array + if (text[index] == '}' || text[index] == ']') + return 0; + +// int mark[5000] = {0}; + for (; ; ++index) { + switch (text[index]) { + case '\0': + return 0; // bad no closing bracket + case '[': + ++bracket_depth; + break; + case ']': + --bracket_depth; + if (bracket_depth < 0) + return count; + break; + case '{': + ++brace_depth; +// mark[index] = 1; + break; + case '}': + --brace_depth; +// mark[index] = 1; + if (brace_depth < 0){ + // useful visualization for debugging +// for (int i = 0; text[i]; ++i ){ +// switch (mark[i]){ +// case 1: printf("\x1b[91m"); break; +// case 2: printf("\x1b[92m"); break; +// } +// printf("%c",text[i]); +// if (mark[i]) printf("\x1b[0m"); +// } +// printf("\n"); + + return count; + } + break; + case ',': + if (bracket_depth == 0 && brace_depth == 0) + ++count; + break; + case '"': { + ++index; // skip opening " + int escaped = 0; + for (; ; ++index) { +// mark[index] = 2; + switch (text[index]) { + case '\0': return 0; // bad no closing quote + case '\\': escaped = !escaped; break; + case '"': + if (!escaped) + goto done; + escaped = false; + break; + default: + escaped = false; + break; + } + } + done:; + } break; + } + } +} + +static bool json_parse_object(JSON *json, u32 *p_index, JSONObject *object) { + u32 index = *p_index; + const char *text = json->text; + ++index; // go past { + u32 count = json_count(json, index); + object->len = count; + object->items = arr_len(json->values); + arr_set_len(json->values, arr_len(json->values) + 2 * count); + + for (u32 i = 0; i < count; ++i) { + if (i > 0) { + if (text[index] != ',') { + strbuf_printf(json->error, "stuff after value in object"); + return false; + } + ++index; + } + SKIP_WHITESPACE; + JSONValue name = {0}, value = {0}; + if (!json_parse_value(json, &index, &name)) + return false; + SKIP_WHITESPACE; + if (text[index] != ':') { + strbuf_printf(json->error, "stuff after name in object"); + return false; + } + ++index; // skip : + SKIP_WHITESPACE; + if (!json_parse_value(json, &index, &value)) + return false; + SKIP_WHITESPACE; + json->values[object->items + i] = name; + json->values[object->items + count + i] = value; + } + + if (text[index] != '}') { + strbuf_printf(json->error, "mismatched brackets or quotes."); + return false; + } + ++index; // skip } + *p_index = index; + return true; +} + +static bool json_parse_array(JSON *json, u32 *p_index, JSONArray *array) { + u32 index = *p_index; + const char *text = json->text; + ++index; // go past [ + u32 count = json_count(json, index); + array->len = count; + array->elements = arr_len(json->values); + + arr_set_len(json->values, arr_len(json->values) + count); + + SKIP_WHITESPACE; + + for (u32 i = 0; i < count; ++i) { + if (i > 0) { + if (text[index] != ',') { + strbuf_printf(json->error, "stuff after element in array"); + return false; + } + ++index; + } + SKIP_WHITESPACE; + JSONValue element = {0}; + if (!json_parse_value(json, &index, &element)) + return false; + SKIP_WHITESPACE; + json->values[array->elements + i] = element; + } + + if (text[index] != ']') { + strbuf_printf(json->error, "mismatched brackets or quotes."); + return false; + } + ++index; // skip ] + *p_index = index; + return true; +} + +static bool json_parse_string(JSON *json, u32 *p_index, JSONString *string) { + u32 index = *p_index; + ++index; // skip opening " + string->pos = index; + const char *text = json->text; + bool escaped = false; + for (; ; ++index) { + switch (text[index]) { + case '"': + if (!escaped) + goto done; + escaped = false; + break; + case '\\': + escaped = !escaped; + break; + case '\0': + strbuf_printf(json->error, "string literal goes to end of JSON"); + return false; + default: + escaped = false; + break; + } + } + done: + string->len = index - string->pos; + ++index; // skip closing " + *p_index = index; + return true; +} + +static bool json_parse_number(JSON *json, u32 *p_index, double *number) { + char *endp = NULL; + const char *text = json->text; + u32 index = *p_index; + *number = strtod(text + index, &endp); + if (endp == text + index) { + strbuf_printf(json->error, "bad number"); + return false; + } + index = (u32)(endp - text); + *p_index = index; + return true; +} + +static bool json_parse_value(JSON *json, u32 *p_index, JSONValue *val) { + const char *text = json->text; + u32 index = *p_index; + SKIP_WHITESPACE; + switch (text[index]) { + case '{': + val->type = JSON_OBJECT; + if (!json_parse_object(json, &index, &val->val.object)) + return false; + break; + case '[': + val->type = JSON_ARRAY; + if (!json_parse_array(json, &index, &val->val.array)) + return false; + break; + case '"': + val->type = JSON_STRING; + if (!json_parse_string(json, &index, &val->val.string)) + return false; + break; + case ANY_DIGIT: + val->type = JSON_NUMBER; + if (!json_parse_number(json, &index, &val->val.number)) + return false; + break; + case 'f': + val->type = JSON_FALSE; + if (!str_has_prefix(&text[index], "false")) + return false; + index += 5; + break; + case 't': + val->type = JSON_TRUE; + if (!str_has_prefix(&text[index], "true")) + return false; + index += 4; + break; + case 'n': + val->type = JSON_NULL; + if (!str_has_prefix(&text[index], "null")) + return false; + index += 4; + break; + default: + strbuf_printf(json->error, "bad value"); + return false; + } + *p_index = index; + return true; +} + +// NOTE: text must live as long as json!!! +static bool json_parse(JSON *json, const char *text) { + memset(json, 0, sizeof *json); + json->text = text; + // @TODO: is this a good capacity? + arr_reserve(json->values, strlen(text) / 8); + arr_addp(json->values); // add root + JSONValue val = {0}; + u32 index = 0; + if (!json_parse_value(json, &index, &val)) + return false; + + SKIP_WHITESPACE; + if (text[index]) { + strbuf_printf(json->error, "extra text after end of root object"); + return false; + } + json->values[0] = val; + return true; +} + +static void json_free(JSON *json) { + arr_free(json->values); + memset(json, 0, sizeof *json); +} + +#if __unix__ +static void json_test_time_large(const char *filename) { + struct timespec start={0},end={0}; + FILE *fp = fopen(filename,"rb"); + if (!fp) { + perror(filename); + return; + } + + fseek(fp,0,SEEK_END); + size_t sz = (size_t)ftell(fp); + char *buf = calloc(1,sz+1); + rewind(fp); + fread(buf, 1, sz, fp); + fclose(fp); + for (int trial = 0; trial < 5; ++trial) { + clock_gettime(CLOCK_MONOTONIC, &start); + JSON json={0}; + bool success = json_parse(&json, buf); + if (!success) { + printf("FAIL: %s\n",json.error); + return; + } + + json_free(&json); + clock_gettime(CLOCK_MONOTONIC, &end); + + + + printf("time: %.1fms\n", + ((double)end.tv_sec*1e3+(double)end.tv_nsec*1e-6) + -((double)start.tv_sec*1e3+(double)start.tv_nsec*1e-6)); + } + +} +static void json_test_time_small(void) { + struct timespec start={0},end={0}; + int trials = 50000000; + clock_gettime(CLOCK_MONOTONIC, &start); + for (int trial = 0; trial < trials; ++trial) { + JSON json={0}; + bool success = json_parse(&json, "{\"hello\":\"there\"}"); + if (!success) { + printf("FAIL: %s\n",json.error); + return; + } + + json_free(&json); + } + clock_gettime(CLOCK_MONOTONIC, &end); + printf("time per trial: %.1fns\n", + (((double)end.tv_sec*1e9+(double)end.tv_nsec) + -((double)start.tv_sec*1e9+(double)start.tv_nsec)) + / trials); + +} +#endif + +static void json_debug_print(const JSON *json) { + printf("%u values (capacity %u, text length %zu)\n", + arr_len(json->values), arr_cap(json->values), strlen(json->text)); + json_debug_print_value(json, 0); +} + +#undef SKIP_WHITESPACE diff --git a/lsp.c b/lsp.c index 5847642..1b6d214 100644 --- a/lsp.c +++ b/lsp.c @@ -2,304 +2,6 @@ typedef struct { Process process; } LSP; -typedef struct { - u32 pos; - u32 len; -} JSONString; - -typedef struct JSONValue JSONValue; - -typedef struct { - u32 len; - // this is an index into the values array - // values[items..items+len] store the names - // values[items+len..items+2*len] store the values - u32 items; -} JSONObject; - -typedef struct { - u32 len; - // this is an index into the values array - // values[elements..elements+len] are the elements - u32 elements; -} JSONArray; - -enum { - JSON_NULL, - JSON_FALSE, - JSON_TRUE, - JSON_NUMBER, - JSON_STRING, - JSON_OBJECT, - JSON_ARRAY -}; - -struct JSONValue { - u8 type; - union { - double number; - JSONString string; - JSONArray array; - JSONObject object; - } val; -}; - - -typedef struct { - char error[64]; - const char *text; - // root = values[0] - JSONValue *values; -} JSON; - -static bool json_parse_value(JSON *json, u32 *p_index, JSONValue *val); - -// count number of comma-separated values until -// closing ] or } -static u32 json_count(JSON *json, u32 index) { - int bracket_depth = 0; - int brace_depth = 0; - u32 count = 1; - const char *text = json->text; - while (isspace(text[index])) ++index; - // special case: empty object/array - if (text[index] == '}' || text[index] == ']') - return 0; - - int mark[5000] = {0}; - for (; ; ++index) { - switch (text[index]) { - case '\0': - return 0; // bad no closing bracket - case '[': - ++bracket_depth; - break; - case ']': - --bracket_depth; - if (bracket_depth < 0) - return count; - break; - case '{': - ++brace_depth; - mark[index] = true; - break; - case '}': - --brace_depth; - mark[index] = true; - if (brace_depth < 0){ - - for (int i = 0; text[i]; ++i ){ - switch (mark[i]){ - case 1: printf("\x1b[91m"); break; - case 2: printf("\x1b[92m"); break; - } - printf("%c",text[i]); - if (mark[i]) printf("\x1b[0m"); - } - printf("\n"); - - return count; - } - break; - case ',': - if (bracket_depth == 0 && brace_depth == 0) - ++count; - break; - case '"': { - ++index; // skip opening " - int escaped = 0; - for (; ; ++index) { - mark[index] = 2; - switch (text[index]) { - case '\0': return 0; // bad no closing quote - case '\\': escaped = !escaped; break; - case '"': - if (!escaped) - goto done; - escaped = false; - break; - default: - escaped = false; - break; - } - } - done:; - } break; - } - } -} - -static bool json_parse_object(JSON *json, u32 *p_index, JSONObject *object) { - u32 index = *p_index; - const char *text = json->text; - ++index; // go past { - u32 count = json_count(json, index); - printf("%u\n",count); - exit(0); - object->len = count; - object->items = arr_len(json->values); - arr_set_len(json->values, arr_len(json->values) + 2 * count); - - - for (u32 i = 0; i < count; ++i) { - if (i > 0) { - if (text[index] != ',') { - strbuf_printf(json->error, "stuff after value in object"); - return false; - } - ++index; - } - JSONValue name = {0}, value = {0}; - if (!json_parse_value(json, &index, &name)) - return false; - while (isspace(text[index])) ++index; - if (text[index] != ':') { - strbuf_printf(json->error, "stuff after name in object"); - return false; - } - while (isspace(text[index])) ++index; - if (!json_parse_value(json, &index, &value)) - return false; - while (isspace(text[index])) ++index; - json->values[object->items + i] = name; - json->values[object->items + count + i] = value; - } - - if (text[index] != '}') { - strbuf_printf(json->error, "mismatched brackets or quotes."); - return false; - } - ++index; // skip } - *p_index = index; - return true; -} - -static bool json_parse_array(JSON *json, u32 *p_index, JSONArray *array) { - (void)json;(void)p_index;(void)array; - abort(); -} - -static bool json_parse_string(JSON *json, u32 *p_index, JSONString *string) { - (void)json;(void)p_index;(void)string; - abort(); -} - -static bool json_parse_number(JSON *json, u32 *p_index, double *number) { - (void)json;(void)p_index;(void)number; - abort(); -} - -static bool json_parse_value(JSON *json, u32 *p_index, JSONValue *val) { - const char *text = json->text; - u32 index = *p_index; - while (isspace(text[index])) ++index; - switch (text[index]) { - case '{': - val->type = JSON_OBJECT; - if (!json_parse_object(json, &index, &val->val.object)) - return false; - break; - case '[': - val->type = JSON_ARRAY; - if (!json_parse_array(json, &index, &val->val.array)) - return false; - break; - case '"': - val->type = JSON_STRING; - if (!json_parse_string(json, &index, &val->val.string)) - return false; - break; - case ANY_DIGIT: - val->type = JSON_NUMBER; - if (!json_parse_number(json, &index, &val->val.number)) - return false; - break; - case 'f': - val->type = JSON_FALSE; - if (!str_has_prefix(&text[index], "false")) - return false; - index += 5; - break; - case 't': - val->type = JSON_TRUE; - if (!str_has_prefix(&text[index], "true")) - return false; - index += 4; - break; - case 'n': - val->type = JSON_NULL; - if (!str_has_prefix(&text[index], "null")) - return false; - index += 4; - break; - default: - strbuf_printf(json->error, "bad value"); - } - *p_index = index; - return true; -} - -// NOTE: text must live as long as json!!! -static bool json_parse(JSON *json, const char *text) { - memset(json, 0, sizeof *json); - json->text = text; - // @TODO: is this a good capacity? - arr_reserve(json->values, strlen(text) / 8); - arr_addp(json->values); // add root - JSONValue val = {0}; - u32 index = 0; - if (!json_parse_value(json, &index, &val)) - return false; - - while (isspace(text[index])) ++index; - if (text[index]) { - strbuf_printf(json->error, "extra text after end of root object"); - return false; - } - json->values[0] = val; - return true; -} - - -static void json_debug_print_value(const JSON *json, u32 idx) { - JSONValue *value = &json->values[idx]; - switch (value->type) { - case JSON_NULL: printf("null"); break; - case JSON_FALSE: printf("false"); break; - case JSON_TRUE: printf("true"); break; - case JSON_NUMBER: printf("%f", value->val.number); break; - case JSON_STRING: { - JSONString *string = &value->val.string; - printf("\"%.*s\"", - (int)string->len, - json->text + string->pos); - } break; - case JSON_ARRAY: { - JSONArray *array = &value->val.array; - printf("["); - for (u32 i = 0; i < array->len; ++i) { - json_debug_print_value(json, array->elements + i); - printf(","); - } - printf("]"); - } break; - case JSON_OBJECT: { - JSONObject *obj = &value->val.object; - printf("{"); - for (u32 i = 0; i < obj->len; ++i) { - json_debug_print_value(json, obj->items + i); - printf(": "); - json_debug_print_value(json, obj->items + obj->len + i); - printf(","); - } - printf("}"); - } break; - } -} - -static void json_debug_print(const JSON *json) { - json_debug_print_value(json, 0); -} static void send_request(LSP *lsp, const char *content) { char header[128]; @@ -336,6 +38,6 @@ void lsp_create(LSP *lsp, const char *analyzer_command) { printf("%s\n",rnrn+4); if (!json_parse(&json, rnrn + 4)) printf("fail : %s\n",json.error); - json_debug_print(&json); +// json_debug_print(&json);printf("\n"); } diff --git a/main.c b/main.c index 517faaa..c0feb8c 100644 --- a/main.c +++ b/main.c @@ -101,6 +101,7 @@ bool tag_goto(Ted *ted, char const *tag); #include "command.c" #include "config.c" #include "session.c" +#include "json.c" #include "lsp.c" #if PROFILE -- cgit v1.2.3