diff options
author | pommicket <pommicket@gmail.com> | 2022-12-29 18:44:37 -0500 |
---|---|---|
committer | pommicket <pommicket@gmail.com> | 2022-12-29 18:44:37 -0500 |
commit | d77782564bf0a090a4e7fa4f4c4bb685383275dc (patch) | |
tree | a4bc3c0bf142c663ea7f293b4381927adc896f1c /json.c | |
parent | 5f7af06a9085835a3d1ad3a51374c245859d7d97 (diff) |
textDocument/definition for ctrl+click
Diffstat (limited to 'json.c')
-rw-r--r-- | json.c | 764 |
1 files changed, 0 insertions, 764 deletions
@@ -1,764 +0,0 @@ -// JSON parser for LSP -// provides FAST(ish) parsing but SLOW lookup -// this is especially fast for small objects -// this actually supports "extended json", where objects can have arbitrary values as keys. -typedef struct { - u32 pos; - u32 len; -} JSONString; - -typedef struct JSONValue JSONValue; - -typedef struct { - u32 len; - // this is an index into the values array - // values[items..items+len] store the names - // values[items+len..items+2*len] store the values - u32 items; -} JSONObject; - -typedef struct { - u32 len; - // this is an index into the values array - // values[elements..elements+len] are the elements - u32 elements; -} JSONArray; - -typedef enum { - // note: json doesn't actually include undefined. - // this is only for returning things from json_get etc. - JSON_UNDEFINED, - JSON_NULL, - JSON_FALSE, - JSON_TRUE, - JSON_NUMBER, - JSON_STRING, - JSON_OBJECT, - JSON_ARRAY -} JSONValueType; - -struct JSONValue { - JSONValueType type; - union { - double number; - JSONString string; - JSONArray array; - JSONObject object; - } val; -}; - - -typedef struct { - char error[64]; - bool is_text_copied; // if this is true, then json_free will call free on text - const char *text; - // root = values[0] - JSONValue *values; -} JSON; - - -#define SKIP_WHITESPACE while (json_is_space(text[index])) ++index; - -const char *json_type_to_str(JSONValueType type) { - switch (type) { - case JSON_UNDEFINED: - return "undefined"; - case JSON_NULL: - return "null"; - case JSON_STRING: - return "string"; - case JSON_NUMBER: - return "number"; - case JSON_FALSE: - return "false"; - case JSON_TRUE: - return "true"; - case JSON_ARRAY: - return "array"; - case JSON_OBJECT: - return "object"; - } - assert(0); - return "???"; -} - -static bool json_parse_value(JSON *json, u32 *p_index, JSONValue *val); -void json_debug_print_value(const JSON *json, JSONValue value); - -// defining this instead of using isspace seems to be faster -// probably because isspace depends on the locale. -static inline bool json_is_space(char c) { - return c == ' ' || c == '\n' || c == '\r' || c == '\t'; -} - -void json_debug_print_array(const JSON *json, JSONArray array) { - printf("["); - for (u32 i = 0; i < array.len; ++i) { - json_debug_print_value(json, json->values[array.elements + i]); - printf(", "); - } - printf("]"); -} - -void json_debug_print_object(const JSON *json, JSONObject obj) { - printf("{"); - for (u32 i = 0; i < obj.len; ++i) { - json_debug_print_value(json, json->values[obj.items + i]); - printf(": "); - json_debug_print_value(json, json->values[obj.items + obj.len + i]); - printf(", "); - } - printf("}"); -} - -void json_debug_print_string(const JSON *json, JSONString string) { - printf("\"%.*s\"", - (int)string.len, - json->text + string.pos); -} - -void json_debug_print_value(const JSON *json, JSONValue value) { - switch (value.type) { - case JSON_UNDEFINED: printf("undefined"); break; - case JSON_NULL: printf("null"); break; - case JSON_FALSE: printf("false"); break; - case JSON_TRUE: printf("true"); break; - case JSON_NUMBER: printf("%g", value.val.number); break; - case JSON_STRING: { - json_debug_print_string(json, value.val.string); - } break; - case JSON_ARRAY: { - json_debug_print_array(json, value.val.array); - } break; - case JSON_OBJECT: { - json_debug_print_object(json, value.val.object); - } break; - } -} - -// count number of comma-separated values until -// closing ] or } -static u32 json_count(JSON *json, u32 index) { - int bracket_depth = 0; - int brace_depth = 0; - u32 count = 1; - const char *text = json->text; - SKIP_WHITESPACE; - // special case: empty object/array - if (text[index] == '}' || text[index] == ']') - return 0; - -// int mark[5000] = {0}; - for (; ; ++index) { - switch (text[index]) { - case '\0': - return 0; // bad no closing bracket - case '[': - ++bracket_depth; - break; - case ']': - --bracket_depth; - if (bracket_depth < 0) - return count; - break; - case '{': - ++brace_depth; -// mark[index] = 1; - break; - case '}': - --brace_depth; -// mark[index] = 1; - if (brace_depth < 0){ - // useful visualization for debugging -// for (int i = 0; text[i]; ++i ){ -// switch (mark[i]){ -// case 1: printf("\x1b[91m"); break; -// case 2: printf("\x1b[92m"); break; -// } -// printf("%c",text[i]); -// if (mark[i]) printf("\x1b[0m"); -// } -// printf("\n"); - - return count; - } - break; - case ',': - if (bracket_depth == 0 && brace_depth == 0) - ++count; - break; - case '"': { - ++index; // skip opening " - int escaped = 0; - for (; ; ++index) { -// mark[index] = 2; - switch (text[index]) { - case '\0': return 0; // bad no closing quote - case '\\': escaped = !escaped; break; - case '"': - if (!escaped) - goto done; - escaped = false; - break; - default: - escaped = false; - break; - } - } - done:; - } break; - } - } -} - -static bool json_parse_object(JSON *json, u32 *p_index, JSONObject *object) { - u32 index = *p_index; - const char *text = json->text; - ++index; // go past { - u32 count = json_count(json, index); - object->len = count; - object->items = arr_len(json->values); - arr_set_len(json->values, arr_len(json->values) + 2 * count); - - for (u32 i = 0; i < count; ++i) { - if (i > 0) { - if (text[index] != ',') { - strbuf_printf(json->error, "stuff after value in object"); - return false; - } - ++index; - } - SKIP_WHITESPACE; - JSONValue name = {0}, value = {0}; - if (!json_parse_value(json, &index, &name)) - return false; - SKIP_WHITESPACE; - if (text[index] != ':') { - strbuf_printf(json->error, "stuff after name in object"); - return false; - } - ++index; // skip : - SKIP_WHITESPACE; - if (!json_parse_value(json, &index, &value)) - return false; - SKIP_WHITESPACE; - json->values[object->items + i] = name; - json->values[object->items + count + i] = value; - } - - if (text[index] != '}') { - strbuf_printf(json->error, "mismatched brackets or quotes."); - return false; - } - ++index; // skip } - *p_index = index; - return true; -} - -static bool json_parse_array(JSON *json, u32 *p_index, JSONArray *array) { - u32 index = *p_index; - const char *text = json->text; - ++index; // go past [ - u32 count = json_count(json, index); - array->len = count; - array->elements = arr_len(json->values); - - arr_set_len(json->values, arr_len(json->values) + count); - - SKIP_WHITESPACE; - - for (u32 i = 0; i < count; ++i) { - if (i > 0) { - if (text[index] != ',') { - strbuf_printf(json->error, "stuff after element in array"); - return false; - } - ++index; - } - SKIP_WHITESPACE; - JSONValue element = {0}; - if (!json_parse_value(json, &index, &element)) - return false; - SKIP_WHITESPACE; - json->values[array->elements + i] = element; - } - - if (text[index] != ']') { - strbuf_printf(json->error, "mismatched brackets or quotes."); - return false; - } - ++index; // skip ] - *p_index = index; - return true; -} - -static bool json_parse_string(JSON *json, u32 *p_index, JSONString *string) { - u32 index = *p_index; - ++index; // skip opening " - string->pos = index; - const char *text = json->text; - bool escaped = false; - for (; ; ++index) { - switch (text[index]) { - case '"': - if (!escaped) - goto done; - escaped = false; - break; - case '\\': - escaped = !escaped; - break; - case '\0': - strbuf_printf(json->error, "string literal goes to end of JSON"); - return false; - default: - escaped = false; - break; - } - } - done: - string->len = index - string->pos; - ++index; // skip closing " - *p_index = index; - return true; -} - -static bool json_parse_number(JSON *json, u32 *p_index, double *number) { - char *endp = NULL; - const char *text = json->text; - u32 index = *p_index; - *number = strtod(text + index, &endp); - if (endp == text + index) { - strbuf_printf(json->error, "bad number"); - return false; - } - index = (u32)(endp - text); - *p_index = index; - return true; -} - -static bool json_parse_value(JSON *json, u32 *p_index, JSONValue *val) { - const char *text = json->text; - u32 index = *p_index; - SKIP_WHITESPACE; - switch (text[index]) { - case '{': - val->type = JSON_OBJECT; - if (!json_parse_object(json, &index, &val->val.object)) - return false; - break; - case '[': - val->type = JSON_ARRAY; - if (!json_parse_array(json, &index, &val->val.array)) - return false; - break; - case '"': - val->type = JSON_STRING; - if (!json_parse_string(json, &index, &val->val.string)) - return false; - break; - case ANY_DIGIT: - case '-': - case '+': - val->type = JSON_NUMBER; - if (!json_parse_number(json, &index, &val->val.number)) - return false; - break; - case 'f': - val->type = JSON_FALSE; - if (!str_has_prefix(&text[index], "false")) - return false; - index += 5; - break; - case 't': - val->type = JSON_TRUE; - if (!str_has_prefix(&text[index], "true")) - return false; - index += 4; - break; - case 'n': - val->type = JSON_NULL; - if (!str_has_prefix(&text[index], "null")) - return false; - index += 4; - break; - default: - strbuf_printf(json->error, "bad value"); - return false; - } - *p_index = index; - return true; -} - -void json_free(JSON *json) { - arr_free(json->values); - // important we don't zero json here because we want to preserve json->error. - if (json->is_text_copied) { - free((void*)json->text); - } - json->text = NULL; -} - -// NOTE: text must live as long as json!!! -bool json_parse(JSON *json, const char *text) { - memset(json, 0, sizeof *json); - json->text = text; - arr_reserve(json->values, strlen(text) / 8); - arr_addp(json->values); // add root - JSONValue val = {0}; - u32 index = 0; - if (!json_parse_value(json, &index, &val)) { - json_free(json); - return false; - } - SKIP_WHITESPACE; - if (text[index]) { - json_free(json); - strbuf_printf(json->error, "extra text after end of root object"); - return false; - } - json->values[0] = val; - return true; -} - -// like json_parse, but a copy of text is made, so you can free/overwrite it immediately. -bool json_parse_copy(JSON *json, const char *text) { - bool success = json_parse(json, str_dup(text)); - if (success) { - json->is_text_copied = true; - return true; - } else { - free((void*)json->text); - json->text = NULL; - return false; - } -} - -static bool json_streq(const JSON *json, const JSONString *string, const char *name) { - const char *p = &json->text[string->pos]; - const char *end = p + string->len; - for (; p < end; ++p, ++name) { - if (*name != *p) - return false; - } - return *name == '\0'; -} - -// returns undefined if the property `name` does not exist. -JSONValue json_object_get(const JSON *json, JSONObject object, const char *name) { - const JSONValue *items = &json->values[object.items]; - for (u32 i = 0; i < object.len; ++i) { - const JSONValue *this_name = items++; - if (this_name->type == JSON_STRING && json_streq(json, &this_name->val.string, name)) { - return json->values[object.items + object.len + i]; - } - } - return (JSONValue){0}; -} - -JSONValue json_array_get(const JSON *json, JSONArray array, u64 i) { - if (i < array.len) { - return json->values[array.elements + i]; - } - return (JSONValue){0}; -} - -// returns NaN if `x` is not a number (ha ha). -double json_force_number(JSONValue x) { - if (x.type == JSON_NUMBER) { - return x.val.number; - } else { - return NAN; - } -} - -double json_object_get_number(const JSON *json, JSONObject object, const char *name) { - return json_force_number(json_object_get(json, object, name)); -} - -double json_array_get_number(const JSON *json, JSONArray array, size_t i) { - return json_force_number(json_array_get(json, array, i)); -} - -bool json_force_bool(JSONValue x, bool default_value) { - if (x.type == JSON_TRUE) return true; - if (x.type == JSON_FALSE) return false; - return default_value; -} - -bool json_object_get_bool(const JSON *json, JSONObject object, const char *name, bool default_value) { - return json_force_bool(json_object_get(json, object, name), default_value); -} - -bool json_array_get_bool(const JSON *json, JSONArray array, size_t i, bool default_value) { - return json_force_bool(json_array_get(json, array, i), default_value); -} - -// returns (JSONString){0} (which is interpreted as an empty string) if `x` is not a string -JSONString json_force_string(JSONValue x) { - if (x.type == JSON_STRING) { - return x.val.string; - } else { - return (JSONString){0}; - } -} - -JSONString json_object_get_string(const JSON *json, JSONObject object, const char *name) { - return json_force_string(json_object_get(json, object, name)); -} - -JSONString json_array_get_string(const JSON *json, JSONArray array, size_t i) { - return json_force_string(json_array_get(json, array, i)); -} - -// returns (JSONObject){0} (which is interpreted as an empty object) if `x` is not an object -JSONObject json_force_object(JSONValue x) { - if (x.type == JSON_OBJECT) { - return x.val.object; - } else { - return (JSONObject){0}; - } -} - -JSONObject json_object_get_object(const JSON *json, JSONObject object, const char *name) { - return json_force_object(json_object_get(json, object, name)); -} - -JSONObject json_array_get_object(const JSON *json, JSONArray array, size_t i) { - return json_force_object(json_array_get(json, array, i)); -} - -// returns (JSONArray){0} (which is interpreted as an empty array) if `x` is not an array -JSONArray json_force_array(JSONValue x) { - if (x.type == JSON_ARRAY) { - return x.val.array; - } else { - return (JSONArray){0}; - } -} - -JSONArray json_object_get_array(const JSON *json, JSONObject object, const char *name) { - return json_force_array(json_object_get(json, object, name)); -} - -JSONArray json_array_get_array(const JSON *json, JSONArray array, size_t i) { - return json_force_array(json_array_get(json, array, i)); -} - -// e.g. if json is { "a" : { "b": 3 }}, then json_get(json, "a.b") = 3. -// returns undefined if there is no such property -JSONValue json_get(const JSON *json, const char *path) { - char segment[128]; - const char *p = path; - if (!json->values) { - return (JSONValue){0}; - } - JSONValue curr_value = json->values[0]; - while (*p) { - size_t segment_len = strcspn(p, "."); - strn_cpy(segment, sizeof segment, p, segment_len); - if (curr_value.type != JSON_OBJECT) { - return (JSONValue){0}; - } - curr_value = json_object_get(json, curr_value.val.object, segment); - p += segment_len; - if (*p == '.') ++p; - } - return curr_value; -} - -// equivalent to json_get(json, path).type != JSON_UNDEFINED, but more readable -bool json_has(const JSON *json, const char *path) { - JSONValue value = json_get(json, path); - return value.type != JSON_UNDEFINED; -} - -// turn a json string into a null terminated string. -// this won't be nice if the json string includes \u0000 but that's rare. -// if buf_sz > string->len, the string will fit. -void json_string_get(const JSON *json, JSONString string, char *buf, size_t buf_sz) { - const char *text = json->text; - if (buf_sz == 0) { - assert(0); - return; - } - char *buf_end = buf + buf_sz - 1; - for (u32 i = string.pos, end = string.pos + string.len; i < end && buf < buf_end; ++i) { - if (text[i] != '\\') { - *buf++ = text[i]; - } else { - ++i; - if (i >= end) break; - // escape sequence - switch (text[i]) { - case 'n': *buf++ = '\n'; break; - case 'r': *buf++ = '\r'; break; - case 'b': *buf++ = '\b'; break; - case 't': *buf++ = '\t'; break; - case 'f': *buf++ = '\f'; break; - case '\\': *buf++ = '\\'; break; - case '/': *buf++ = '/'; break; - case '"': *buf++ = '"'; break; - case 'u': { - if ((buf_end - buf) < 4 || i + 5 > end) - goto brk; - ++i; - - char hex[5] = {0}; - hex[0] = text[i++]; - hex[1] = text[i++]; - hex[2] = text[i++]; - hex[3] = text[i++]; - unsigned code_point=0; - sscanf(hex, "%04x", &code_point); - // technically this won't deal with people writing out UTF-16 surrogate halves - // using \u. i dont care. - size_t n = unicode_utf32_to_utf8(buf, code_point); - if (n <= 4) buf += n; - } break; - } - } - } - brk: - *buf = '\0'; -} - -// returns a malloc'd null-terminated string. -static char *json_string_get_alloc(const JSON *json, JSONString string) { - u32 n = string.len + 1; - if (n == 0) --n; // extreme edge case - char *buf = calloc(1, n); - json_string_get(json, string, buf, n); - return buf; -} - - -#if __unix__ -static void json_test_time_large(const char *filename) { - struct timespec start={0},end={0}; - FILE *fp = fopen(filename,"rb"); - if (!fp) { - perror(filename); - return; - } - - fseek(fp,0,SEEK_END); - size_t sz = (size_t)ftell(fp); - char *buf = calloc(1,sz+1); - rewind(fp); - fread(buf, 1, sz, fp); - fclose(fp); - for (int trial = 0; trial < 5; ++trial) { - clock_gettime(CLOCK_MONOTONIC, &start); - JSON json={0}; - bool success = json_parse(&json, buf); - if (!success) { - printf("FAIL: %s\n",json.error); - return; - } - - json_free(&json); - clock_gettime(CLOCK_MONOTONIC, &end); - - - - printf("time: %.1fms\n", - ((double)end.tv_sec*1e3+(double)end.tv_nsec*1e-6) - -((double)start.tv_sec*1e3+(double)start.tv_nsec*1e-6)); - } - -} -static void json_test_time_small(void) { - struct timespec start={0},end={0}; - int trials = 50000000; - clock_gettime(CLOCK_MONOTONIC, &start); - for (int trial = 0; trial < trials; ++trial) { - JSON json={0}; - bool success = json_parse(&json, "{\"hello\":\"there\"}"); - if (!success) { - printf("FAIL: %s\n",json.error); - return; - } - - json_free(&json); - } - clock_gettime(CLOCK_MONOTONIC, &end); - printf("time per trial: %.1fns\n", - (((double)end.tv_sec*1e9+(double)end.tv_nsec) - -((double)start.tv_sec*1e9+(double)start.tv_nsec)) - / trials); - -} -#endif - -void json_debug_print(const JSON *json) { - printf("%u values (capacity %u, text length %zu)\n", - arr_len(json->values), arr_cap(json->values), strlen(json->text)); - json_debug_print_value(json, json->values[0]); -} - -// e.g. converts "Hello\nworld" to "Hello\\nworld" -// if out_sz is at least 2 * strlen(str) + 1, the string will fit. -// returns the number of bytes actually written, not including the null terminator. -size_t json_escape_to(char *out, size_t out_sz, const char *in) { - char *start = out; - char *end = out + out_sz; - assert(out_sz); - - --end; // leave room for null terminator - - for (; *in; ++in) { - if (out + 1 > end) { - break; - } - char esc = '\0'; - switch (*in) { - case '\0': goto brk; - case '\n': - esc = 'n'; - goto escape; - case '\\': - esc = '\\'; - goto escape; - case '"': - esc = '"'; - goto escape; - case '\t': - esc = 't'; - goto escape; - case '\r': - esc = 'r'; - goto escape; - case '\f': - esc = 'f'; - goto escape; - case '\b': - esc = 'b'; - goto escape; - escape: - if (out + 2 > end) - goto brk; - *out++ = '\\'; - *out++ = esc; - break; - default: - *out = *in; - ++out; - break; - } - } - brk: - *out = '\0'; - return (size_t)(out - start); -} - -// e.g. converts "Hello\nworld" to "Hello\\nworld" -// the resulting string should be free'd. -char *json_escape(const char *str) { - size_t out_sz = 2 * strlen(str) + 1; - char *out = calloc(1, out_sz); - json_escape_to(out, out_sz, str); - return out; -} - -#undef SKIP_WHITESPACE |