// JSON parser for LSP // provides FAST(ish) parsing but SLOW lookup // this is especially fast for small objects typedef struct { u32 pos; u32 len; } JSONString; typedef struct JSONValue JSONValue; typedef struct { u32 len; // this is an index into the values array // values[items..items+len] store the names // values[items+len..items+2*len] store the values u32 items; } JSONObject; typedef struct { u32 len; // this is an index into the values array // values[elements..elements+len] are the elements u32 elements; } JSONArray; enum { JSON_NULL, JSON_FALSE, JSON_TRUE, JSON_NUMBER, JSON_STRING, JSON_OBJECT, JSON_ARRAY }; struct JSONValue { u8 type; union { double number; JSONString string; JSONArray array; JSONObject object; } val; }; typedef struct { char error[64]; const char *text; // root = values[0] JSONValue *values; } JSON; #define SKIP_WHITESPACE while (json_is_space(text[index])) ++index; static bool json_parse_value(JSON *json, u32 *p_index, JSONValue *val); // defining this instead of using isspace seems to be faster // probably because isspace depends on the locale. static inline bool json_is_space(char c) { return c == ' ' || c == '\n' || c == '\r' || c == '\t'; } static void json_debug_print_value(const JSON *json, u32 idx) { JSONValue *value = &json->values[idx]; switch (value->type) { case JSON_NULL: printf("null"); break; case JSON_FALSE: printf("false"); break; case JSON_TRUE: printf("true"); break; case JSON_NUMBER: printf("%f", value->val.number); break; case JSON_STRING: { JSONString *string = &value->val.string; printf("\"%.*s\"", (int)string->len, json->text + string->pos); } break; case JSON_ARRAY: { JSONArray *array = &value->val.array; printf("["); for (u32 i = 0; i < array->len; ++i) { json_debug_print_value(json, array->elements + i); printf(", "); } printf("]"); } break; case JSON_OBJECT: { JSONObject *obj = &value->val.object; printf("{"); for (u32 i = 0; i < obj->len; ++i) { json_debug_print_value(json, obj->items + i); printf(": "); json_debug_print_value(json, obj->items + obj->len + i); printf(", "); } printf("}"); } break; } } // count number of comma-separated values until // closing ] or } static u32 json_count(JSON *json, u32 index) { int bracket_depth = 0; int brace_depth = 0; u32 count = 1; const char *text = json->text; SKIP_WHITESPACE; // special case: empty object/array if (text[index] == '}' || text[index] == ']') return 0; // int mark[5000] = {0}; for (; ; ++index) { switch (text[index]) { case '\0': return 0; // bad no closing bracket case '[': ++bracket_depth; break; case ']': --bracket_depth; if (bracket_depth < 0) return count; break; case '{': ++brace_depth; // mark[index] = 1; break; case '}': --brace_depth; // mark[index] = 1; if (brace_depth < 0){ // useful visualization for debugging // for (int i = 0; text[i]; ++i ){ // switch (mark[i]){ // case 1: printf("\x1b[91m"); break; // case 2: printf("\x1b[92m"); break; // } // printf("%c",text[i]); // if (mark[i]) printf("\x1b[0m"); // } // printf("\n"); return count; } break; case ',': if (bracket_depth == 0 && brace_depth == 0) ++count; break; case '"': { ++index; // skip opening " int escaped = 0; for (; ; ++index) { // mark[index] = 2; switch (text[index]) { case '\0': return 0; // bad no closing quote case '\\': escaped = !escaped; break; case '"': if (!escaped) goto done; escaped = false; break; default: escaped = false; break; } } done:; } break; } } } static bool json_parse_object(JSON *json, u32 *p_index, JSONObject *object) { u32 index = *p_index; const char *text = json->text; ++index; // go past { u32 count = json_count(json, index); object->len = count; object->items = arr_len(json->values); arr_set_len(json->values, arr_len(json->values) + 2 * count); for (u32 i = 0; i < count; ++i) { if (i > 0) { if (text[index] != ',') { strbuf_printf(json->error, "stuff after value in object"); return false; } ++index; } SKIP_WHITESPACE; JSONValue name = {0}, value = {0}; if (!json_parse_value(json, &index, &name)) return false; SKIP_WHITESPACE; if (text[index] != ':') { strbuf_printf(json->error, "stuff after name in object"); return false; } ++index; // skip : SKIP_WHITESPACE; if (!json_parse_value(json, &index, &value)) return false; SKIP_WHITESPACE; json->values[object->items + i] = name; json->values[object->items + count + i] = value; } if (text[index] != '}') { strbuf_printf(json->error, "mismatched brackets or quotes."); return false; } ++index; // skip } *p_index = index; return true; } static bool json_parse_array(JSON *json, u32 *p_index, JSONArray *array) { u32 index = *p_index; const char *text = json->text; ++index; // go past [ u32 count = json_count(json, index); array->len = count; array->elements = arr_len(json->values); arr_set_len(json->values, arr_len(json->values) + count); SKIP_WHITESPACE; for (u32 i = 0; i < count; ++i) { if (i > 0) { if (text[index] != ',') { strbuf_printf(json->error, "stuff after element in array"); return false; } ++index; } SKIP_WHITESPACE; JSONValue element = {0}; if (!json_parse_value(json, &index, &element)) return false; SKIP_WHITESPACE; json->values[array->elements + i] = element; } if (text[index] != ']') { strbuf_printf(json->error, "mismatched brackets or quotes."); return false; } ++index; // skip ] *p_index = index; return true; } static bool json_parse_string(JSON *json, u32 *p_index, JSONString *string) { u32 index = *p_index; ++index; // skip opening " string->pos = index; const char *text = json->text; bool escaped = false; for (; ; ++index) { switch (text[index]) { case '"': if (!escaped) goto done; escaped = false; break; case '\\': escaped = !escaped; break; case '\0': strbuf_printf(json->error, "string literal goes to end of JSON"); return false; default: escaped = false; break; } } done: string->len = index - string->pos; ++index; // skip closing " *p_index = index; return true; } static bool json_parse_number(JSON *json, u32 *p_index, double *number) { char *endp = NULL; const char *text = json->text; u32 index = *p_index; *number = strtod(text + index, &endp); if (endp == text + index) { strbuf_printf(json->error, "bad number"); return false; } index = (u32)(endp - text); *p_index = index; return true; } static bool json_parse_value(JSON *json, u32 *p_index, JSONValue *val) { const char *text = json->text; u32 index = *p_index; SKIP_WHITESPACE; switch (text[index]) { case '{': val->type = JSON_OBJECT; if (!json_parse_object(json, &index, &val->val.object)) return false; break; case '[': val->type = JSON_ARRAY; if (!json_parse_array(json, &index, &val->val.array)) return false; break; case '"': val->type = JSON_STRING; if (!json_parse_string(json, &index, &val->val.string)) return false; break; case ANY_DIGIT: val->type = JSON_NUMBER; if (!json_parse_number(json, &index, &val->val.number)) return false; break; case 'f': val->type = JSON_FALSE; if (!str_has_prefix(&text[index], "false")) return false; index += 5; break; case 't': val->type = JSON_TRUE; if (!str_has_prefix(&text[index], "true")) return false; index += 4; break; case 'n': val->type = JSON_NULL; if (!str_has_prefix(&text[index], "null")) return false; index += 4; break; default: strbuf_printf(json->error, "bad value"); return false; } *p_index = index; return true; } // NOTE: text must live as long as json!!! static bool json_parse(JSON *json, const char *text) { memset(json, 0, sizeof *json); json->text = text; // @TODO: is this a good capacity? arr_reserve(json->values, strlen(text) / 8); arr_addp(json->values); // add root JSONValue val = {0}; u32 index = 0; if (!json_parse_value(json, &index, &val)) return false; SKIP_WHITESPACE; if (text[index]) { strbuf_printf(json->error, "extra text after end of root object"); return false; } json->values[0] = val; return true; } static void json_free(JSON *json) { arr_free(json->values); memset(json, 0, sizeof *json); } #if __unix__ static void json_test_time_large(const char *filename) { struct timespec start={0},end={0}; FILE *fp = fopen(filename,"rb"); if (!fp) { perror(filename); return; } fseek(fp,0,SEEK_END); size_t sz = (size_t)ftell(fp); char *buf = calloc(1,sz+1); rewind(fp); fread(buf, 1, sz, fp); fclose(fp); for (int trial = 0; trial < 5; ++trial) { clock_gettime(CLOCK_MONOTONIC, &start); JSON json={0}; bool success = json_parse(&json, buf); if (!success) { printf("FAIL: %s\n",json.error); return; } json_free(&json); clock_gettime(CLOCK_MONOTONIC, &end); printf("time: %.1fms\n", ((double)end.tv_sec*1e3+(double)end.tv_nsec*1e-6) -((double)start.tv_sec*1e3+(double)start.tv_nsec*1e-6)); } } static void json_test_time_small(void) { struct timespec start={0},end={0}; int trials = 50000000; clock_gettime(CLOCK_MONOTONIC, &start); for (int trial = 0; trial < trials; ++trial) { JSON json={0}; bool success = json_parse(&json, "{\"hello\":\"there\"}"); if (!success) { printf("FAIL: %s\n",json.error); return; } json_free(&json); } clock_gettime(CLOCK_MONOTONIC, &end); printf("time per trial: %.1fns\n", (((double)end.tv_sec*1e9+(double)end.tv_nsec) -((double)start.tv_sec*1e9+(double)start.tv_nsec)) / trials); } #endif static void json_debug_print(const JSON *json) { printf("%u values (capacity %u, text length %zu)\n", arr_len(json->values), arr_cap(json->values), strlen(json->text)); json_debug_print_value(json, 0); } #undef SKIP_WHITESPACE