1 files changed, 713 insertions, 0 deletions
diff --git a/json.c b/json.c
new file mode 100644
index 0000000..e07b0ec
--- /dev/null
+++ b/json.c
@@ -0,0 +1,713 @@
+// JSON parser for LSP
+// provides FAST(ish) parsing but SLOW lookup
+// this is especially fast for small objects
+// this actually supports "extended json", where objects can have arbitrary values as keys.
+
+typedef struct {
+	u32 pos;
+	u32 len;
+} JSONString;
+
+typedef struct JSONValue JSONValue;
+
+typedef struct {
+	u32 len;
+	// this is an index into the values array
+	//   values[items..items+len] store the names
+	//   values[items+len..items+2*len] store the values
+	u32 items;
+} JSONObject;
+
+typedef struct {
+	u32 len;
+	// this is an index into the values array
+	//    values[elements..elements+len] are the elements
+	u32 elements;
+} JSONArray;
+
+typedef enum {
+	// note: json doesn't actually include undefined.
+	// this is only for returning things from json_get etc.
+	JSON_UNDEFINED,
+	JSON_NULL,
+	JSON_FALSE,
+	JSON_TRUE,
+	JSON_NUMBER,
+	JSON_STRING,
+	JSON_OBJECT,
+	JSON_ARRAY
+} JSONValueType;
+
+struct JSONValue {
+	JSONValueType type;
+	union {
+		double number;
+		JSONString string;
+		JSONArray array;
+		JSONObject object;
+	} val;
+};
+
+
+typedef struct {
+	char error[64];
+	bool is_text_copied; // if this is true, then json_free will call free on text
+	const char *text;
+	// root = values[0]
+	JSONValue *values;
+} JSON;
+
+#define SKIP_WHITESPACE while (json_is_space(text[index])) ++index;
+
+const char *json_type_to_str(JSONValueType type) {
+	switch (type) {
+	case JSON_UNDEFINED:
+		return "undefined";
+	case JSON_NULL:
+		return "null";
+	case JSON_STRING:
+		return "string";
+	case JSON_NUMBER:
+		return "number";
+	case JSON_FALSE:
+		return "false";
+	case JSON_TRUE:
+		return "true";
+	case JSON_ARRAY:
+		return "array";
+	case JSON_OBJECT:
+		return "object";
+	}
+}
+
+static bool json_parse_value(JSON *json, u32 *p_index, JSONValue *val);
+
+// defining this instead of using isspace seems to be faster
+// probably because isspace depends on the locale.
+static inline bool json_is_space(char c) {
+	return c == ' ' || c == '\n' || c == '\r' || c == '\t';
+}
+
+static void json_debug_print_value(const JSON *json, JSONValue value) {
+	switch (value.type) {
+	case JSON_UNDEFINED: printf("undefined"); break;
+	case JSON_NULL: printf("null"); break;
+	case JSON_FALSE: printf("false"); break;
+	case JSON_TRUE: printf("true"); break;
+	case JSON_NUMBER: printf("%g", value.val.number); break;
+	case JSON_STRING: {
+		JSONString string = value.val.string;
+		printf("\"%.*s\"",
+			(int)string.len,
+			json->text + string.pos);
+		} break;
+	case JSON_ARRAY: {
+		JSONArray array = value.val.array;
+		printf("[");
+		for (u32 i = 0; i < array.len; ++i) {
+			json_debug_print_value(json, json->values[array.elements + i]);
+			printf(", ");
+		}
+		printf("]");
+	} break;
+	case JSON_OBJECT: {
+		JSONObject obj = value.val.object;
+		printf("{");
+		for (u32 i = 0; i < obj.len; ++i) {
+			json_debug_print_value(json, json->values[obj.items + i]);
+			printf(": ");
+			json_debug_print_value(json, json->values[obj.items + obj.len + i]);
+			printf(", ");
+		}
+		printf("}");
+		} break;
+	}
+}
+
+// count number of comma-separated values until
+// closing ] or }
+static u32 json_count(JSON *json, u32 index) {
+	int bracket_depth = 0;
+	int brace_depth = 0;
+	u32 count = 1;
+	const char *text = json->text;
+	SKIP_WHITESPACE;
+	// special case: empty object/array
+	if (text[index] == '}' || text[index] == ']')
+		return 0;
+	
+// 	int mark[5000] = {0};
+	for (; ; ++index) {
+		switch (text[index]) {
+		case '\0':
+			return 0; // bad no closing bracket
+		case '[':
+			++bracket_depth;
+			break;
+		case ']':
+			--bracket_depth;
+			if (bracket_depth < 0)
+				return count;
+			break;
+		case '{':
+			++brace_depth;
+// 			mark[index] = 1;
+			break;
+		case '}':
+			--brace_depth;
+// 			mark[index] = 1;
+			if (brace_depth < 0){
+			// useful visualization for debugging
+// 			for (int i = 0; text[i]; ++i ){
+// 				switch (mark[i]){
+// 				case 1: printf("\x1b[91m"); break;
+// 				case 2: printf("\x1b[92m"); break;
+// 				}
+// 				printf("%c",text[i]);
+// 				if (mark[i]) printf("\x1b[0m");
+// 			}
+// 			printf("\n");
+			
+				return count;
+			}
+			break;
+		case ',':
+			if (bracket_depth == 0 && brace_depth == 0)
+				++count;
+			break;
+		case '"': {
+			++index; // skip opening "
+			int escaped = 0;
+			for (; ; ++index) {
+// 				mark[index] = 2;
+				switch (text[index]) {
+				case '\0': return 0; // bad no closing quote
+				case '\\': escaped = !escaped; break;
+				case '"':
+					if (!escaped)
+						goto done;
+					escaped = false;
+					break;
+				default:
+					escaped = false;
+					break;
+				}
+			}
+			done:;
+			} break;
+		}
+	}
+}
+
+static bool json_parse_object(JSON *json, u32 *p_index, JSONObject *object) {
+	u32 index = *p_index;
+	const char *text = json->text;
+	++index; // go past {
+	u32 count = json_count(json, index);
+	object->len = count;
+	object->items = arr_len(json->values);
+	arr_set_len(json->values, arr_len(json->values) + 2 * count);
+	
+	for (u32 i = 0; i < count; ++i) {
+		if (i > 0) {
+			if (text[index] != ',') {
+				strbuf_printf(json->error, "stuff after value in object");
+				return false;
+			}
+			++index;
+		}
+		SKIP_WHITESPACE;
+		JSONValue name = {0}, value = {0};
+		if (!json_parse_value(json, &index, &name))
+			return false;
+		SKIP_WHITESPACE;
+		if (text[index] != ':') {
+			strbuf_printf(json->error, "stuff after name in object");
+			return false;
+		}
+		++index; // skip :
+		SKIP_WHITESPACE;
+		if (!json_parse_value(json, &index, &value))
+			return false;
+		SKIP_WHITESPACE;
+		json->values[object->items + i] = name;
+		json->values[object->items + count + i] = value;
+	}
+	
+	if (text[index] != '}') {
+		strbuf_printf(json->error, "mismatched brackets or quotes.");
+		return false;
+	}
+	++index; // skip }
+	*p_index = index;
+	return true;
+}
+
+static bool json_parse_array(JSON *json, u32 *p_index, JSONArray *array) {
+	u32 index = *p_index;
+	const char *text = json->text;
+	++index; // go past [
+	u32 count = json_count(json, index);
+	array->len = count;
+	array->elements = arr_len(json->values);
+	
+	arr_set_len(json->values, arr_len(json->values) + count);
+	
+	SKIP_WHITESPACE;
+	
+	for (u32 i = 0; i < count; ++i) {
+		if (i > 0) {
+			if (text[index] != ',') {
+				strbuf_printf(json->error, "stuff after element in array");
+				return false;
+			}
+			++index;
+		}
+		SKIP_WHITESPACE;
+		JSONValue element = {0};
+		if (!json_parse_value(json, &index, &element))
+			return false;
+		SKIP_WHITESPACE;
+		json->values[array->elements + i] = element;
+	}
+	
+	if (text[index] != ']') {
+		strbuf_printf(json->error, "mismatched brackets or quotes.");
+		return false;
+	}
+	++index; // skip ]
+	*p_index = index;
+	return true;
+}
+
+static bool json_parse_string(JSON *json, u32 *p_index, JSONString *string) {
+	u32 index = *p_index;
+	++index; // skip opening "
+	string->pos = index;
+	const char *text = json->text;
+	bool escaped = false;
+	for (; ; ++index) {
+		switch (text[index]) {
+		case '"':
+			if (!escaped)
+				goto done;
+			escaped = false;
+			break;
+		case '\\':
+			escaped = !escaped;
+			break;
+		case '\0':
+			strbuf_printf(json->error, "string literal goes to end of JSON");
+			return false;
+		default:
+			escaped = false;
+			break;
+		}
+	}
+	done:
+	string->len = index - string->pos;
+	++index; // skip closing "
+	*p_index = index;
+	return true;
+}
+
+static bool json_parse_number(JSON *json, u32 *p_index, double *number) {
+	char *endp = NULL;
+	const char *text = json->text;
+	u32 index = *p_index;
+	*number = strtod(text + index, &endp);
+	if (endp == text + index) {
+		strbuf_printf(json->error, "bad number");
+		return false;
+	}
+	index = (u32)(endp - text);
+	*p_index = index;
+	return true;
+}
+
+static bool json_parse_value(JSON *json, u32 *p_index, JSONValue *val) {
+	const char *text = json->text;
+	u32 index = *p_index;
+	SKIP_WHITESPACE;
+	switch (text[index]) {
+	case '{':
+		val->type = JSON_OBJECT;
+		if (!json_parse_object(json, &index, &val->val.object))
+			return false;
+		break;
+	case '[':
+		val->type = JSON_ARRAY;
+		if (!json_parse_array(json, &index, &val->val.array))
+			return false;
+		break;
+	case '"':
+		val->type = JSON_STRING;
+		if (!json_parse_string(json, &index, &val->val.string))
+			return false;
+		break;
+	case ANY_DIGIT:
+	case '-':
+	case '+':
+		val->type = JSON_NUMBER;
+		if (!json_parse_number(json, &index, &val->val.number))
+			return false;
+		break;
+	case 'f':
+		val->type = JSON_FALSE;
+		if (!str_has_prefix(&text[index], "false"))
+			return false;
+		index += 5;
+		break;
+	case 't':
+		val->type = JSON_TRUE;
+		if (!str_has_prefix(&text[index], "true"))
+			return false;
+		index += 4;
+		break;
+	case 'n':
+		val->type = JSON_NULL;
+		if (!str_has_prefix(&text[index], "null"))
+			return false;
+		index += 4;
+		break;
+	default:
+		strbuf_printf(json->error, "bad value");
+		return false;
+	}
+	*p_index = index;
+	return true;
+}
+
+void json_free(JSON *json) {
+	arr_free(json->values);
+	// important we don't zero json here because we want to preserve json->error.
+	if (json->is_text_copied) {
+		free((void*)json->text);
+	}
+	json->text = NULL;
+}
+
+// NOTE: text must live as long as json!!!
+bool json_parse(JSON *json, const char *text) {
+	memset(json, 0, sizeof *json);
+	json->text = text;
+	arr_reserve(json->values, strlen(text) / 8);
+	arr_addp(json->values); // add root
+	JSONValue val = {0};
+	u32 index = 0;
+	if (!json_parse_value(json, &index, &val)) {
+		json_free(json);
+		return false;
+	}
+	SKIP_WHITESPACE;
+	if (text[index]) {
+		json_free(json);
+		strbuf_printf(json->error, "extra text after end of root object");
+		return false;
+	}
+	json->values[0] = val;
+	return true;
+}
+
+// like json_parse, but a copy of text is made, so you can free/overwrite it immediately.
+bool json_parse_copy(JSON *json, const char *text) {
+	bool success = json_parse(json, str_dup(text));
+	if (success) {
+		json->is_text_copied = true;
+		return true;
+	} else {
+		free((void*)json->text);
+		json->text = NULL;
+		return false;
+	}
+}
+
+static bool json_streq(const JSON *json, const JSONString *string, const char *name) {
+	const char *p = &json->text[string->pos];
+	const char *end = p + string->len;
+	for (; p < end; ++p, ++name) {
+		if (*name != *p)
+			return false;
+	}
+	return *name == '\0';
+}
+
+// returns undefined if the property `name` does not exist.
+JSONValue json_object_get(const JSON *json, JSONObject object, const char *name) {
+	const JSONValue *items = &json->values[object.items];
+	for (u32 i = 0; i < object.len; ++i) {
+		const JSONValue *this_name = items++;
+		if (this_name->type == JSON_STRING && json_streq(json, &this_name->val.string, name)) {
+			return json->values[object.items + object.len + i];
+		}
+	}
+	return (JSONValue){0};
+}
+
+// returns (JSONString){0} (which is interpreted as an empty string) if `name` does
+// not exist or is not a string.
+JSONString json_object_get_string(const JSON *json, JSONObject object, const char *name) {
+	JSONValue value = json_object_get(json, object, name);
+	if (value.type == JSON_STRING) {
+		return value.val.string;
+	} else {
+		return (JSONString){0};
+	}
+}
+
+// returns (JSONObject){0} (which is interpreted as an empty object) if `name` does
+// not exist or is not an object.
+JSONObject json_object_get_object(const JSON *json, JSONObject object, const char *name) {
+	JSONValue value = json_object_get(json, object, name);
+	if (value.type == JSON_OBJECT) {
+		return value.val.object;
+	} else {
+		return (JSONObject){0};
+	}
+}
+
+// returns (JSONArray){0} (which is interpreted as an empty array) if `name` does
+// not exist or is not an array.
+JSONArray json_object_get_array(const JSON *json, JSONObject object, const char *name) {
+	JSONValue value = json_object_get(json, object, name);
+	if (value.type == JSON_ARRAY) {
+		return value.val.array;
+	} else {
+		return (JSONArray){0};
+	}
+}
+
+// returns NaN if `name` does not exist or is not a number.
+double json_object_get_number(const JSON *json, JSONObject object, const char *name) {
+	JSONValue value = json_object_get(json, object, name);
+	if (value.type == JSON_NUMBER) {
+		return value.val.number;
+	} else {
+		return NAN;
+	}
+}
+
+JSONValue json_array_get(const JSON *json, JSONArray array, u64 i) {
+	if (i < array.len) {
+		return json->values[array.elements + i];
+	}
+	return (JSONValue){0};
+}
+
+// e.g. if json is  { "a" : { "b": 3 }}, then json_get(json, "a.b") = 3.
+// returns undefined if there is no such property
+JSONValue json_get(const JSON *json, const char *path) {
+	char segment[128];
+	const char *p = path;
+	if (!json->values) {
+		return (JSONValue){0};
+	}
+	JSONValue curr_value = json->values[0];
+	while (*p) {
+		size_t segment_len = strcspn(p, ".");
+		strn_cpy(segment, sizeof segment, p, segment_len);
+		if (curr_value.type != JSON_OBJECT) {
+			return (JSONValue){0};
+		}
+		curr_value = json_object_get(json, curr_value.val.object, segment);
+		p += segment_len;
+		if (*p == '.') ++p;
+	}
+	return curr_value;
+}
+
+// equivalent to json_get(json, path).type != JSON_UNDEFINED, but more readable
+bool json_has(const JSON *json, const char *path) {
+	JSONValue value = json_get(json, path);
+	return value.type != JSON_UNDEFINED;
+}
+
+// turn a json string into a null terminated string.
+// this won't be nice if the json string includes \u0000 but that's rare.
+// if buf_sz > string->len, the string will fit.
+void json_string_get(const JSON *json, JSONString string, char *buf, size_t buf_sz) {
+	const char *text = json->text;
+	if (buf_sz == 0) {
+		assert(0);
+		return;
+	}
+	char *buf_end = buf + buf_sz - 1;
+	for (u32 i = string.pos, end = string.pos + string.len; i < end && buf < buf_end; ++i) {
+		if (text[i] != '\\') {
+			*buf++ = text[i];
+		} else {
+			++i;
+			if (i >= end) break;
+			// escape sequence
+			switch (text[i]) {
+			case 'n': *buf++ = '\n'; break;
+			case 'r': *buf++ = '\r'; break;
+			case 'b': *buf++ = '\b'; break;
+			case 't': *buf++ = '\t'; break;
+			case 'f': *buf++ = '\f'; break;
+			case '\\': *buf++ = '\\'; break;
+			case '/': *buf++ = '/'; break;
+			case '"': *buf++ = '"'; break;
+			case 'u': {
+				if ((buf_end - buf) < 4 || i + 5 > end)
+					goto brk;
+				++i;
+				
+				char hex[5] = {0};
+				hex[0] = text[i++];
+				hex[1] = text[i++];
+				hex[2] = text[i++];
+				hex[3] = text[i++];
+				unsigned code_point=0;
+				sscanf(hex, "%04x", &code_point);
+				// technically this won't deal with people writing out UTF-16 surrogate halves
+				// using \u. i dont care.
+				size_t n = unicode_utf32_to_utf8(buf, code_point);
+				if (n <= 4) buf += n;
+				} break;
+			}
+		}
+	}
+	brk:
+	*buf = '\0';
+}
+
+// returns a malloc'd null-terminated string.
+static char *json_string_get_alloc(const JSON *json, JSONString string) {
+	u32 n = string.len + 1;
+	if (n == 0) --n; // extreme edge case
+	char *buf = calloc(1, n);
+	json_string_get(json, string, buf, n);
+	return buf;
+}
+
+
+#if __unix__
+static void json_test_time_large(const char *filename) {
+	struct timespec start={0},end={0};
+	FILE *fp = fopen(filename,"rb");
+	if (!fp) {
+		perror(filename);
+		return;
+	}
+	
+	fseek(fp,0,SEEK_END);
+	size_t sz = (size_t)ftell(fp);
+	char *buf = calloc(1,sz+1);
+	rewind(fp);
+	fread(buf, 1, sz, fp);
+	fclose(fp);
+	for (int trial = 0; trial < 5; ++trial) {
+		clock_gettime(CLOCK_MONOTONIC, &start);
+		JSON json={0};
+		bool success = json_parse(&json, buf);
+		if (!success) {
+			printf("FAIL: %s\n",json.error);
+			return;
+		}
+		
+		json_free(&json);
+		clock_gettime(CLOCK_MONOTONIC, &end);
+		
+		
+		
+		printf("time: %.1fms\n",
+			((double)end.tv_sec*1e3+(double)end.tv_nsec*1e-6)
+			-((double)start.tv_sec*1e3+(double)start.tv_nsec*1e-6));
+	}
+		
+}
+static void json_test_time_small(void) {
+	struct timespec start={0},end={0};
+	int trials = 50000000;
+	clock_gettime(CLOCK_MONOTONIC, &start);
+	for (int trial = 0; trial < trials; ++trial) {
+		JSON json={0};
+		bool success = json_parse(&json, "{\"hello\":\"there\"}");
+		if (!success) {
+			printf("FAIL: %s\n",json.error);
+			return;
+		}
+		
+		json_free(&json);
+	}
+	clock_gettime(CLOCK_MONOTONIC, &end);
+	printf("time per trial: %.1fns\n",
+		(((double)end.tv_sec*1e9+(double)end.tv_nsec)
+		-((double)start.tv_sec*1e9+(double)start.tv_nsec))
+		/ trials);
+		
+}
+#endif
+
+void json_debug_print(const JSON *json) {
+	printf("%u values (capacity %u, text length %zu)\n",
+		arr_len(json->values), arr_cap(json->values), strlen(json->text));
+	json_debug_print_value(json, json->values[0]);
+}
+
+// e.g. converts "Hello\nworld" to "Hello\\nworld"
+// if out_sz is at least 2 * strlen(str) + 1, the string will fit.
+// returns the number of bytes actually written, not including the null terminator.
+size_t json_escape_to(char *out, size_t out_sz, const char *in) {
+	char *start = out;
+	char *end = out + out_sz;
+	assert(out_sz);
+	
+	--end; // leave room for null terminator
+	
+	for (; *in; ++in) {
+		if (out + 1 > end) {
+			break;
+		}
+		char esc = '\0';
+		switch (*in) {
+		case '\0': goto brk;
+		case '\n':
+			esc = 'n';
+			goto escape;
+		case '\\':
+			esc = '\\';
+			goto escape;
+		case '"':
+			esc = '"';
+			goto escape;
+		case '\t':
+			esc = 't';
+			goto escape;
+		case '\r':
+			esc = 'r';
+			goto escape;
+		case '\f':
+			esc = 'f';
+			goto escape;
+		case '\b':
+			esc = 'b';
+			goto escape;
+		escape:
+			if (out + 2 > end)
+				goto brk;
+			*out++ = '\\';
+			*out++ = esc;
+			break;
+		default:
+			*out = *in;
+			++out;
+			break;
+		}
+	}
+	brk:
+	*out = '\0';
+	return (size_t)(out - start);
+}
+
+// e.g. converts "Hello\nworld" to "Hello\\nworld"
+// the resulting string should be free'd.
+char *json_escape(const char *str) {
+	size_t out_sz = 2 * strlen(str) + 1;
+	char *out = calloc(1, out_sz);
+	json_escape_to(out, out_sz, str);
+	return out;
+}
+
+#undef SKIP_WHITESPACE