From b983ee158ace30f836ec25c4d6a6426281e5700c Mon Sep 17 00:00:00 2001
From: pommicket <pommicket@gmail.com>
Date: Wed, 11 Jan 2023 17:34:27 -0500
Subject: URI %-escape sequences

---
 lsp-parse.c | 31 +++++++++++++++++++++++++++----
 lsp-write.c | 25 ++++++++++++++++++++++++-
 2 files changed, 51 insertions(+), 5 deletions(-)

diff --git a/lsp-parse.c b/lsp-parse.c
index fd07913..6cb7cd4 100644
--- a/lsp-parse.c
+++ b/lsp-parse.c
@@ -104,13 +104,36 @@ static bool parse_document_uri(LSP *lsp, const JSON *json, JSONValue value, LSPD
 	char *path;
 	#if _WIN32
 	path = string + strlen("file:///");
-	// replace slashes with backslashes
-	for (char *p = path; *p; ++p)
-		if (*p == '/')
-			*p = '\\';
 	#else
 	path = string + strlen("file://");
 	#endif
+	
+	// replace percent-encoded sequences (e.g. replace %20 with ' ')
+	char *out = path;
+	for (const char *in = path; *in; ) {
+		char c = *in++;
+		if (c == '%') {
+			char sequence[3] = {0};
+			if (!in[0] || !in[1] || !isxdigit(in[0]) || !isxdigit(in[1])) {
+				lsp_set_error(lsp, "Bad escape sequence in URI.");
+				free(string);
+				return false;
+			}
+			sequence[0] = in[0];
+			sequence[1] = in[1];
+			in += 2;
+			long byte = strtol(sequence, NULL, 16);
+			assert(byte >= 0 && byte <= 255);
+			c = (char)byte;
+		}
+		#if _WIN32
+		// replace forward slashes with backslashes for consistency
+		if (c == '/') c = '\\';
+		#endif
+		*out++ = c;
+	}
+	*out = '\0';
+	
 	*id = lsp_document_id(lsp, path);
 	free(string);
 	return true;
diff --git a/lsp-write.c b/lsp-write.c
index 8472190..19bd766 100644
--- a/lsp-write.c
+++ b/lsp-write.c
@@ -188,7 +188,30 @@ static void write_file_uri(JSONWriter *o, LSPDocumentID document) {
 		// why the fuck is there another slash it makes no goddamn sense
 		str_builder_append(&o->builder, "/");
 	#endif
-	write_escaped(o, path);
+	for (const char *p = path; *p; ++p) {
+		char c = *p;
+		#if _WIN32
+		// file URIs always use '/' as the path separator (RFC 8089), even on Windows
+		if (c == '\\') c = '/';
+		#endif
+		
+		// see https://www.rfc-editor.org/rfc/rfc3986#page-12
+		// only RFC 3986 "unreserved" characters, '/', and (on Windows) ':' are left un-escaped
+		bool escaped = !(
+			   (c >= '0' && c <= '9')
+			|| (c >= 'a' && c <= 'z')
+			|| (c >= 'A' && c <= 'Z')
+			|| c == '_' || c == '-' || c == '.' || c == '~' || c == '/'
+			#if _WIN32
+			|| c == ':' // i dont think you're supposed to escape the : in C:\...
+			#endif
+			);
+		if (escaped) {
+			str_builder_appendf(&o->builder, "%%%02x", (uint8_t)c);
+		} else {
+			str_builder_appendf(&o->builder, "%c", c);
+		}
+	}
 	str_builder_append(&o->builder, "\"");
 }
 
-- 
cgit v1.2.3