From b983ee158ace30f836ec25c4d6a6426281e5700c Mon Sep 17 00:00:00 2001 From: pommicket Date: Wed, 11 Jan 2023 17:34:27 -0500 Subject: URI %-escape sequences --- lsp-parse.c | 31 +++++++++++++++++++++++++++---- lsp-write.c | 25 ++++++++++++++++++++++++- 2 files changed, 51 insertions(+), 5 deletions(-) diff --git a/lsp-parse.c b/lsp-parse.c index fd07913..6cb7cd4 100644 --- a/lsp-parse.c +++ b/lsp-parse.c @@ -104,13 +104,36 @@ static bool parse_document_uri(LSP *lsp, const JSON *json, JSONValue value, LSPD char *path; #if _WIN32 path = string + strlen("file:///"); - // replace slashes with backslashes - for (char *p = path; *p; ++p) - if (*p == '/') - *p = '\\'; #else path = string + strlen("file://"); #endif + + // replace percent-encoded sequences (e.g. replace %20 with ' ') + char *out = path; + for (const char *in = path; *in; ) { + char c = *in++; + if (c == '%') { + char sequence[3] = {0}; + if (!in[0] || !in[1] || !isxdigit(in[0]) || !isxdigit(in[1])) { + lsp_set_error(lsp, "Bad escape sequence in URI."); + free(string); + return false; + } + sequence[0] = in[0]; + sequence[1] = in[1]; + in += 2; + long byte = strtol(sequence, NULL, 16); + assert(byte >= 0 && byte <= 255); + c = (char)byte; + } + #if _WIN32 + // replace forward slashes with backslashes for consistency + if (c == '/') c = '\\'; + #endif + *out++ = c; + } + *out = '\0'; + *id = lsp_document_id(lsp, path); free(string); return true; diff --git a/lsp-write.c b/lsp-write.c index 8472190..19bd766 100644 --- a/lsp-write.c +++ b/lsp-write.c @@ -188,7 +188,30 @@ static void write_file_uri(JSONWriter *o, LSPDocumentID document) { // why the fuck is there another slash it makes no goddamn sense str_builder_append(&o->builder, "/"); #endif - write_escaped(o, path); + for (const char *p = path; *p; ++p) { + char c = *p; + #if _WIN32 + // i think file URIs have to use slashes? + if (c == '\\') c = '/'; + #endif + + // see https://www.rfc-editor.org/rfc/rfc3986#page-12 + // these are the only allowed un-escaped characters in URIs + bool escaped = !( + (c >= '0' && c <= '9') + || (c >= 'a' && c <= 'z') + || (c >= 'A' && c <= 'Z') + || c == '_' || c == '-' || c == '.' || c == '~' || c == '/' + #if _WIN32 + || c == ':' // i dont think you're supposed to escape the : in C:\... + #endif + ); + if (escaped) { + str_builder_appendf(&o->builder, "%%%02x", (uint8_t)c); + } else { + str_builder_appendf(&o->builder, "%c", c); + } + } str_builder_append(&o->builder, "\""); } -- cgit v1.2.3