10 files changed, 371 insertions, 194 deletions
diff --git a/base.h b/base.h
index a2ba470..ed64fe8 100644
--- a/base.h
+++ b/base.h
@@ -168,34 +168,4 @@ static void print(const char *fmt, ...) {
 #define debug_println(...)
 #endif
 
-// NOTE: these have to be defined here because lsp.h uses Language
-
-// If you are adding new languages, DO NOT change the constant values
-// of the previous languages. It will mess up config files which use :set-language!
-typedef enum {
-	/// avoid using this and use LANG_TEXT instead.
-	LANG_NONE = 0,
-	LANG_C = 1,
-	LANG_CPP = 2,
-	LANG_RUST = 3,
-	LANG_PYTHON = 4,
-	LANG_TEX = 5,
-	LANG_MARKDOWN = 6,
-	LANG_HTML = 7,
-	/// .cfg files
-	LANG_CONFIG = 8,
-	LANG_JAVASCRIPT = 9,
-	LANG_JAVA = 10,
-	LANG_GO = 11,
-	/// like \ref LANG_CONFIG, but with multiline strings.
-	LANG_TED_CFG = 12,
-	LANG_TYPESCRIPT = 13,
-	LANG_JSON = 14,
-	LANG_XML = 15,
-	LANG_GLSL = 16,
-	/// plain text
-	LANG_TEXT = 17,
-	LANG_COUNT
-} Language;
-
 #endif // BASE_H_
diff --git a/buffer.c b/buffer.c
index f72ad9e..95cd3a3 100644
--- a/buffer.c
+++ b/buffer.c
@@ -222,7 +222,7 @@ Language buffer_language(TextBuffer *buffer) {
 	
 	// @TODO(optimization): cache this?
 	//         (we're calling buffer_lsp on every edit and that calls this)
-	if (buffer->manual_language >= 1 && buffer->manual_language <= LANG_COUNT)
+	if (buffer->manual_language >= 1)
 		return (Language)(buffer->manual_language - 1);
 	const Settings *settings = buffer->ted->default_settings; // important we don't use buffer_settings here since that would cause a loop!
 	const char *filename = path_filename(buffer->path);
@@ -230,24 +230,14 @@ Language buffer_language(TextBuffer *buffer) {
 
 	int match_score = 0;
 	Language match = LANG_TEXT;
-	
-	for (u16 l = 0; l < LANG_COUNT; ++l) {
-		const char *extensions = settings->language_extensions[l];
-		
-		if (extensions) {
-			// extensions is a string with commas separating each extension.
-			size_t len = 0;
-			for (const char *p = extensions; *p; p += len) {
-				if (*p == ',') ++p; // move past comma
-				len = strcspn(p, ",");
-				if (filename_len >= len && strncmp(&filename[filename_len - len], p, len) == 0) {
-					int score = (int)len;
-					if (score > match_score) {
-						// found a better match!
-						match_score = score;
-						match = l;
-					}
-				}
+	arr_foreach_ptr(settings->language_extensions, LanguageExtension, ext) {
+		size_t len = strlen(ext->extension);
+		if (filename_len >= len && memcmp(&filename[filename_len - len], ext->extension, len) == 0) {
+			int score = (int)len;
+			if (score > match_score) {
+				// found a better match!
+				match_score = score;
+				match = ext->language;
 			}
 		}
 	}
diff --git a/command.c b/command.c
index 36fd2d1..775d6bc 100644
--- a/command.c
+++ b/command.c
@@ -411,7 +411,7 @@ void command_execute(Ted *ted, Command c, i64 argument) {
 	
 	case CMD_SET_LANGUAGE:
 		if (buffer && !buffer->is_line_buffer) {
-			if (argument < 0 || argument >= LANG_COUNT)
+			if (argument < 0 || argument >= LANG_COUNT_MAX)
 				buffer->manual_language = -1;
 			else
 				buffer->manual_language = (i16)(argument + 1);
diff --git a/config.c b/config.c
index 78b3e8e..f19603a 100644
--- a/config.c
+++ b/config.c
@@ -226,10 +226,7 @@ static void settings_copy(Settings *dest, const Settings *src) {
 	gl_rc_texture_incref(dest->bg_texture);
 	
 	context_copy(&dest->context, &src->context);
-	for (u32 i = 0; i < LANG_COUNT; ++i) {
-		if (src->language_extensions[i])
-			dest->language_extensions[i] = str_dup(src->language_extensions[i]);
-	}
+	dest->language_extensions = arr_copy(src->language_extensions);
 	dest->key_actions = arr_copy(src->key_actions);
 }
 
@@ -824,9 +821,25 @@ static void config_parse_line(ConfigReader *cfg, Settings **applicable_settings,
 			*dst = 0;
 			arr_foreach_ptr(applicable_settings, Settings *, psettings) {
 				Settings *settings = *psettings;
-				if (settings->language_extensions[lang])
-					free(settings->language_extensions[lang]);
-				settings->language_extensions[lang] = str_dup(exts);
+				// remove old extensions (walk backwards so a removal never shifts an entry we haven't checked yet)
+				for (u32 i = arr_len(settings->language_extensions); i > 0; --i) {
+					if (settings->language_extensions[i - 1].language == lang)
+						arr_remove(settings->language_extensions, i - 1);
+				}
+				
+				const char *p = exts;
+				while (*p) {
+					// step over separators; without this p would sit on the ',' forever and the loop never ends
+					if (*p == ',') { ++p; continue; }
+					size_t len = strcspn(p, ",");
+					LanguageExtension *ext = arr_addp(settings->language_extensions);
+					ext->language = lang;
+					// truncate over-long extensions so the copy (plus null terminator) fits in ext->extension
+					size_t copy_len = len < sizeof ext->extension ? len : sizeof ext->extension - 1;
+					memcpy(ext->extension, p, copy_len);
+					ext->extension[copy_len] = '\0';
+					p += len;
+				}
 			}
 			free(exts);
 		}
@@ -1087,8 +1100,7 @@ static void gluint_eliminate_duplicates(GLuint **arr) {
 void config_free(Ted *ted) {
 	arr_foreach_ptr(ted->all_settings, Settings, settings) {
 		context_free(&settings->context);
-		for (u32 i = 0; i < LANG_COUNT; ++i)
-			free(settings->language_extensions[i]);
+		arr_free(settings->language_extensions);
 		gl_rc_sab_decref(&settings->bg_shader);
 		gl_rc_texture_decref(&settings->bg_texture);
 		arr_free(settings->key_actions);
diff --git a/lsp-write.c b/lsp-write.c
index 79c1dd9..2689869 100644
--- a/lsp-write.c
+++ b/lsp-write.c
@@ -6,43 +6,30 @@
 
 #define write_bool lsp_write_bool // prevent naming conflict
 
-static const char *lsp_language_id(Language lang) {
-	switch (lang) {
-	case LANG_CONFIG:
-	case LANG_TED_CFG:
-	case LANG_TEXT:
-	case LANG_NONE:
-		return "text";
-	case LANG_C:
-		return "c";
-	case LANG_CPP:
-		return "cpp";
-	case LANG_JAVA:
-		return "java";
-	case LANG_JAVASCRIPT:
-		return "javascript";
-	case LANG_JSON:
-		return "json";
-	case LANG_TYPESCRIPT:
-		return "typescript";
-	case LANG_MARKDOWN:
-		return "markdown";
-	case LANG_GO:
-		return "go";
-	case LANG_RUST:
-		return "rust";
-	case LANG_PYTHON:
-		return "python";
-	case LANG_HTML:
-		return "html";
-	case LANG_TEX:
-		return "latex";
-	case LANG_XML:
-		return "xml";
-	case LANG_GLSL:
-		// not specified as of LSP 3.17, but this seems like the natural choice
-		return "glsl";
-	case LANG_COUNT: break;
+typedef struct {
+	u64 number; // language ID, as passed to lsp_register_language
+	char identifier[32]; // LSP language identifier; an empty string marks the first unused entry
+} LanguageId;
+static LanguageId language_ids[512]; // filled in order by lsp_register_language
+void lsp_register_language(u64 id, const char *lsp_identifier) {
+	// Associate `id` with `lsp_identifier` in the language_ids table.
+	// Reuse the entry already registered for `id`, or else take the first unused one (empty identifier).
+	LanguageId *entry = language_ids;
+	while (*entry->identifier && entry->number != id)
+		++entry;
+	// this function never writes the last element, so the scan above always stops inside the array
+	if ((int)(entry - language_ids) < (int)arr_count(language_ids) - 1) {
+		entry->number = id;
+		strbuf_cpy(entry->identifier, lsp_identifier);
+	}
+}
+
+static const char *lsp_language_id(u64 lang) {
+	int i;
+	for (i = 0; *language_ids[i].identifier; ++i) {
+		if (language_ids[i].number == lang) {
+			return language_ids[i].identifier;
+		}
 	}
 	assert(0);
 	return "text";
diff --git a/lsp.c b/lsp.c
index 9617f6d..9761049 100644
--- a/lsp.c
+++ b/lsp.c
@@ -4,8 +4,6 @@
 #include "lsp.h"
 #include "util.h"
 
-const char *language_to_str(Language language);
-
 static LSPMutex request_id_mutex;
 
 // it's nice to have request IDs be totally unique, including across LSP servers.
diff --git a/lsp.h b/lsp.h
index efaca11..2ebcce4 100644
--- a/lsp.h
+++ b/lsp.h
@@ -111,7 +111,7 @@ typedef struct {
 } LSPRequestCancel;
 
 typedef struct {
-	Language language;
+	u64 language;
 	LSPDocumentID document;
 	// freed by lsp_request_free
 	char *file_contents;
@@ -587,6 +587,8 @@ typedef struct LSP {
 		char error[512];
 } LSP;
 
+/// Associate `id` with the LSP language identifier `lsp_identifier` (see https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#-textdocumentitem-)
+void lsp_register_language(u64 id, const char *lsp_identifier);
 // returns true if there's an error.
 // returns false and sets error to "" if there's no error.
 // if clear = true, the error will be cleared.
diff --git a/main.c b/main.c
index 2cbe3ce..6050771 100644
--- a/main.c
+++ b/main.c
@@ -1,4 +1,7 @@
 /*
+@TODO:
+- check bounds in CMD_SET_LANGUAGE
+- comment-start & comment-end settings
 FUTURE FEATURES:
 - manual.md
 - CSS highlighting
@@ -9,7 +12,6 @@ FUTURE FEATURES:
 - return to previous location in buffer
 - font setting & support for multiple fonts to cover more characters
 - support for variable-width fonts
-- comment-start & comment-end settings
 - robust find (results shouldn't move around when you type things)
 - open multiple files with command line arguments
 - document links using LSP textDocument/documentLink request
diff --git a/syntax.c b/syntax.c
index bb69a50..936f925 100644
--- a/syntax.c
+++ b/syntax.c
@@ -67,36 +67,15 @@ enum {
 
 typedef struct {
 	Language lang;
-	const char *name;
+	char name[32];
 } LanguageName;
 
-static const LanguageName language_names[] = {
-	{LANG_NONE, "None"},
-	{LANG_C, "C"},
-	{LANG_CPP, "C++"},
-	{LANG_RUST, "Rust"},
-	{LANG_PYTHON, "Python"},
-	{LANG_TEX, "Tex"},
-	{LANG_MARKDOWN, "Markdown"},
-	{LANG_HTML, "HTML"},
-	{LANG_CONFIG, "Config"},
-	{LANG_JAVASCRIPT, "JavaScript"},
-	{LANG_JAVA, "Java"},
-	{LANG_GO, "Go"},
-	{LANG_TED_CFG, "TedCfg"},
-	{LANG_TYPESCRIPT, "TypeScript"},
-	{LANG_JSON, "JSON"},
-	{LANG_XML, "XML"},
-	{LANG_GLSL, "GLSL"},
-	{LANG_TEXT, "Text"},
-};
-
-static_assert_if_possible(arr_count(language_names) == LANG_COUNT)
+static LanguageName language_names[LANG_COUNT_MAX];
 
 
 // returns the language this string is referring to, or LANG_NONE if it's invalid.
 Language language_from_str(const char *str) {
-	for (int i = 0; i < LANG_COUNT; ++i) {
+	for (int i = 0; language_names[i].name[0]; ++i) {
 		if (strcmp_case_insensitive(language_names[i].name, str) == 0)
 			return language_names[i].lang;
 	}
@@ -104,7 +83,7 @@ Language language_from_str(const char *str) {
 }
 
 const char *language_to_str(Language language) {
-	for (int i = 0; i < LANG_COUNT; ++i) {
+	for (int i = 0; language_names[i].name[0]; ++i) {
 		if (language_names[i].lang == language)
 			return language_names[i].name;
 	}
@@ -137,7 +116,6 @@ const char *language_comment_start(Language l) {
 	case LANG_NONE:
 	case LANG_MARKDOWN:
 	case LANG_TEXT:
-	case LANG_COUNT:
 		break;
 	}
 	return "";
@@ -1091,7 +1069,7 @@ static bool is_html_tag_char(char32_t c) {
 }
 
 // highlights XML and HTML
-static void syntax_highlight_xml(SyntaxState *state, const char32_t *line, u32 line_len, SyntaxCharType *char_types, Language lang) {
+static void syntax_highlight_html_like(SyntaxState *state, const char32_t *line, u32 line_len, SyntaxCharType *char_types, Language lang) {
 	bool comment = (*state & SYNTAX_STATE_HTML_COMMENT) != 0;
 	bool in_sgl_string = false; // 'string'
 	bool in_dbl_string = false; // "string"
@@ -1205,7 +1183,7 @@ static void syntax_highlight_xml(SyntaxState *state, const char32_t *line, u32 l
 	*state = (comment * SYNTAX_STATE_HTML_COMMENT);
 }
 
-static void syntax_highlight_config(SyntaxState *state, const char32_t *line, u32 line_len, SyntaxCharType *char_types, bool is_ted_cfg) {
+static void syntax_highlight_cfg(SyntaxState *state, const char32_t *line, u32 line_len, SyntaxCharType *char_types, bool is_ted_cfg) {
 	bool string = (*state & SYNTAX_STATE_TED_CFG_STRING) != 0;
 	char32_t string_delimiter = (*state & SYNTAX_STATE_TED_CFG_STRING_BACKTICK) ? '`' : '"';
 	
@@ -1727,61 +1705,208 @@ static void syntax_highlight_go(SyntaxState *state_ptr, const char32_t *line, u3
 	);
 }
 
+static void syntax_highlight_text(SyntaxState *state, const char32_t *line, u32 line_len, SyntaxCharType *char_types) {
+	// Highlighter for plain text: `state` and the contents of `line` are never looked at.
+	(void)state;
+	(void)line;
+	if (!char_types)
+		return; // nothing to fill in
+	memset(char_types, 0, line_len * sizeof *char_types); // every character gets type 0 (SYNTAX_NORMAL)
+}
+
+typedef struct {
+	Language lang; // language this entry applies to
+	SyntaxHighlightFunction func; // highlighter for `lang`; a NULL func ends the scan in syntax_highlight
+} SyntaxHighlighter;
+
+static SyntaxHighlighter syntax_highlighters[LANG_COUNT_MAX];
+
 void syntax_highlight(SyntaxState *state, Language lang, const char32_t *line, u32 line_len, SyntaxCharType *char_types) {
-	switch (lang) {
-	case LANG_NONE:
-	case LANG_TEXT:
-		if (char_types)
-			memset(char_types, 0, line_len * sizeof *char_types);
-		break;
-	case LANG_C:
-		syntax_highlight_c_cpp(state, line, line_len, char_types, LANG_C);
-		break;
-	case LANG_CPP:
-		syntax_highlight_c_cpp(state, line, line_len, char_types, LANG_CPP);
-		break;
-	case LANG_GLSL:
-		syntax_highlight_c_cpp(state, line, line_len, char_types, LANG_GLSL);
-		break;
-	case LANG_RUST:
-		syntax_highlight_rust(state, line, line_len, char_types);
-		break;
-	case LANG_PYTHON:
-		syntax_highlight_python(state, line, line_len, char_types);
-		break;
-	case LANG_TEX:
-		syntax_highlight_tex(state, line, line_len, char_types);
-		break;
-	case LANG_MARKDOWN:
-		syntax_highlight_markdown(state, line, line_len, char_types);
-		break;
-	case LANG_HTML:
-		syntax_highlight_xml(state, line, line_len, char_types, LANG_HTML);
-		break;
-	case LANG_XML:
-		syntax_highlight_xml(state, line, line_len, char_types, LANG_XML);
-		break;
-	case LANG_CONFIG:
-		syntax_highlight_config(state, line, line_len, char_types, false);
-		break;
-	case LANG_TED_CFG:
-		syntax_highlight_config(state, line, line_len, char_types, true);
-		break;
-	case LANG_JAVASCRIPT:
-		syntax_highlight_javascript_like(state, line, line_len, char_types, LANG_JAVASCRIPT);
-		break;
-	case LANG_TYPESCRIPT:
-		syntax_highlight_javascript_like(state, line, line_len, char_types, LANG_TYPESCRIPT);
-		break;
-	case LANG_JSON:
-		syntax_highlight_javascript_like(state, line, line_len, char_types, LANG_JSON);
-		break;
-	case LANG_JAVA:
-		syntax_highlight_java(state, line, line_len, char_types);
-		break;
-	case LANG_GO:
-		syntax_highlight_go(state, line, line_len, char_types);
-		break;
-	case LANG_COUNT: assert(0); break;
+	const SyntaxHighlighter *entry = syntax_highlighters;
+	// the table is terminated by the first entry with a NULL func
+	while (entry->func && entry->lang != lang)
+		++entry;
+	// fall back to plain text highlighting if no highlighter is registered for lang
+	SyntaxHighlightFunction highlight = entry->func ? entry->func : syntax_highlight_text;
+	highlight(state, line, line_len, char_types);
+}
+
+static void syntax_highlight_c(SyntaxState *state, const char32_t *line, u32 line_len, SyntaxCharType *char_types) {
+	syntax_highlight_c_cpp(state, line, line_len, char_types, LANG_C); // fixes the language argument so the signature matches SyntaxHighlightFunction
+}
+static void syntax_highlight_cpp(SyntaxState *state, const char32_t *line, u32 line_len, SyntaxCharType *char_types) {
+	syntax_highlight_c_cpp(state, line, line_len, char_types, LANG_CPP); // same highlighter as C, with LANG_CPP
+}
+static void syntax_highlight_xml(SyntaxState *state, const char32_t *line, u32 line_len, SyntaxCharType *char_types) {
+	syntax_highlight_html_like(state, line, line_len, char_types, LANG_XML); // XML and HTML share one highlighter
+}
+static void syntax_highlight_html(SyntaxState *state, const char32_t *line, u32 line_len, SyntaxCharType *char_types) {
+	syntax_highlight_html_like(state, line, line_len, char_types, LANG_HTML); // same highlighter as XML, with LANG_HTML
+}
+static void syntax_highlight_javascript(SyntaxState *state, const char32_t *line, u32 line_len, SyntaxCharType *char_types) {
+	syntax_highlight_javascript_like(state, line, line_len, char_types, LANG_JAVASCRIPT); // JavaScript, TypeScript and JSON share one highlighter
+}
+static void syntax_highlight_typescript(SyntaxState *state, const char32_t *line, u32 line_len, SyntaxCharType *char_types) {
+	syntax_highlight_javascript_like(state, line, line_len, char_types, LANG_TYPESCRIPT); // same highlighter, with LANG_TYPESCRIPT
+}
+static void syntax_highlight_json(SyntaxState *state, const char32_t *line, u32 line_len, SyntaxCharType *char_types) {
+	syntax_highlight_javascript_like(state, line, line_len, char_types, LANG_JSON); // same highlighter, with LANG_JSON
+}
+static void syntax_highlight_config(SyntaxState *state, const char32_t *line, u32 line_len, SyntaxCharType *char_types) {
+	syntax_highlight_cfg(state, line, line_len, char_types, false); // is_ted_cfg = false: plain .cfg files
+}
+static void syntax_highlight_ted_cfg(SyntaxState *state, const char32_t *line, u32 line_len, SyntaxCharType *char_types) {
+	syntax_highlight_cfg(state, line, line_len, char_types, true); // is_ted_cfg = true: ted's own config format
+}
+static void syntax_highlight_glsl(SyntaxState *state, const char32_t *line, u32 line_len, SyntaxCharType *char_types) {
+	syntax_highlight_c_cpp(state, line, line_len, char_types, LANG_GLSL); // GLSL goes through the C/C++ highlighter
+}
+
+/// Register every language ted ships with (ID, name, LSP identifier, and highlighter) via syntax_register_language.
+void syntax_register_builtin_languages(void) {
+	static const LanguageInfo builtins[] = {
+		{
+			.id = LANG_TEXT,
+			.name = "Text",
+			.lsp_identifier = "text",
+			.highlighter = syntax_highlight_text,
+		},
+		{
+			.id = LANG_C,
+			.name = "C",
+			.lsp_identifier = "c",
+			.highlighter = syntax_highlight_c,
+		},
+		{
+			.id = LANG_CPP,
+			.name = "C++",
+			.lsp_identifier = "cpp",
+			.highlighter = syntax_highlight_cpp,
+		},
+		{
+			.id = LANG_RUST,
+			.name = "Rust",
+			.lsp_identifier = "rust",
+			.highlighter = syntax_highlight_rust,
+		},
+		{
+			.id = LANG_JAVA,
+			.name = "Java",
+			.lsp_identifier = "java",
+			.highlighter = syntax_highlight_java,
+		},
+		{
+			.id = LANG_GO,
+			.name = "Go",
+			.lsp_identifier = "go",
+			.highlighter = syntax_highlight_go,
+		},
+		{
+			.id = LANG_PYTHON,
+			.name = "Python",
+			.lsp_identifier = "python",
+			.highlighter = syntax_highlight_python,
+		},
+		{
+			.id = LANG_TEX,
+			.name = "TeX",
+			.lsp_identifier = "latex",
+			.highlighter = syntax_highlight_tex,
+		},
+		{
+			.id = LANG_MARKDOWN,
+			.name = "Markdown",
+			.lsp_identifier = "markdown",
+			.highlighter = syntax_highlight_markdown,
+		},
+		{
+			.id = LANG_HTML,
+			.name = "HTML",
+			.lsp_identifier = "html",
+			.highlighter = syntax_highlight_html,
+		},
+		{
+			.id = LANG_XML,
+			.name = "XML",
+			.lsp_identifier = "xml",
+			.highlighter = syntax_highlight_xml,
+		},
+		{
+			.id = LANG_CONFIG,
+			.name = "Config",
+			.lsp_identifier = "text",
+			.highlighter = syntax_highlight_config,
+		},
+		{
+			.id = LANG_TED_CFG,
+			.name = "TedCfg",
+			.lsp_identifier = "text",
+			.highlighter = syntax_highlight_ted_cfg,
+		},
+		{
+			.id = LANG_JAVASCRIPT,
+			.name = "JavaScript",
+			.lsp_identifier = "javascript",
+			.highlighter = syntax_highlight_javascript,
+		},
+		{
+			.id = LANG_TYPESCRIPT,
+			.name = "TypeScript",
+			.lsp_identifier = "typescript",
+			.highlighter = syntax_highlight_typescript,
+		},
+		{
+			.id = LANG_JSON,
+			.name = "JSON",
+			.lsp_identifier = "json",
+			.highlighter = syntax_highlight_json,
+		},
+		{
+			.id = LANG_GLSL,
+			.name = "GLSL",
+			.lsp_identifier = "glsl", // not specified as of LSP 3.17, but this seems like the natural choice
+			.highlighter = syntax_highlight_glsl,
+		},
+	};
+	for (size_t i = 0; i < arr_count(builtins); ++i)
+		syntax_register_language(&builtins[i]);
+}
+
+
+void syntax_register_language(const LanguageInfo *info) {
+	if (!info->id || info->id >= LANG_USER_MAX) { // valid IDs are nonzero and less than LANG_USER_MAX
+		debug_println("Bad language ID: %" PRIu32, info->id);
+		return;
+	}
+	if (!info->name[0]) {
+		debug_println("Language with ID %" PRIu32 " has no name.", info->id);
+		return;
+	}
+	
+	int i;
+	for (i = 0; language_names[i].name[0]; i++) {
+		if (streq(language_names[i].name, info->name) || language_names[i].lang == info->id) {
+			// a language with this name or ID already exists; its entry gets overwritten below
+			break;
+		}
+	}
+	if (i < LANG_COUNT_MAX - 1) { // never fill the last slot: lookups rely on an empty name to end their scan
+		language_names[i].lang = info->id;
+		strbuf_cpy(language_names[i].name, info->name);
+	}
+	if (info->highlighter) {
+		for (i = 0; syntax_highlighters[i].func; i++) {
+			if (syntax_highlighters[i].lang == info->id) {
+				// a highlighter is already registered for this ID; it gets replaced below
+				break;
+			}
+		}
+		if (i < LANG_COUNT_MAX - 1) { // keep the last slot NULL so syntax_highlight's scan terminates
+			syntax_highlighters[i].lang = info->id;
+			syntax_highlighters[i].func = info->highlighter;
+		}
 	}
+	
+	lsp_register_language(info->id, info->lsp_identifier);
+	
 }
diff --git a/ted.h b/ted.h
index cec54a4..9dbbbc2 100644
--- a/ted.h
+++ b/ted.h
@@ -43,22 +43,106 @@ extern "C" {
 /// max number of LSPs running at once
 #define TED_LSP_MAX 200
 
+// If you are adding new languages, DO NOT change the constant values
+// of the previous languages. It will mess up config files which use :set-language!
+enum {
+	/// avoid using this and use LANG_TEXT instead.
+	LANG_NONE = 0,
+	/// C
+	LANG_C = 1,
+	/// C++
+	LANG_CPP = 2,
+	/// Rust
+	LANG_RUST = 3,
+	/// Python
+	LANG_PYTHON = 4,
+	/// TeX/LaTeX
+	LANG_TEX = 5,
+	/// Markdown
+	LANG_MARKDOWN = 6,
+	/// HTML
+	LANG_HTML = 7,
+	/// .cfg files
+	LANG_CONFIG = 8,
+	/// JavaScript
+	LANG_JAVASCRIPT = 9,
+	/// Java
+	LANG_JAVA = 10,
+	/// Go
+	LANG_GO = 11,
+	/// like \ref LANG_CONFIG, but with multiline strings.
+	LANG_TED_CFG = 12,
+	/// TypeScript
+	LANG_TYPESCRIPT = 13,
+	/// JSON
+	LANG_JSON = 14,
+	/// XML
+	LANG_XML = 15,
+	/// GL shading language
+	LANG_GLSL = 16,
+	/// plain text
+	LANG_TEXT = 17,
+	
+	/// all user-defined languages are greater than or equal to this.
+	LANG_USER_MIN = 100000,
+	/// all user-defined languages are less than this.
+	LANG_USER_MAX = 2000000000,
+};
+
+/// A programming language
+///
+/// May be one of the `LANG_*` constants, or a dynamically registered language.
+typedef u32 Language;
+
+/// Maximum number of languages available.
+#define LANG_COUNT_MAX 511
+
+
 /// Current state of syntax highlighting.
 typedef u32 SyntaxState;
 
 /// types of syntax highlighting
 enum SyntaxCharType {
-	SYNTAX_NORMAL,
-	SYNTAX_KEYWORD,
-	SYNTAX_BUILTIN,
-	SYNTAX_COMMENT,
-	SYNTAX_PREPROCESSOR,
-	SYNTAX_STRING,
-	SYNTAX_CHARACTER,
-	SYNTAX_CONSTANT,
+	// do not change these numbers as it will break backwards compatibility
+	SYNTAX_NORMAL = 0,
+	SYNTAX_KEYWORD = 1,
+	SYNTAX_BUILTIN = 2,
+	SYNTAX_COMMENT = 3,
+	SYNTAX_PREPROCESSOR = 4,
+	SYNTAX_STRING = 5,
+	SYNTAX_CHARACTER = 6,
+	SYNTAX_CONSTANT = 7,
 };
 /// Type of syntax highlighting.
 typedef u8 SyntaxCharType;
+/// Function for syntax highlighting.
+/// If you want to add a language to `ted`, you will need to implement this function.
+///
+/// `state` is used to keep track of state between lines (e.g. whether or not we are in a multiline comment)\n
+/// `line` is the UTF-32 text of the line (not guaranteed to be null-terminated).\n
+/// `line_len` is the length of the line, in UTF-32 codepoints.\n
+/// `char_types` is either `NULL` (in which case only `state` should be updated), or a pointer to `line_len` SyntaxCharTypes, which should be filled out using the `SYNTAX_*` constants.
+///
+/// no guarantees are made about which order lines will be highlighted in. the only guarantee is that `*state = 0` for the first line, and for line `n > 0`,
+/// `*state` was derived from calling this function on line `n-1`.
+typedef void (*SyntaxHighlightFunction)(SyntaxState *state, const char32_t *line, u32 line_len, SyntaxCharType *char_types);
+
+/// Information about a programming language
+///
+/// Used for dynamic language registration.
+/// Please zero all the fields of the struct which you aren't using.
+///
+/// The fields `id` and `name` MUST NOT be 0, or `ted` will reject your language.
+typedef struct {
+	/// Language ID number. For user-defined languages, this must be `>= LANG_USER_MIN` and `< LANG_USER_MAX`.
+	///
+	/// To avoid conflict, try picking a unique number.
+	Language id;
+	char name[30]; ///< name of the language; must not be empty. registering a name that already exists overwrites that entry.
+	char lsp_identifier[32]; ///< identifier handed to lsp_register_language() for this language
+	SyntaxHighlightFunction highlighter; ///< syntax highlighter; if NULL, no highlighter is registered for this language
+	char reserved[128]; ///< NOTE(review): not read by any code visible here — presumably room for future fields; confirm
+} LanguageInfo;
 
 /// for tex
 #define SYNTAX_MATH SYNTAX_STRING
@@ -133,6 +217,10 @@ typedef struct {
 	GLuint buffer;
 } GlRcSAB;
 
+typedef struct {
+	Language language; ///< language selected for file names that end with `extension`
+	char extension[16]; ///< null-terminated string compared against the end of the file name (see buffer_language)
+} LanguageExtension;
 
 /// All of ted's settings
 ///
@@ -188,8 +276,7 @@ typedef struct {
 	char build_command[1024];
 	/// Default build command for if `Cargo.toml`, `Makefile`, etc. do not exist.
 	char build_default_command[1024];
-	/// `[i]` = comma-separated string of file extensions for language `i`, or `NULL` for none
-	char *language_extensions[LANG_COUNT];
+	LanguageExtension *language_extensions;
 	/// dynamic array, sorted by KEY_COMBO(modifier, key)
 	KeyAction *key_actions;
 } Settings;
@@ -250,7 +337,7 @@ typedef struct {
 	/// last write time to `path`
 	double last_write_time;
 	/// 1 + the language the buffer has been manually set to, or 0 if it hasn't been manually set to anything
-	i16 manual_language;
+	i64 manual_language;
 	/// position of cursor
 	BufferPos cursor_pos;
 	/// if `selection` is true, the text between `selection_pos` and `cursor_pos` is selected.
@@ -1401,6 +1488,10 @@ void session_write(Ted *ted);
 void session_read(Ted *ted);
 
 // === syntax.c ===
+/// register a new language for `ted`.
+///
+/// this should be done before loading configs so language-specific settings are recognized properly.
+void syntax_register_language(const LanguageInfo *info);
 Language language_from_str(const char *str);
 const char *language_to_str(Language language);
 /// string which should be put before comments in the given language