5 files changed, 164 insertions, 3 deletions
diff --git a/README.md b/README.md
index 280c1f3..2ef6bea 100644
--- a/README.md
+++ b/README.md
@@ -23,7 +23,7 @@ in other editors.
 - Multiple tabs, each with a different file
 - Split screen (default: Ctrl+\\, Ctrl+Shift+\\)
 - Auto-indent
-- Syntax highlighting for C, C++, Rust, and Python.
+- Syntax highlighting for C, C++, Rust, Python, and LaTeX.
 - Find and replace (with regular expressions!)
 - Run build command (F4), go to errors
 - Run any shell command (Ctrl+!)
diff --git a/string32.c b/string32.c
index 09874bc..9b88f0e 100644
--- a/string32.c
+++ b/string32.c
@@ -76,6 +76,43 @@ static char *str32_to_utf8_cstr(String32 s) {
 	return utf8;
 }
 
+// compare s to the NUL-terminated ASCII string `ascii`. returns <0, 0, or >0 like strcmp (a proper prefix sorts first).
+static int str32_cmp_ascii(String32 s, char const *ascii) {
+	for (size_t i = 0; i < s.len; ++i) {
+		assert((char32_t)ascii[i] < 128);
+		if ((char32_t)ascii[i] == '\0')
+			return +1; // ascii is a proper prefix of s, so s comes after it
+		if (s.str[i] > (char32_t)ascii[i])
+			return +1;
+		if (s.str[i] < (char32_t)ascii[i])
+			return -1;
+	}
+	if (ascii[s.len]) {
+		// s is a proper prefix of ascii, so s comes before it
+		return -1;
+	}
+	return 0;
+}
+
+// returns true if the NUL-terminated ASCII string `ascii` is a prefix of s (s == ascii counts), false otherwise.
+static int str32_has_ascii_prefix(String32 s, char const *ascii) {
+	size_t pos = 0;
+	// walk both strings in step until one of them runs out
+	while (pos < s.len) {
+		char32_t expected = (char32_t)ascii[pos];
+		assert(expected < 128);
+		if (expected == '\0')
+			return true; // all of ascii matched, with some of s left over
+		if (s.str[pos] != expected)
+			return false; // mismatch before the end of ascii
+		++pos;
+	}
+	// s ran out: it only matches if ascii has run out as well.
+	if (ascii[s.len] != '\0')
+		return false;
+	return true;
+}
+
 // returns the index of the given character in the string, or the length of the string if it's not found.
 size_t str32chr(String32 s, char32_t c) {
 	for (size_t i = 0; i < s.len; ++i) {
@@ -127,8 +164,11 @@ bool is32_digit(char32_t c) {
 	return c <= WINT_MAX && iswdigit((wint_t)c);
 }
 
+bool is32_graph(char32_t c) {
+	return c > WINT_MAX ? false : iswgraph((wint_t)c) != 0; // values that don't fit in a wint_t are never graphical
+}
+
 // could this character appear in a C-style identifier?
 bool is32_ident(char32_t c) {
 	return c <= WINT_MAX && (iswalnum((wint_t)c) || c == '_');
 }
-
diff --git a/syntax.c b/syntax.c
index b8efa93..fdb2d6a 100644
--- a/syntax.c
+++ b/syntax.c
@@ -13,13 +13,14 @@ Language language_from_str(char const *str) {
 	return LANG_NONE;
 }
 
-// start of single line comment for language l
+// start of single line comment for language l -- used for comment/uncomment selection
 char const *language_comment_start(Language l) {
 	switch (l) {
 	case LANG_C:   return "/* ";
 	case LANG_RUST:
 	case LANG_CPP: return "// ";
 	case LANG_PYTHON: return "# ";
+	case LANG_TEX: return "% ";
 	case LANG_NONE:
 	case LANG_COUNT:
 		break;
@@ -552,6 +553,110 @@ static void syntax_highlight_python(SyntaxState *state, char32_t *line, u32 line
 	}
 }
 
+static bool is_tex_ident(char32_t c) {
+	// control words are made of letters only: no digits, and `_` is the subscript character (\_ is a control symbol)
+	return is32_ident(c) && !is32_digit(c) && c != '_';
+}
+
+// Syntax-highlights a single line of TeX/LaTeX. If char_types is non-NULL, it receives one type per character.
+// *state carries the SYNTAX_STATE_TEX_* flags over from the previous line, and is updated for the next one.
+static void syntax_highlight_tex(SyntaxState *state, char32_t *line, u32 line_len, SyntaxCharType *char_types) {
+	bool dollar = (*state & SYNTAX_STATE_TEX_DOLLAR) != 0;
+	bool dollardollar = (*state & SYNTAX_STATE_TEX_DOLLARDOLLAR) != 0;
+	bool verbatim = (*state & SYNTAX_STATE_TEX_VERBATIM) != 0;
+	
+	for (u32 i = 0; i < line_len; ++i) {
+		char32_t c = line[i];
+		bool has_1_char = i + 1 < line_len;
+		
+		// default type for this character; the cases below may override it
+		if (char_types)
+			char_types[i] = dollar || dollardollar ? SYNTAX_MATH : SYNTAX_NORMAL;
+		switch (c) {
+		case '\\':
+			if (has_1_char) {
+				if (is32_graph(line[i+1])) {
+					if (is_tex_ident(line[i+1])) {
+						// command, e.g. \begin
+						String32 command_str = {
+							.str = line + i+1,
+							.len = line_len - (i+1),
+						};
+						// entering verbatim is delayed until the \begin command itself has been highlighted
+						bool new_verbatim = false;
+						if (!dollar && !dollardollar) {
+							if (!verbatim && str32_has_ascii_prefix(command_str, "begin{verbatim}")) {
+								new_verbatim = true;
+							} else if (verbatim && str32_has_ascii_prefix(command_str, "end{verbatim}")) {
+								verbatim = false;
+							}
+						}
+						
+						if (!verbatim) {
+							if (char_types) char_types[i] = SYNTAX_KEYWORD;
+							for (++i; i < line_len; ++i) {
+								if (is_tex_ident(line[i])) {
+									if (char_types) char_types[i] = SYNTAX_KEYWORD;
+								} else {
+									--i; // not part of the command: let the outer loop process this character
+									break;
+								}
+							}
+							verbatim = new_verbatim;
+						}
+					} else if (!verbatim) {
+						// something like \\, \%, etc.
+						if (char_types) char_types[i] = SYNTAX_KEYWORD;
+						++i;
+						if (char_types) char_types[i] = SYNTAX_KEYWORD;
+					}
+				}
+			}
+			break;
+		case '%':
+			// comment
+			if (!verbatim) {
+				for (; i < line_len; ++i) {
+					if (char_types)
+						char_types[i] = SYNTAX_COMMENT;
+				}
+			}
+			break;
+		case '&':
+			// table/matrix/etc. separator
+			if (char_types && !verbatim)
+				char_types[i] = SYNTAX_BUILTIN;
+			break;
+		case '$':
+			if (!verbatim) {
+				if (!dollar && has_1_char && line[i+1] == '$') {
+					// $$ -- opens or closes display math. consume both characters so the second $ isn't re-examined (e.g. $$$$)
+					if (char_types) char_types[i] = SYNTAX_MATH;
+					++i;
+					if (char_types) char_types[i] = SYNTAX_MATH;
+					dollardollar = !dollardollar;
+				} else if (!dollardollar) {
+					// single $
+					if (dollar) {
+						dollar = false;
+					} else {
+						dollar = true;
+						if (char_types) char_types[i] = SYNTAX_MATH;
+					}
+				}
+			}
+			break;
+		}
+	}
+	
+	// pack the flags back into *state for the next line
+	*state = (SyntaxState)(
+		(dollar * SYNTAX_STATE_TEX_DOLLAR)
+		| (dollardollar * SYNTAX_STATE_TEX_DOLLARDOLLAR)
+		| (verbatim * SYNTAX_STATE_TEX_VERBATIM)
+	);
+}
+
 // This is the main syntax highlighting function. It will determine which colors to use for each character.
 // Rather than returning colors, it returns a character type (e.g. comment) which can be converted to a color.
 // To highlight multiple lines, start out with a zeroed SyntaxState, and pass a pointer to it each time.
@@ -574,6 +679,9 @@ void syntax_highlight(SyntaxState *state, Language lang, char32_t *line, u32 lin
 	case LANG_PYTHON:
 		syntax_highlight_python(state, line, line_len, char_types);
 		break;
+	case LANG_TEX:
+		syntax_highlight_tex(state, line, line_len, char_types);
+		break;
 	case LANG_COUNT: assert(0); break;
 	}
 }
diff --git a/ted.cfg b/ted.cfg
index 8d6b251..1e01cc3 100644
--- a/ted.cfg
+++ b/ted.cfg
@@ -207,3 +207,4 @@ C = .c, .h
 C++ = .cpp, .hpp, .C, .H, .cxx, .hxx, .cc, .hh
 Rust = .rs
 Python = .py
+Tex = .tex
diff --git a/ted.h b/ted.h
index 5410398..66648f5 100644
--- a/ted.h
+++ b/ted.h
@@ -27,6 +27,12 @@ enum {
 	SYNTAX_STATE_PYTHON_STRING_DBL_QUOTED = 0x02u, // is this a """ string, as opposed to a ''' string?
 };
 
+enum {
+	SYNTAX_STATE_TEX_DOLLAR = 1u << 0, // currently inside inline math: $ ... $
+	SYNTAX_STATE_TEX_DOLLARDOLLAR = 1u << 1, // currently inside display math: $$ ... $$
+	SYNTAX_STATE_TEX_VERBATIM = 1u << 2, // currently inside \begin{verbatim} ... \end{verbatim}
+};
+
 typedef u8 SyntaxState;
 
 ENUM_U16 {
@@ -35,6 +41,7 @@ ENUM_U16 {
 	LANG_CPP,
 	LANG_RUST,
 	LANG_PYTHON,
+	LANG_TEX,
 	LANG_COUNT
 } ENUM_U16_END(Language);
 
@@ -49,8 +56,11 @@ static LanguageName const language_names[] = {
 	{LANG_CPP, "C++"},
 	{LANG_RUST, "Rust"},
 	{LANG_PYTHON, "Python"},
+	{LANG_TEX, "Tex"},
 };
 
+static_assert_if_possible(arr_count(language_names) == LANG_COUNT)
+
 ENUM_U8 {
 	SYNTAX_NORMAL,
 	SYNTAX_KEYWORD,
@@ -62,6 +72,8 @@ ENUM_U8 {
 	SYNTAX_CONSTANT,
 } ENUM_U8_END(SyntaxCharType);
 
+#define SYNTAX_MATH SYNTAX_STRING // for tex: math mode reuses the string character type
+
 typedef struct {
 	float cursor_blink_time_on, cursor_blink_time_off;
 	u32 colors[COLOR_COUNT];