From 61cd687d858b7a3811a168393f6b698c24cfff17 Mon Sep 17 00:00:00 2001
From: Leo Tenenbaum
Date: Mon, 19 Apr 2021 10:07:07 -0400
Subject: tex syntax highlighting

---
 README.md  |   2 +-
 string32.c |  42 ++++++++++++++++++++++-
 syntax.c   | 110 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 ted.cfg    |   1 +
 ted.h      |  12 +++++++
 5 files changed, 164 insertions, 3 deletions

diff --git a/README.md b/README.md
index 280c1f3..2ef6bea 100644
--- a/README.md
+++ b/README.md
@@ -23,7 +23,7 @@ in other editors.
 - Multiple tabs, each with a different file
 - Split screen (default: Ctrl+\\, Ctrl+Shift+\\)
 - Auto-indent
-- Syntax highlighting for C, C++, Rust, and Python.
+- Syntax highlighting for C, C++, Rust, Python, and LaTeX.
 - Find and replace (with regular expressions!)
 - Run build command (F4), go to errors
 - Run any shell command (Ctrl+!)
diff --git a/string32.c b/string32.c
index 09874bc..9b88f0e 100644
--- a/string32.c
+++ b/string32.c
@@ -76,6 +76,43 @@ static char *str32_to_utf8_cstr(String32 s) {
 	return utf8;
 }
 
+// compare s to the ASCII string `ascii`, strcmp-style: <0 if s < ascii, 0 if equal, >0 if s > ascii
+static int str32_cmp_ascii(String32 s, char const *ascii) {
+	for (size_t i = 0; i < s.len; ++i) {
+		assert((char32_t)ascii[i] < 128);
+		if ((char32_t)ascii[i] == '\0')
+			return +1; // ascii is a proper prefix of s, so s sorts after it
+		if (s.str[i] > (char32_t)ascii[i])
+			return +1;
+		if (s.str[i] < (char32_t)ascii[i])
+			return -1;
+	}
+	if (ascii[s.len]) {
+		// s is a proper prefix of ascii, so s sorts before it
+		return -1;
+	}
+	return 0;
+}
+
+// check if s starts with the ASCII string `ascii`
+static int str32_has_ascii_prefix(String32 s, char const *ascii) {
+	for (size_t i = 0; i < s.len; ++i) {
+		assert((char32_t)ascii[i] < 128);
+		if ((char32_t)ascii[i] == '\0')
+			return true; // ascii is a prefix of s
+		if (s.str[i] > (char32_t)ascii[i])
+			return false;
+		if (s.str[i] < (char32_t)ascii[i])
+			return false;
+	}
+	if (ascii[s.len]) {
+		// s is a prefix of ascii
+		return false;
+	}
+	// s is the same as ascii
+	return true;
+}
+
 // returns the index of the given character in the string, or the length of the string if it's not found.
 size_t str32chr(String32 s, char32_t c) {
 	for (size_t i = 0; i < s.len; ++i) {
@@ -127,8 +164,11 @@ bool is32_digit(char32_t c) {
 	return c <= WINT_MAX && iswdigit((wint_t)c);
 }
 
+bool is32_graph(char32_t c) {
+	return c <= WINT_MAX && iswgraph((wint_t)c);
+}
+
 // could this character appear in a C-style identifier?
 bool is32_ident(char32_t c) {
 	return c <= WINT_MAX && (iswalnum((wint_t)c) || c == '_');
 }
-
diff --git a/syntax.c b/syntax.c
index b8efa93..fdb2d6a 100644
--- a/syntax.c
+++ b/syntax.c
@@ -13,13 +13,14 @@ Language language_from_str(char const *str) {
 	return LANG_NONE;
 }
 
-// start of single line comment for language l
+// start of single line comment for language l -- used for comment/uncomment selection
 char const *language_comment_start(Language l) {
 	switch (l) {
 	case LANG_C: return "/* ";
 	case LANG_RUST:
 	case LANG_CPP: return "// ";
 	case LANG_PYTHON: return "# ";
+	case LANG_TEX: return "% ";
 	case LANG_NONE:
 	case LANG_COUNT:
 		break;
@@ -552,6 +553,110 @@ static void syntax_highlight_python(SyntaxState *state, char32_t *line, u32 line
 	}
 }
 
+static bool is_tex_ident(char32_t c) {
+	// digits cannot appear in tex identifiers
+	return is32_ident(c) && !is32_digit(c);
+}
+
+// Highlights one line of TeX, writing one SyntaxCharType per character to char_types
+// (char_types may be NULL, in which case only *state is updated).
+// *state carries the "inside $...$", "inside $$...$$" and "inside verbatim" flags
+// from the end of the previous line in, and to the start of the next line out.
+static void syntax_highlight_tex(SyntaxState *state, char32_t *line, u32 line_len, SyntaxCharType *char_types) {
+	bool dollar = (*state & SYNTAX_STATE_TEX_DOLLAR) != 0;
+	bool dollardollar = (*state & SYNTAX_STATE_TEX_DOLLARDOLLAR) != 0;
+	bool verbatim = (*state & SYNTAX_STATE_TEX_VERBATIM) != 0;
+
+	for (u32 i = 0; i < line_len; ++i) {
+		char32_t c = line[i];
+		bool has_1_char = i + 1 < line_len;
+
+		if (char_types)
+			char_types[i] = dollar || dollardollar ? SYNTAX_MATH : SYNTAX_NORMAL;
+		switch (c) {
+		case '\\':
+			if (has_1_char) {
+				if (is32_graph(line[i+1])) {
+					if (is_tex_ident(line[i+1])) {
+						// command, e.g. \begin
+						String32 command_str = {
+							.str = line + i+1,
+							.len = line_len - (i+1),
+						};
+						bool new_verbatim = false;
+						if (!dollar && !dollardollar) {
+							if (!verbatim && str32_has_ascii_prefix(command_str, "begin{verbatim}")) {
+								new_verbatim = true;
+							} else if (verbatim && str32_has_ascii_prefix(command_str, "end{verbatim}")) {
+								verbatim = false;
+							}
+						}
+
+						if (!verbatim) {
+							if (char_types) char_types[i] = SYNTAX_KEYWORD;
+							for (++i; i < line_len; ++i) {
+								if (is_tex_ident(line[i])) {
+									if (char_types) char_types[i] = SYNTAX_KEYWORD;
+								} else {
+									--i;
+									break;
+								}
+							}
+							verbatim = new_verbatim;
+						}
+					} else if (!verbatim) {
+						// something like \\, \%, etc.
+						if (char_types) char_types[i] = SYNTAX_KEYWORD;
+						++i;
+						if (char_types) char_types[i] = SYNTAX_KEYWORD;
+					}
+				}
+			}
+			break;
+		case '%':
+			// comment
+			if (!verbatim) {
+				for (; i < line_len; ++i) {
+					if (char_types)
+						char_types[i] = SYNTAX_COMMENT;
+				}
+			}
+			break;
+		case '&':
+			// table/matrix/etc. separator
+			if (char_types && !verbatim)
+				char_types[i] = SYNTAX_BUILTIN;
+			break;
+		case '$':
+			if (!verbatim) {
+				if (!dollar && has_1_char && line[i+1] == '$') {
+					// $$ toggles display math. consume both characters when opening as well as
+					// when closing, so the second $ is never re-read as the start of another delimiter.
+					if (char_types) char_types[i] = SYNTAX_MATH;
+					++i;
+					if (char_types) char_types[i] = SYNTAX_MATH;
+					dollardollar = !dollardollar;
+				} else if (!dollardollar) {
+					// single $
+					if (dollar) {
+						dollar = false;
+					} else {
+						dollar = true;
+						if (char_types) char_types[i] = SYNTAX_MATH;
+					}
+				}
+			}
+			break;
+		}
+	}
+
+	*state = (SyntaxState)(
+		  (dollar * SYNTAX_STATE_TEX_DOLLAR)
+		| (dollardollar * SYNTAX_STATE_TEX_DOLLARDOLLAR)
+		| (verbatim * SYNTAX_STATE_TEX_VERBATIM)
+	);
+}
+
 // This is the main syntax highlighting function. It will determine which colors to use for each character.
 // Rather than returning colors, it returns a character type (e.g. comment) which can be converted to a color.
 // To highlight multiple lines, start out with a zeroed SyntaxState, and pass a pointer to it each time.
@@ -574,6 +679,9 @@ void syntax_highlight(SyntaxState *state, Language lang, char32_t *line, u32 lin
 	case LANG_PYTHON:
 		syntax_highlight_python(state, line, line_len, char_types);
 		break;
+	case LANG_TEX:
+		syntax_highlight_tex(state, line, line_len, char_types);
+		break;
 	case LANG_COUNT: assert(0); break;
 	}
 }
diff --git a/ted.cfg b/ted.cfg
index 8d6b251..1e01cc3 100644
--- a/ted.cfg
+++ b/ted.cfg
@@ -207,3 +207,4 @@ C = .c, .h
 C++ = .cpp, .hpp, .C, .H, .cxx, .hxx, .cc, .hh
 Rust = .rs
 Python = .py
+Tex = .tex
diff --git a/ted.h b/ted.h
index 5410398..66648f5 100644
--- a/ted.h
+++ b/ted.h
@@ -27,6 +27,12 @@ enum {
 	SYNTAX_STATE_PYTHON_STRING_DBL_QUOTED = 0x02u, // is this a """ string, as opposed to a ''' string?
 };
 
+enum {
+	SYNTAX_STATE_TEX_DOLLAR = 0x01u, // inside math $ ... $
+	SYNTAX_STATE_TEX_DOLLARDOLLAR = 0x02u, // inside math $$ ... $$
+	SYNTAX_STATE_TEX_VERBATIM = 0x04u, // inside \begin{verbatim} ... \end{verbatim}
+};
+
 typedef u8 SyntaxState;
 
 ENUM_U16 {
@@ -35,6 +41,7 @@ ENUM_U16 {
 	LANG_CPP,
 	LANG_RUST,
 	LANG_PYTHON,
+	LANG_TEX,
 	LANG_COUNT
 } ENUM_U16_END(Language);
 
@@ -49,8 +56,11 @@ static LanguageName const language_names[] = {
 	{LANG_CPP, "C++"},
 	{LANG_RUST, "Rust"},
 	{LANG_PYTHON, "Python"},
+	{LANG_TEX, "Tex"},
 };
 
+static_assert_if_possible(arr_count(language_names) == LANG_COUNT)
+
 ENUM_U8 {
 	SYNTAX_NORMAL,
 	SYNTAX_KEYWORD,
@@ -62,6 +72,8 @@ ENUM_U8 {
 	SYNTAX_CONSTANT,
 } ENUM_U8_END(SyntaxCharType);
 
+#define SYNTAX_MATH SYNTAX_STRING // for tex
+
 typedef struct {
 	float cursor_blink_time_on, cursor_blink_time_off;
 	u32 colors[COLOR_COUNT];
-- 
cgit v1.2.3