From 609cdb6f0cc9f6f5d86366fbf60364f463cba551 Mon Sep 17 00:00:00 2001 From: pommicket Date: Thu, 3 Nov 2022 12:16:38 -0400 Subject: multiline string highlighting for ted.cfg --- buffer.c | 16 ++++++++++++---- syntax.c | 56 ++++++++++++++++++++++++++++++++++++++++---------------- ted.cfg | 2 ++ ted.h | 14 ++++++++++---- 4 files changed, 64 insertions(+), 24 deletions(-) diff --git a/buffer.c b/buffer.c index 4a54ff8..6e7ba7e 100644 --- a/buffer.c +++ b/buffer.c @@ -253,8 +253,12 @@ Language buffer_language(TextBuffer *buffer) { return LANG_NONE; size_t filename_len = strlen(filename); + int match_score = 0; + Language match = LANG_NONE; + for (u16 l = 0; l < LANG_COUNT; ++l) { char const *extensions = settings->language_extensions[l]; + if (extensions) { // extensions is a string with commas separating each extension. size_t len = 0; @@ -262,14 +266,18 @@ Language buffer_language(TextBuffer *buffer) { if (*p == ',') ++p; // move past comma len = strcspn(p, ","); if (filename_len >= len && strncmp(&filename[filename_len - len], p, len) == 0) { - // found a match! - return (Language)l; + int score = (int)len; + if (score > match_score) { + // found a better match! + match_score = score; + match = l; + } } } } } - // no extensions matched - return LANG_NONE; + + return match; } // score is higher if context is closer match. diff --git a/syntax.c b/syntax.c index c84a77d..6912671 100644 --- a/syntax.c +++ b/syntax.c @@ -25,6 +25,7 @@ char const *language_comment_start(Language l) { case LANG_GO: return "// "; case LANG_CONFIG: + case LANG_TED_CFG: case LANG_PYTHON: return "# "; case LANG_TEX: @@ -1079,50 +1080,60 @@ static void syntax_highlight_html(SyntaxState *state, char32_t const *line, u32 *state = (comment * SYNTAX_STATE_HTML_COMMENT); } -static void syntax_highlight_config(SyntaxState *state, char32_t const *line, u32 line_len, SyntaxCharType *char_types) { - (void)state; +static void syntax_highlight_config(SyntaxState *state, char32_t const *line, u32 line_len, SyntaxCharType *char_types, bool is_ted_cfg) { + bool string = (*state & SYNTAX_STATE_TED_CFG_STRING) != 0; + if (line_len == 0) return; - if (!char_types) return; // there's no state for config files. - if (line[0] == '#') { - memset(char_types, SYNTAX_COMMENT, line_len); + if (!string && line[0] == '#') { + if (char_types) memset(char_types, SYNTAX_COMMENT, line_len); return; } - if (line[0] == '[' && line[line_len - 1] == ']') { - memset(char_types, SYNTAX_BUILTIN, line_len); + if (!string && line[0] == '[' && line[line_len - 1] == ']') { + if (char_types) memset(char_types, SYNTAX_BUILTIN, line_len); return; } - bool string = false; + + int backslashes = 0; + for (u32 i = 0; i < line_len; ++i) { - char_types[i] = string ? SYNTAX_STRING : SYNTAX_NORMAL; + if (char_types) + char_types[i] = string ? SYNTAX_STRING : SYNTAX_NORMAL; switch (line[i]) { case '"': - string = !string; - if (string) + if (string && backslashes % 2 == 0) { + string = false; + } else { + string = true; + } + if (char_types) char_types[i] = SYNTAX_STRING; break; case '#': // don't try highlighting the rest of the line. // for ted.cfg, this could be a color, but for other cfg files, // it might be a comment - memset(&char_types[i], 0, line_len - i); + if (char_types) + memset(&char_types[i], 0, line_len - i); i = line_len; break; case ANY_DIGIT: - if (i > 0 && !string) { + if (char_types && i > 0 && !string) { if (is32_ident(line[i-1]) // something like e5 || line[i-1] == '+') // key combinations, e.g. Alt+0 break; - while (i < line_len && syntax_number_continues(LANG_CONFIG, line, line_len, i)) + while (i < line_len && syntax_number_continues(LANG_CONFIG, line, line_len, i)) { char_types[i++] = SYNTAX_CONSTANT; + } } break; default: { + if (!char_types) + break; // don't care if (i == 0) // none of the keywords in syntax_all_keywords_config should appear at the start of the line break; if (is32_ident(line[i-1]) || line[i-1] == '-' || !is32_ident(line[i])) break; // can't be a keyword on its own. - u32 keyword_len = syntax_keyword_len(LANG_CONFIG, line, i, line_len); Keyword const *keyword = syntax_keyword_lookup(syntax_all_keywords_config, arr_count(syntax_all_keywords_config), &line[i], keyword_len); @@ -1136,6 +1147,16 @@ static void syntax_highlight_config(SyntaxState *state, char32_t const *line, u3 } } break; } + if (i < line_len) { + if (line[i] == '\\') + ++backslashes; + else + backslashes = 0; + } + } + + if (is_ted_cfg) { + *state = SYNTAX_STATE_TED_CFG_STRING * string; } } @@ -1587,7 +1608,10 @@ void syntax_highlight(SyntaxState *state, Language lang, char32_t const *line, u syntax_highlight_html(state, line, line_len, char_types); break; case LANG_CONFIG: - syntax_highlight_config(state, line, line_len, char_types); + syntax_highlight_config(state, line, line_len, char_types, false); + break; + case LANG_TED_CFG: + syntax_highlight_config(state, line, line_len, char_types, true); break; case LANG_JAVASCRIPT: syntax_highlight_javascript(state, line, line_len, char_types); diff --git a/ted.cfg b/ted.cfg index eef8098..c7a08a7 100644 --- a/ted.cfg +++ b/ted.cfg @@ -235,6 +235,8 @@ Tex = .tex Markdown = .md HTML = .html, .php, .xml, .xhtml, .iml Config = .cfg, .toml +# ted.cfg has its own syntax highlighting for multiline strings. +TedCfg = ted.cfg Javascript = .js Java = .java Go = .go diff --git a/ted.h b/ted.h index 8681ab6..5eebced 100644 --- a/ted.h +++ b/ted.h @@ -55,11 +55,15 @@ enum { SYNTAX_STATE_GO_MULTILINE_COMMENT = 0x02u }; +enum { + SYNTAX_STATE_TED_CFG_STRING = 0x01u, +}; + typedef u8 SyntaxState; // If you are adding new languages, DO NOT change the constant values -// of the previous languages. It will mess up config files! -ENUM_U16 { +// of the previous languages. It will mess up config files which use :set-language! +typedef enum { LANG_NONE = 0, LANG_C = 1, LANG_CPP = 2, @@ -68,12 +72,13 @@ ENUM_U16 { LANG_TEX = 5, LANG_MARKDOWN = 6, LANG_HTML = 7, - LANG_CONFIG = 8, // .cfg files, e.g. ted.cfg + LANG_CONFIG = 8, // .cfg files LANG_JAVASCRIPT = 9, LANG_JAVA = 10, LANG_GO = 11, + LANG_TED_CFG = 12, // like LANG_CONFIG, but with multiline strings. LANG_COUNT -} ENUM_U16_END(Language); +} Language; typedef struct { Language lang; @@ -93,6 +98,7 @@ static LanguageName const language_names[] = { {LANG_JAVASCRIPT, "Javascript"}, {LANG_JAVA, "Java"}, {LANG_GO, "Go"}, + {LANG_TED_CFG, "TedCfg"}, }; static_assert_if_possible(arr_count(language_names) == LANG_COUNT) -- cgit v1.2.3