diff options
author | Leo Tenenbaum <pommicket@gmail.com> | 2021-04-20 13:59:10 -0400 |
---|---|---|
committer | Leo Tenenbaum <pommicket@gmail.com> | 2021-04-20 13:59:10 -0400 |
commit | f5bb2118eadc20402e69d575c9be8a02cf673caa (patch) | |
tree | e04d4853e6cdd1a8cf9415b35c800f0c534f9b3d | |
parent | da61efabb1b28c5500824a560c960a720a628de0 (diff) |
HTML syntax highlighting
-rw-r--r-- | Makefile | 3 | ||||
-rw-r--r-- | README.md | 2 | ||||
-rw-r--r-- | keywords.h | 24 | ||||
-rwxr-xr-x | keywords.py | 35 | ||||
-rw-r--r-- | main.c | 5 | ||||
-rw-r--r-- | syntax.c | 169 | ||||
-rw-r--r-- | ted.cfg | 3 | ||||
-rw-r--r-- | ted.h | 6 |
8 files changed, 224 insertions, 23 deletions
@@ -31,7 +31,8 @@ libpcre2-32.a: pcre2-10.36.zip unzip pcre2-10.36.zip cd pcre2-10.36 && cmake -DPCRE2_BUILD_PCRE2_32=ON . && $(MAKE) -j8 cp pcre2-10.36/libpcre2-32.a ./ - +keywords.h: keywords.py + ./keywords.py ted.deb: release rm -rf /tmp/ted mkdir -p /tmp/ted/DEBIAN @@ -23,7 +23,7 @@ in other editors. - Multiple tabs, each with a different file - Split screen (default: Ctrl+\\, Ctrl+Shift+\\) - Auto-indent -- Syntax highlighting for C, C++, Rust, Python, and LaTeX. +- Syntax highlighting for C, C++, Rust, Python, LaTeX, and Markdown. - Find and replace (with regular expressions!) - Run build command (F4), go to errors - Run any shell command (Ctrl+!) @@ -179,3 +179,27 @@ static Keyword const *const syntax_all_keywords_python[] = { ['A'] = syntax_keywords_python_A, ['B'] = syntax_keywords_python_B, ['C'] = syntax_keywords_python_C, ['D'] = syntax_keywords_python_D, ['E'] = syntax_keywords_python_E, ['F'] = syntax_keywords_python_F, ['G'] = syntax_keywords_python_G, ['I'] = syntax_keywords_python_I, ['K'] = syntax_keywords_python_K, ['L'] = syntax_keywords_python_L, ['M'] = syntax_keywords_python_M, ['N'] = syntax_keywords_python_N, ['O'] = syntax_keywords_python_O, ['P'] = syntax_keywords_python_P, ['R'] = syntax_keywords_python_R, ['S'] = syntax_keywords_python_S, ['T'] = syntax_keywords_python_T, ['U'] = syntax_keywords_python_U, ['V'] = syntax_keywords_python_V, ['W'] = syntax_keywords_python_W, ['Z'] = syntax_keywords_python_Z, ['_'] = syntax_keywords_python__, ['a'] = syntax_keywords_python_a, ['b'] = syntax_keywords_python_b, ['c'] = syntax_keywords_python_c, ['d'] = syntax_keywords_python_d, ['e'] = syntax_keywords_python_e, ['f'] = syntax_keywords_python_f, ['g'] = syntax_keywords_python_g, ['h'] = syntax_keywords_python_h, ['i'] = syntax_keywords_python_i, ['l'] = syntax_keywords_python_l, ['m'] = syntax_keywords_python_m, ['n'] = syntax_keywords_python_n, ['o'] = syntax_keywords_python_o, ['p'] = syntax_keywords_python_p, ['q'] = syntax_keywords_python_q, ['r'] = syntax_keywords_python_r, ['s'] = syntax_keywords_python_s, ['t'] = syntax_keywords_python_t, ['v'] = syntax_keywords_python_v, ['w'] = syntax_keywords_python_w, ['y'] = syntax_keywords_python_y, ['z'] = syntax_keywords_python_z }; +static Keyword const syntax_keywords_html_a[11] = {{"accept-charset=", SYNTAX_BUILTIN},{"accept=", SYNTAX_BUILTIN},{"accesskey=", SYNTAX_BUILTIN},{"action=", SYNTAX_BUILTIN},{"align=", SYNTAX_BUILTIN},{"alt=", SYNTAX_BUILTIN},{"async=", SYNTAX_BUILTIN},{"autocomplete=", SYNTAX_BUILTIN},{"autofocus=", SYNTAX_BUILTIN},{"autoplay=", SYNTAX_BUILTIN}}; +static Keyword const syntax_keywords_html_b[3] = {{"bgcolor=", SYNTAX_BUILTIN},{"border=", SYNTAX_BUILTIN}}; +static Keyword const syntax_keywords_html_c[12] = {{"charset=", SYNTAX_BUILTIN},{"checked=", SYNTAX_BUILTIN},{"cite=", SYNTAX_BUILTIN},{"class=", SYNTAX_BUILTIN},{"color=", SYNTAX_BUILTIN},{"cols=", SYNTAX_BUILTIN},{"colspan=", SYNTAX_BUILTIN},{"content=", SYNTAX_BUILTIN},{"contenteditable=", SYNTAX_BUILTIN},{"controls=", SYNTAX_BUILTIN},{"coords=", SYNTAX_BUILTIN}}; +static Keyword const syntax_keywords_html_d[10] = {{"data=", SYNTAX_BUILTIN},{"datetime=", SYNTAX_BUILTIN},{"default=", SYNTAX_BUILTIN},{"defer=", SYNTAX_BUILTIN},{"dir=", SYNTAX_BUILTIN},{"dirname=", SYNTAX_BUILTIN},{"disabled=", SYNTAX_BUILTIN},{"download=", SYNTAX_BUILTIN},{"draggable=", SYNTAX_BUILTIN}}; +static Keyword const syntax_keywords_html_e[2] = {{"enctype=", SYNTAX_BUILTIN}}; +static Keyword const syntax_keywords_html_f[4] = {{"for=", SYNTAX_BUILTIN},{"form=", SYNTAX_BUILTIN},{"formaction=", SYNTAX_BUILTIN}}; +static Keyword const syntax_keywords_html_h[8] = {{"headers=", SYNTAX_BUILTIN},{"height=", SYNTAX_BUILTIN},{"hidden=", SYNTAX_BUILTIN},{"high=", SYNTAX_BUILTIN},{"href=", SYNTAX_BUILTIN},{"hreflang=", SYNTAX_BUILTIN},{"http-equiv=", SYNTAX_BUILTIN}}; +static Keyword const syntax_keywords_html_i[3] = {{"id=", SYNTAX_BUILTIN},{"ismap=", SYNTAX_BUILTIN}}; +static Keyword const syntax_keywords_html_k[2] = {{"kind=", SYNTAX_BUILTIN}}; +static Keyword const syntax_keywords_html_l[6] = {{"label=", SYNTAX_BUILTIN},{"lang=", SYNTAX_BUILTIN},{"list=", SYNTAX_BUILTIN},{"loop=", SYNTAX_BUILTIN},{"low=", SYNTAX_BUILTIN}}; +static Keyword const syntax_keywords_html_m[8] = {{"max=", SYNTAX_BUILTIN},{"maxlength=", SYNTAX_BUILTIN},{"media=", SYNTAX_BUILTIN},{"method=", SYNTAX_BUILTIN},{"min=", SYNTAX_BUILTIN},{"multiple=", SYNTAX_BUILTIN},{"muted=", SYNTAX_BUILTIN}}; +static Keyword const syntax_keywords_html_n[3] = {{"name=", SYNTAX_BUILTIN},{"novalidate=", SYNTAX_BUILTIN}}; +static Keyword const syntax_keywords_html_o[73] = {{"onabort=", SYNTAX_BUILTIN},{"onafterprint=", SYNTAX_BUILTIN},{"onbeforeprint=", SYNTAX_BUILTIN},{"onbeforeunload=", SYNTAX_BUILTIN},{"onblur=", SYNTAX_BUILTIN},{"oncanplay=", SYNTAX_BUILTIN},{"oncanplaythrough=", SYNTAX_BUILTIN},{"onchange=", SYNTAX_BUILTIN},{"onclick=", SYNTAX_BUILTIN},{"oncontextmenu=", SYNTAX_BUILTIN},{"oncopy=", SYNTAX_BUILTIN},{"oncuechange=", SYNTAX_BUILTIN},{"oncut=", SYNTAX_BUILTIN},{"ondblclick=", SYNTAX_BUILTIN},{"ondrag=", SYNTAX_BUILTIN},{"ondragend=", SYNTAX_BUILTIN},{"ondragenter=", SYNTAX_BUILTIN},{"ondragleave=", SYNTAX_BUILTIN},{"ondragover=", SYNTAX_BUILTIN},{"ondragstart=", SYNTAX_BUILTIN},{"ondrop=", SYNTAX_BUILTIN},{"ondurationchange=", SYNTAX_BUILTIN},{"onemptied=", SYNTAX_BUILTIN},{"onended=", SYNTAX_BUILTIN},{"onerror=", SYNTAX_BUILTIN},{"onfocus=", SYNTAX_BUILTIN},{"onhashchange=", SYNTAX_BUILTIN},{"oninput=", SYNTAX_BUILTIN},{"oninvalid=", SYNTAX_BUILTIN},{"onkeydown=", SYNTAX_BUILTIN},{"onkeypress=", SYNTAX_BUILTIN},{"onkeyup=", SYNTAX_BUILTIN},{"onload=", SYNTAX_BUILTIN},{"onloadeddata=", SYNTAX_BUILTIN},{"onloadedmetadata=", SYNTAX_BUILTIN},{"onloadstart=", SYNTAX_BUILTIN},{"onmousedown=", SYNTAX_BUILTIN},{"onmousemove=", SYNTAX_BUILTIN},{"onmouseout=", SYNTAX_BUILTIN},{"onmouseover=", SYNTAX_BUILTIN},{"onmouseup=", SYNTAX_BUILTIN},{"onmousewheel=", SYNTAX_BUILTIN},{"onoffline=", SYNTAX_BUILTIN},{"ononline=", SYNTAX_BUILTIN},{"onpagehide=", SYNTAX_BUILTIN},{"onpageshow=", SYNTAX_BUILTIN},{"onpaste=", SYNTAX_BUILTIN},{"onpause=", SYNTAX_BUILTIN},{"onplay=", SYNTAX_BUILTIN},{"onplaying=", SYNTAX_BUILTIN},{"onpopstate=", SYNTAX_BUILTIN},{"onprogress=", SYNTAX_BUILTIN},{"onratechange=", SYNTAX_BUILTIN},{"onreset=", SYNTAX_BUILTIN},{"onresize=", SYNTAX_BUILTIN},{"onscroll=", SYNTAX_BUILTIN},{"onsearch=", SYNTAX_BUILTIN},{"onseeked=", SYNTAX_BUILTIN},{"onseeking=", SYNTAX_BUILTIN},{"onselect=", SYNTAX_BUILTIN},{"onstalled=", SYNTAX_BUILTIN},{"onstorage=", SYNTAX_BUILTIN},{"onsubmit=", SYNTAX_BUILTIN},{"onsuspend=", SYNTAX_BUILTIN},{"ontimeupdate=", SYNTAX_BUILTIN},{"ontoggle=", SYNTAX_BUILTIN},{"onunload=", SYNTAX_BUILTIN},{"onvolumechange=", SYNTAX_BUILTIN},{"onwaiting=", SYNTAX_BUILTIN},{"onwheel=", SYNTAX_BUILTIN},{"open=", SYNTAX_BUILTIN},{"optimum=", SYNTAX_BUILTIN}}; +static Keyword const syntax_keywords_html_p[5] = {{"pattern=", SYNTAX_BUILTIN},{"placeholder=", SYNTAX_BUILTIN},{"poster=", SYNTAX_BUILTIN},{"preload=", SYNTAX_BUILTIN}}; +static Keyword const syntax_keywords_html_r[7] = {{"readonly=", SYNTAX_BUILTIN},{"rel=", SYNTAX_BUILTIN},{"required=", SYNTAX_BUILTIN},{"reversed=", SYNTAX_BUILTIN},{"rows=", SYNTAX_BUILTIN},{"rowspan=", SYNTAX_BUILTIN}}; +static Keyword const syntax_keywords_html_s[16] = {{"sandbox=", SYNTAX_BUILTIN},{"scope=", SYNTAX_BUILTIN},{"selected=", SYNTAX_BUILTIN},{"shape=", SYNTAX_BUILTIN},{"size=", SYNTAX_BUILTIN},{"sizes=", SYNTAX_BUILTIN},{"span=", SYNTAX_BUILTIN},{"spellcheck=", SYNTAX_BUILTIN},{"src=", SYNTAX_BUILTIN},{"srcdoc=", SYNTAX_BUILTIN},{"srclang=", SYNTAX_BUILTIN},{"srcset=", SYNTAX_BUILTIN},{"start=", SYNTAX_BUILTIN},{"step=", SYNTAX_BUILTIN},{"style=", SYNTAX_BUILTIN}}; +static Keyword const syntax_keywords_html_t[6] = {{"tabindex=", SYNTAX_BUILTIN},{"target=", SYNTAX_BUILTIN},{"title=", SYNTAX_BUILTIN},{"translate=", SYNTAX_BUILTIN},{"type=", SYNTAX_BUILTIN}}; +static Keyword const syntax_keywords_html_u[2] = {{"usemap=", SYNTAX_BUILTIN}}; +static Keyword const syntax_keywords_html_v[2] = {{"value=", SYNTAX_BUILTIN}}; +static Keyword const syntax_keywords_html_w[3] = {{"width=", SYNTAX_BUILTIN},{"wrap=", SYNTAX_BUILTIN}}; +static Keyword const *const syntax_all_keywords_html[] = { + ['a'] = syntax_keywords_html_a, ['b'] = syntax_keywords_html_b, ['c'] = syntax_keywords_html_c, ['d'] = syntax_keywords_html_d, ['e'] = syntax_keywords_html_e, ['f'] = syntax_keywords_html_f, ['h'] = syntax_keywords_html_h, ['i'] = syntax_keywords_html_i, ['k'] = syntax_keywords_html_k, ['l'] = syntax_keywords_html_l, ['m'] = syntax_keywords_html_m, ['n'] = syntax_keywords_html_n, ['o'] = syntax_keywords_html_o, ['p'] = syntax_keywords_html_p, ['r'] = syntax_keywords_html_r, ['s'] = syntax_keywords_html_s, ['t'] = syntax_keywords_html_t, ['u'] = syntax_keywords_html_u, ['v'] = syntax_keywords_html_v, ['w'] = syntax_keywords_html_w +}; + diff --git a/keywords.py b/keywords.py index 951f3b2..c8f7e9c 100755 --- a/keywords.py +++ b/keywords.py @@ -216,6 +216,40 @@ builtins_python = ['ArithmeticError', 'AssertionError', 'AttributeError', 'BaseE 'vars', 'zip', ] +attributes_html = [ + 'accept','accept-charset','accesskey','action','align','alt','async', + 'autocomplete','autofocus','autoplay','bgcolor','border','charset', + 'checked','cite','class','color','cols','colspan','content', + 'contenteditable','controls','coords','data','datetime', + 'default','defer','dir','dirname','disabled','download','draggable', + 'enctype','for','form','formaction','headers','height','hidden','high', + 'href','hreflang','http-equiv','id','ismap','kind','label','lang','list', + 'loop','low','max','maxlength','media','method','min','multiple','muted', + 'name','novalidate','onabort','onafterprint','onbeforeprint', + 'onbeforeunload','onblur','oncanplay','oncanplaythrough','onchange', + 'onclick','oncontextmenu','oncopy','oncuechange','oncut','ondblclick', + 'ondrag','ondragend','ondragenter','ondragleave','ondragover','ondragstart', + 'ondrop','ondurationchange','onemptied','onended','onerror','onfocus', + 'onhashchange','oninput','oninvalid','onkeydown','onkeypress','onkeyup', + 'onload','onloadeddata','onloadedmetadata','onloadstart','onmousedown', + 'onmousemove','onmouseout','onmouseover','onmouseup','onmousewheel','onoffline', + 'ononline','onpagehide','onpageshow','onpaste','onpause','onplay','onplaying', + 'onpopstate','onprogress','onratechange','onreset','onresize','onscroll', + 'onsearch','onseeked','onseeking','onselect','onstalled','onstorage', + 'onsubmit','onsuspend','ontimeupdate','ontoggle','onunload','onvolumechange', + 'onwaiting','onwheel','open','optimum','pattern','placeholder','poster', + 'preload','readonly','rel','required','reversed','rows','rowspan','sandbox', + 'scope','selected','shape','size','sizes','span','spellcheck','src','srcdoc', + 'srclang','srcset','start','step','style','tabindex','target', + 'title','translate','type','usemap','value','width','wrap' +] + +assert len(attributes_html) == len(set(attributes_html)) + +builtins_html = [] +for attr in attributes_html: + builtins_html.append(attr + '=') + file = open('keywords.h', 'w') file.write('''// keywords for all languages ted supports // This file was auto-generated by keywords.py @@ -239,4 +273,5 @@ cpp_things.remove((SYNTAX_BUILTIN, 'wchar_t')) output_keywords(file, cpp_things, 'cpp') output_keywords(file, label(keywords_rust, SYNTAX_KEYWORD) + label(builtins_rust, SYNTAX_BUILTIN) + label(constants_rust, SYNTAX_CONSTANT), 'rust') output_keywords(file, label(keywords_python, SYNTAX_KEYWORD) + label(builtins_python, SYNTAX_BUILTIN), 'python') +output_keywords(file, label(builtins_html, SYNTAX_BUILTIN), 'html') file.close() @@ -1,4 +1,7 @@ -// HTML highlighting +// Highlighting for: +// - Config (cfg) +// :set-language command; -1 = default language for this extension + #include "base.h" no_warn_start #if _WIN32 @@ -21,6 +21,7 @@ char const *language_comment_start(Language l) { case LANG_CPP: return "// "; case LANG_PYTHON: return "# "; case LANG_TEX: return "% "; + case LANG_HTML: return "<!-- "; case LANG_NONE: case LANG_MARKDOWN: case LANG_COUNT: @@ -34,6 +35,8 @@ char const *language_comment_end(Language l) { switch (l) { case LANG_C: return " */"; + case LANG_HTML: + return " -->"; default: return ""; } @@ -54,10 +57,10 @@ ColorSetting syntax_char_type_to_color(SyntaxCharType t) { return COLOR_TEXT; } -static inline bool syntax_keyword_matches(char32_t *text, size_t len, char const *keyword) { +static inline bool syntax_keyword_matches(char32_t const *text, size_t len, char const *keyword) { if (len == strlen(keyword)) { bool matches = true; - char32_t *p = text; + char32_t const *p = text; // check if `p` starts with `keyword` for (char const *q = keyword; *q; ++p, ++q) { if (*p != (char32_t)*q) { @@ -98,7 +101,7 @@ bool syntax_is_opening_bracket(Language lang, char32_t c) { } // lookup the given string in the keywords table -static Keyword const *syntax_keyword_lookup(Keyword const *const *all_keywords, size_t n_all_keywords, char32_t *str, size_t len) { +static Keyword const *syntax_keyword_lookup(Keyword const *const *all_keywords, size_t n_all_keywords, char32_t const *str, size_t len) { if (!len) return NULL; if (str[0] >= n_all_keywords) return NULL; @@ -115,7 +118,7 @@ static Keyword const *syntax_keyword_lookup(Keyword const *const *all_keywords, } // does i continue the number literal from i-1 -static inline bool syntax_number_continues(char32_t *line, u32 line_len, u32 i) { +static inline bool syntax_number_continues(char32_t const *line, u32 line_len, u32 i) { if (line[i] == '.' && ((i && line[i-1] == '.') || (i < line_len-1 && line[i+1] == '.'))) return false; // can't have two .s in a row return (line[i] < CHAR_MAX && @@ -123,19 +126,34 @@ static inline bool syntax_number_continues(char32_t *line, u32 line_len, u32 i) || (i && line[i-1] == 'e' && (line[i] == '+' || line[i] == '-')))); } +static bool is_keyword(Language lang, char32_t c) { + if (is32_ident(c)) return true; + switch (lang) { + case LANG_RUST: + // Rust builtin macros + if (c == '!') + return true; + break; + case LANG_HTML: + if (c == '-' || c == '=') + return true; + break; + default: break; + } + return false; +} + // find how long this keyword would be (if this is a keyword) -static inline u32 syntax_keyword_len(Language lang, char32_t *line, u32 i, u32 line_len) { +static inline u32 syntax_keyword_len(Language lang, char32_t const *line, u32 i, u32 line_len) { u32 keyword_end; - for (keyword_end = i; - keyword_end < line_len - && (is32_ident(line[keyword_end]) - || (lang == LANG_RUST && line[keyword_end] == '!')) // for rust builtin macros - ; ++keyword_end); + for (keyword_end = i; keyword_end < line_len; ++keyword_end) { + if (!is_keyword(lang, line[keyword_end])) + break; + } return keyword_end - i; } - -static void syntax_highlight_c_cpp(SyntaxState *state_ptr, bool cpp, char32_t *line, u32 line_len, SyntaxCharType *char_types) { +static void syntax_highlight_c_cpp(SyntaxState *state_ptr, bool cpp, char32_t const *line, u32 line_len, SyntaxCharType *char_types) { SyntaxState state = *state_ptr; bool in_preprocessor = (state & SYNTAX_STATE_CPP_PREPROCESSOR) != 0; bool in_string = (state & SYNTAX_STATE_CPP_STRING) != 0; @@ -293,7 +311,7 @@ static void syntax_highlight_c_cpp(SyntaxState *state_ptr, bool cpp, char32_t *l ); } -static void syntax_highlight_rust(SyntaxState *state, char32_t *line, u32 line_len, SyntaxCharType *char_types) { +static void syntax_highlight_rust(SyntaxState *state, char32_t const *line, u32 line_len, SyntaxCharType *char_types) { u32 comment_depth = (((u32)*state & SYNTAX_STATE_RUST_COMMENT_DEPTH_MASK) / SYNTAX_STATE_RUST_COMMENT_DEPTH_MUL); bool in_string = (*state & SYNTAX_STATE_RUST_STRING) != 0; bool string_is_raw = (*state & SYNTAX_STATE_RUST_STRING_IS_RAW) != 0; @@ -446,7 +464,7 @@ static void syntax_highlight_rust(SyntaxState *state, char32_t *line, u32 line_l ); } -static void syntax_highlight_python(SyntaxState *state, char32_t *line, u32 line_len, SyntaxCharType *char_types) { +static void syntax_highlight_python(SyntaxState *state, char32_t const *line, u32 line_len, SyntaxCharType *char_types) { (void)state; bool in_string = (*state & SYNTAX_STATE_PYTHON_STRING) != 0; bool string_is_dbl_quoted = (*state & SYNTAX_STATE_PYTHON_STRING_DBL_QUOTED) != 0; @@ -559,7 +577,7 @@ static bool is_tex_ident(char32_t c) { return is32_ident(c) && !is32_digit(c); } -static void syntax_highlight_tex(SyntaxState *state, char32_t *line, u32 line_len, SyntaxCharType *char_types) { +static void syntax_highlight_tex(SyntaxState *state, char32_t const *line, u32 line_len, SyntaxCharType *char_types) { bool dollar = (*state & SYNTAX_STATE_TEX_DOLLAR) != 0; bool dollardollar = (*state & SYNTAX_STATE_TEX_DOLLARDOLLAR) != 0; bool verbatim = (*state & SYNTAX_STATE_TEX_VERBATIM) != 0; @@ -577,7 +595,7 @@ static void syntax_highlight_tex(SyntaxState *state, char32_t *line, u32 line_le if (is_tex_ident(line[i+1])) { // command, e.g. \begin String32 command_str = { - .str = line + i+1, + .str = (char32_t *)line + i+1, .len = line_len - (i+1), }; bool new_verbatim = false; @@ -658,7 +676,7 @@ static void syntax_highlight_tex(SyntaxState *state, char32_t *line, u32 line_le ); } -static void syntax_highlight_markdown(SyntaxState *state, char32_t *line, u32 line_len, SyntaxCharType *char_types) { +static void syntax_highlight_markdown(SyntaxState *state, char32_t const *line, u32 line_len, SyntaxCharType *char_types) { bool multiline_code = (*state & SYNTAX_STATE_MARKDOWN_CODE) != 0; *state = (multiline_code * SYNTAX_STATE_MARKDOWN_CODE); @@ -703,7 +721,7 @@ static void syntax_highlight_markdown(SyntaxState *state, char32_t *line, u32 li } String32 remains = { - .str = line + i, + .str = (char32_t *)line + i, .len = line_len - i }; if (!format_ending && str32_has_ascii_prefix(remains, "http")) { @@ -845,11 +863,121 @@ static void syntax_highlight_markdown(SyntaxState *state, char32_t *line, u32 li } +static bool is_html_tag_char(char32_t c) { + return c == '<' || c == '/' || c == '!' || is32_alnum(c); +} + +static void syntax_highlight_html(SyntaxState *state, char32_t const *line, u32 line_len, SyntaxCharType *char_types) { + bool comment = (*state & SYNTAX_STATE_HTML_COMMENT) != 0; + bool in_sgl_string = false; // 'string' + bool in_dbl_string = false; // "string" + int backslashes = 0; + for (u32 i = 0; i < line_len; ++i) { + String32 remains = { + .str = (char32_t *)line + i, + .len = line_len - i + }; + bool has_1_char = i + 1 < line_len; + + if (comment) { + if (str32_has_ascii_prefix(remains, "-->")) { + if (char_types) + memset(&char_types[i], SYNTAX_COMMENT, 3); + i += 2; + // (don't worry, comments can't nest in HTML) + comment = false; + } else { + if (char_types) char_types[i] = SYNTAX_COMMENT; + } + } else if (!in_sgl_string && !in_dbl_string && str32_has_ascii_prefix(remains, "<!--")) { + comment = true; + if (char_types) char_types[i] = SYNTAX_COMMENT; + } else if (in_sgl_string || in_dbl_string) { + if (char_types) + char_types[i] = SYNTAX_STRING; + if (line[i] == (in_sgl_string ? '\'' : '"') && backslashes % 2 == 0) + in_sgl_string = in_dbl_string = false; + } else { + if (char_types) char_types[i] = SYNTAX_NORMAL; + switch (line[i]) { + case '"': + if (i > 0 && line[i-1] == '=') { + in_dbl_string = true; + if (char_types) + char_types[i] = SYNTAX_STRING; + } + break; + case '\'': + if (i > 0 && line[i-1] == '=') { + in_sgl_string = true; + if (char_types) + char_types[i] = SYNTAX_STRING; + } + break; + case '&': + for (; i < line_len; ++i) { + if (char_types) + char_types[i] = SYNTAX_BUILTIN; + if (line[i] == ';') + break; + } + break; + case '<': + if (has_1_char && is_html_tag_char(line[i+1])) { + for (; i < line_len; ++i) { + if (!is_html_tag_char(line[i])) { + --i; + break; + } + if (char_types) + char_types[i] = SYNTAX_KEYWORD; + } + } + break; + case '>': + if (char_types) { + // we want to check if the character before it is a space so that + // > in JavaScript/PHP doesn't get picked up as a "tag". + if (i > 0 && !is32_space(line[i-1])) { + char_types[i] = SYNTAX_KEYWORD; + if (line[i-1] == '/') // tags like <thing+ /> + char_types[i-1] = SYNTAX_KEYWORD; + } + } + break; + default: + if (char_types) { + u32 keyword_len = syntax_keyword_len(LANG_HTML, line, i, line_len); + Keyword const *keyword = syntax_keyword_lookup(syntax_all_keywords_html, arr_count(syntax_all_keywords_html), + &line[i], keyword_len); + if (keyword) { + SyntaxCharType type = keyword->type; + for (size_t j = 0; j < keyword_len; ++j) { + char_types[i++] = type; + } + --i; // we'll increment i from the for loop + break; + } + } + break; + } + } + if (i < line_len) { + if (line[i] == '\\') + ++backslashes; + else + backslashes = 0; + } + } + + *state = (comment * SYNTAX_STATE_HTML_COMMENT); +} + // This is the main syntax highlighting function. It will determine which colors to use for each character. // Rather than returning colors, it returns a character type (e.g. comment) which can be converted to a color. // To highlight multiple lines, start out with a zeroed SyntaxState, and pass a pointer to it each time. // You can set char_types to NULL if you just want to advance the state, and don't care about the character types. -void syntax_highlight(SyntaxState *state, Language lang, char32_t *line, u32 line_len, SyntaxCharType *char_types) { +void syntax_highlight(SyntaxState *state, Language lang, char32_t const *line, u32 line_len, SyntaxCharType *char_types) { switch (lang) { case LANG_NONE: if (char_types) @@ -873,6 +1001,9 @@ void syntax_highlight(SyntaxState *state, Language lang, char32_t *line, u32 lin case LANG_MARKDOWN: syntax_highlight_markdown(state, line, line_len, char_types); break; + case LANG_HTML: + syntax_highlight_html(state, line, line_len, char_types); + break; case LANG_COUNT: assert(0); break; } } @@ -203,9 +203,10 @@ cursor-line-number = #ddf line-numbers-separator = #fff3 [extensions] -C = .c, .h +C = .c, .h, .glsl C++ = .cpp, .hpp, .C, .H, .cxx, .hxx, .cc, .hh Rust = .rs Python = .py Tex = .tex Markdown = .md +HTML = .html, .php @@ -37,6 +37,10 @@ enum { SYNTAX_STATE_MARKDOWN_CODE = 0x01u, // inside ``` ``` code section }; +enum { + SYNTAX_STATE_HTML_COMMENT = 0x01u +}; + typedef u8 SyntaxState; ENUM_U16 { @@ -47,6 +51,7 @@ ENUM_U16 { LANG_PYTHON, LANG_TEX, LANG_MARKDOWN, + LANG_HTML, LANG_COUNT } ENUM_U16_END(Language); @@ -63,6 +68,7 @@ static LanguageName const language_names[] = { {LANG_PYTHON, "Python"}, {LANG_TEX, "Tex"}, {LANG_MARKDOWN, "Markdown"}, + {LANG_HTML, "HTML"}, }; static_assert_if_possible(arr_count(language_names) == LANG_COUNT) |