summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Leo Tenenbaum <pommicket@gmail.com> 2021-01-31 20:39:36 -0500
committer: Leo Tenenbaum <pommicket@gmail.com> 2021-01-31 20:39:36 -0500
commit: 442e0b92a09689cd75e2d20608572a279bef5390 (patch)
tree: 67d5e2a79ffcb1413ed10b49f70a185fef55ef68
parent: 68b3e3928e1bd05ebbb56810ae8b7a68ff5f12b0 (diff)
more syntax highlighting
-rw-r--r-- buffer.c 4
-rw-r--r-- string32.c 4
-rw-r--r-- syntax.c 71
-rw-r--r-- ted.cfg 1
4 files changed, 73 insertions, 7 deletions
diff --git a/buffer.c b/buffer.c
index 6f1b607..41dc216 100644
--- a/buffer.c
+++ b/buffer.c
@@ -1974,6 +1974,10 @@ void buffer_render(TextBuffer *buffer, Rect r) {
SyntaxState syntax_state = {0};
// dynamic array of character types, to be filled by syntax_highlight
SyntaxCharType *char_types = NULL;
+ for (u32 line_idx = 0; line_idx < start_line; ++line_idx) {
+ Line *line = &lines[line_idx];
+ syntax_highlight(&syntax_state, LANG_C, line->str, line->len, NULL);
+ }
for (u32 line_idx = start_line; line_idx < nlines; ++line_idx) {
Line *line = &lines[line_idx];
diff --git a/string32.c b/string32.c
index 381bf15..03f96c4 100644
--- a/string32.c
+++ b/string32.c
@@ -123,3 +123,7 @@ bool is32_alnum(char32_t c) {
return c <= WINT_MAX && iswalnum((wint_t)c);
}
+// could this character appear in a C-style identifier? i.e. is it '_' or alphanumeric according to iswalnum?
+bool is32_ident(char32_t c) {
+ return c <= WINT_MAX && (iswalnum((wint_t)c) || c == '_'); // values above WINT_MAX are rejected before the cast to wint_t
+}
diff --git a/syntax.c b/syntax.c
index 50e87d9..9a8ba75 100644
--- a/syntax.c
+++ b/syntax.c
@@ -52,6 +52,8 @@ static void syntax_highlight_c(SyntaxStateC *state, char32_t *line, u32 line_len
// are there 1/2 characters left in the line?
bool has_1_char = i + 1 < line_len;
+ bool dealt_with = false;
+
switch (line[i]) {
case '#':
if (!in_single_line_comment && !in_multi_line_comment)
@@ -87,17 +89,72 @@ static void syntax_highlight_c(SyntaxStateC *state, char32_t *line, u32 line_len
if (in_preprocessor && in_string)
in_string = false;
break;
+ default: {
+ // Keywords are split by starting letter to speed this up: only the row for
+ // the current character has to be compared against the line.
+ // Every row is scanned until a NULL entry is found, so each row must end
+ // with one. Rows are therefore one slot wider than the longest keyword
+ // list (the '_' row has 10 entries); the unused trailing slots are
+ // implicitly initialized to NULL and act as the terminator.
+ static char const *const all_keywords[][10 + 1] = {
+ ['a'] = {"auto"},
+ ['b'] = {"break", "bool"},
+ ['c'] = {"case", "char", "const", "continue", "char8_t", "char16_t", "char32_t"},
+ ['d'] = {"default", "do", "double"},
+ ['e'] = {"else", "enum", "extern"},
+ ['f'] = {"float", "for"},
+ ['g'] = {"goto"},
+ ['i'] = {"if", "inline", "int", "int8_t", "int16_t", "int32_t", "int64_t"},
+ ['l'] = {"long"},
+ ['r'] = {"register", "restrict", "return"},
+ ['s'] = {"short", "signed", "sizeof", "static", "struct", "switch"},
+ ['t'] = {"typedef"},
+ ['u'] = {"union", "unsigned", "uint8_t", "uint16_t", "uint32_t", "uint64_t"},
+ ['v'] = {"void", "volatile"},
+ ['w'] = {"while", "wchar_t", "wint_t"},
+ ['_'] = {"_Alignas", "_Alignof", "_Atomic", "_Bool", "_Complex", "_Generic",
+ "_Imaginary", "_Noreturn", "_Static_assert", "_Thread_local"},
+ };
+
+ char const *const *keywords = line[i] < arr_count(all_keywords) ? all_keywords[line[i]] : NULL;
+ if (char_types && keywords && !in_single_line_comment && !in_multi_line_comment && !in_string && !in_preprocessor) {
+ // keywords don't matter for advancing the state
+ for (size_t k = 0; keywords[k]; ++k) {
+ bool matches = true;
+ char const *keyword = keywords[k];
+ size_t keyword_len = strlen(keyword);
+ if (i + keyword_len <= line_len) {
+ // make sure we don't catch "print" as containing the keyword "int"
+ bool separated = (i == 0 || !is32_ident(line[i-1])) && (i + keyword_len == line_len || !is32_ident(line[i + keyword_len]));
+ if (separated) {
+ char32_t *p = &line[i];
+ // check if `p` starts with `keyword`
+ for (char const *q = keyword; *q; ++p, ++q) {
+ if (*p != (char32_t)*q) {
+ matches = false;
+ break;
+ }
+ }
+ if (matches) {
+ for (size_t c = 0; keyword[c]; ++c) {
+ char_types[i++] = SYNTAX_KEYWORD;
+ }
+ --i; // we'll increment i from the for loop
+ dealt_with = true;
+ break;
+ }
+ }
+ }
+ }
+
+ }
+ } break;
}
if (line[i] != '\\') backslashes = 0;
- if (in_single_line_comment || in_multi_line_comment_now)
- type = SYNTAX_COMMENT;
- else if (in_string_now)
- type = SYNTAX_STRING;
- else if (in_preprocessor)
- type = SYNTAX_PREPROCESSOR;
+ if (char_types && !dealt_with) {
+ if (in_single_line_comment || in_multi_line_comment_now)
+ type = SYNTAX_COMMENT;
+ else if (in_string_now)
+ type = SYNTAX_STRING;
+ else if (in_preprocessor)
+ type = SYNTAX_PREPROCESSOR;
- if (char_types) {
char_types[i] = type;
}
}
diff --git a/ted.cfg b/ted.cfg
index 79c9e16..6fe6a8c 100644
--- a/ted.cfg
+++ b/ted.cfg
@@ -126,6 +126,7 @@ yes = #afa
no = #faa
cancel = #ffa
+keyword = #0c0
preprocessor = #77f
string = #f77
character = #f7f