From 35b1b0129e8791a9412173acf3756606d0cc397d Mon Sep 17 00:00:00 2001 From: pommicket Date: Fri, 22 Jul 2022 14:54:07 -0400 Subject: go syntax highlighting also :goto-line now clamps numbers < 1 and > nlines instead of rejecting them --- README.md | 4 +- keywords.h | 21 +++++++++ keywords.py | 24 ++++++++++ menu.c | 4 +- syntax.c | 147 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- ted.cfg | 1 + ted.h | 7 +++ test.go | 18 ++++++++ 8 files changed, 222 insertions(+), 4 deletions(-) create mode 100644 test.go diff --git a/README.md b/README.md index da61dd5..bee145e 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ in other editors. - Multiple tabs, each with a different file - Split screen - Auto-indent -- Syntax highlighting for C, C++, HTML, Java, JavaScript, LaTeX, Markdown, Python, and Rust. +- Syntax highlighting for C, C++, Go, HTML, Java, JavaScript, LaTeX, Markdown, Python, and Rust. - Find and replace (with regular expressions!) - Run build command, go to errors - Run any shell command @@ -146,7 +146,7 @@ Then, open windows\_installer\\ted\\ted.sln, and build. 
1.0r1 Windows-specific bugfixes, update to new version of PCRE2 2022 Jan 1 1.0r2 Various bugfixes involving closing tabs and windows 2022 Mar 26 1.0r3 Better TeX syntax highlighting, move to cursor on backspace/delete 2022 Jul 7 -1.1 Minor fixes, syntax highlighting for JavaScript and Java 2022 Jul 22 +1.1 Minor fixes, syntax highlighting for JavaScript, Java, and Go 2022 Jul 22 ## License diff --git a/keywords.h b/keywords.h index 723f8f6..6cf14f8 100644 --- a/keywords.h +++ b/keywords.h @@ -170,6 +170,27 @@ static Keyword const *const syntax_all_keywords_javascript[] = { ['A'] = syntax_keywords_javascript_A, ['B'] = syntax_keywords_javascript_B, ['D'] = syntax_keywords_javascript_D, ['E'] = syntax_keywords_javascript_E, ['F'] = syntax_keywords_javascript_F, ['G'] = syntax_keywords_javascript_G, ['I'] = syntax_keywords_javascript_I, ['J'] = syntax_keywords_javascript_J, ['M'] = syntax_keywords_javascript_M, ['N'] = syntax_keywords_javascript_N, ['O'] = syntax_keywords_javascript_O, ['P'] = syntax_keywords_javascript_P, ['R'] = syntax_keywords_javascript_R, ['S'] = syntax_keywords_javascript_S, ['T'] = syntax_keywords_javascript_T, ['U'] = syntax_keywords_javascript_U, ['W'] = syntax_keywords_javascript_W, ['a'] = syntax_keywords_javascript_a, ['b'] = syntax_keywords_javascript_b, ['c'] = syntax_keywords_javascript_c, ['d'] = syntax_keywords_javascript_d, ['e'] = syntax_keywords_javascript_e, ['f'] = syntax_keywords_javascript_f, ['g'] = syntax_keywords_javascript_g, ['i'] = syntax_keywords_javascript_i, ['l'] = syntax_keywords_javascript_l, ['n'] = syntax_keywords_javascript_n, ['p'] = syntax_keywords_javascript_p, ['r'] = syntax_keywords_javascript_r, ['s'] = syntax_keywords_javascript_s, ['t'] = syntax_keywords_javascript_t, ['u'] = syntax_keywords_javascript_u, ['v'] = syntax_keywords_javascript_v, ['w'] = syntax_keywords_javascript_w, ['y'] = syntax_keywords_javascript_y }; +static Keyword const syntax_keywords_go_a[2] = {{"append", SYNTAX_BUILTIN}}; +static 
Keyword const syntax_keywords_go_b[4] = {{"break", SYNTAX_KEYWORD},{"bool", SYNTAX_BUILTIN},{"byte", SYNTAX_BUILTIN}}; +static Keyword const syntax_keywords_go_c[12] = {{"case", SYNTAX_KEYWORD},{"chan", SYNTAX_KEYWORD},{"const", SYNTAX_KEYWORD},{"continue", SYNTAX_KEYWORD},{"cap", SYNTAX_BUILTIN},{"close", SYNTAX_BUILTIN},{"comparable", SYNTAX_BUILTIN},{"complex", SYNTAX_BUILTIN},{"complex128", SYNTAX_BUILTIN},{"complex64", SYNTAX_BUILTIN},{"copy", SYNTAX_BUILTIN}}; +static Keyword const syntax_keywords_go_d[4] = {{"default", SYNTAX_KEYWORD},{"defer", SYNTAX_KEYWORD},{"delete", SYNTAX_BUILTIN}}; +static Keyword const syntax_keywords_go_e[3] = {{"else", SYNTAX_KEYWORD},{"error", SYNTAX_BUILTIN}}; +static Keyword const syntax_keywords_go_f[7] = {{"fallthrough", SYNTAX_KEYWORD},{"for", SYNTAX_KEYWORD},{"func", SYNTAX_KEYWORD},{"false", SYNTAX_CONSTANT},{"float32", SYNTAX_BUILTIN},{"float64", SYNTAX_BUILTIN}}; +static Keyword const syntax_keywords_go_g[3] = {{"go", SYNTAX_KEYWORD},{"goto", SYNTAX_KEYWORD}}; +static Keyword const syntax_keywords_go_i[11] = {{"if", SYNTAX_KEYWORD},{"import", SYNTAX_KEYWORD},{"interface", SYNTAX_KEYWORD},{"iota", SYNTAX_CONSTANT},{"imag", SYNTAX_BUILTIN},{"int", SYNTAX_BUILTIN},{"int16", SYNTAX_BUILTIN},{"int32", SYNTAX_BUILTIN},{"int64", SYNTAX_BUILTIN},{"int8", SYNTAX_BUILTIN}}; +static Keyword const syntax_keywords_go_l[2] = {{"len", SYNTAX_BUILTIN}}; +static Keyword const syntax_keywords_go_m[3] = {{"map", SYNTAX_KEYWORD},{"make", SYNTAX_BUILTIN}}; +static Keyword const syntax_keywords_go_n[3] = {{"nil", SYNTAX_CONSTANT},{"new", SYNTAX_BUILTIN}}; +static Keyword const syntax_keywords_go_p[5] = {{"package", SYNTAX_KEYWORD},{"panic", SYNTAX_BUILTIN},{"print", SYNTAX_BUILTIN},{"println", SYNTAX_BUILTIN}}; +static Keyword const syntax_keywords_go_r[6] = {{"range", SYNTAX_KEYWORD},{"return", SYNTAX_KEYWORD},{"real", SYNTAX_BUILTIN},{"recover", SYNTAX_BUILTIN},{"rune", SYNTAX_BUILTIN}}; +static Keyword const syntax_keywords_go_s[5] = 
{{"select", SYNTAX_KEYWORD},{"struct", SYNTAX_KEYWORD},{"switch", SYNTAX_KEYWORD},{"string", SYNTAX_BUILTIN}}; +static Keyword const syntax_keywords_go_t[3] = {{"type", SYNTAX_KEYWORD},{"true", SYNTAX_CONSTANT}}; +static Keyword const syntax_keywords_go_u[7] = {{"uint", SYNTAX_BUILTIN},{"uint16", SYNTAX_BUILTIN},{"uint32", SYNTAX_BUILTIN},{"uint64", SYNTAX_BUILTIN},{"uint8", SYNTAX_BUILTIN},{"uintptr", SYNTAX_BUILTIN}}; +static Keyword const syntax_keywords_go_v[2] = {{"var", SYNTAX_KEYWORD}}; +static Keyword const *const syntax_all_keywords_go[] = { + ['a'] = syntax_keywords_go_a, ['b'] = syntax_keywords_go_b, ['c'] = syntax_keywords_go_c, ['d'] = syntax_keywords_go_d, ['e'] = syntax_keywords_go_e, ['f'] = syntax_keywords_go_f, ['g'] = syntax_keywords_go_g, ['i'] = syntax_keywords_go_i, ['l'] = syntax_keywords_go_l, ['m'] = syntax_keywords_go_m, ['n'] = syntax_keywords_go_n, ['p'] = syntax_keywords_go_p, ['r'] = syntax_keywords_go_r, ['s'] = syntax_keywords_go_s, ['t'] = syntax_keywords_go_t, ['u'] = syntax_keywords_go_u, ['v'] = syntax_keywords_go_v +}; + static Keyword const syntax_keywords_java_a[3] = {{"abstract", SYNTAX_KEYWORD},{"assert", SYNTAX_KEYWORD}}; static Keyword const syntax_keywords_java_b[4] = {{"boolean", SYNTAX_KEYWORD},{"break", SYNTAX_KEYWORD},{"byte", SYNTAX_KEYWORD}}; static Keyword const syntax_keywords_java_c[7] = {{"case", SYNTAX_KEYWORD},{"catch", SYNTAX_KEYWORD},{"char", SYNTAX_KEYWORD},{"class", SYNTAX_KEYWORD},{"const", SYNTAX_KEYWORD},{"continue", SYNTAX_KEYWORD}}; diff --git a/keywords.py b/keywords.py index b7f7432..1e345d9 100755 --- a/keywords.py +++ b/keywords.py @@ -300,6 +300,28 @@ constants_java = [ 'true', 'false', 'null' ] +keywords_go = [ + 'break', 'default', 'func', 'interface', 'select', + 'case', 'defer', 'go', 'map', 'struct', + 'chan', 'else', 'goto', 'package', 'switch', + 'const', 'fallthrough', 'if', 'range', 'type', + 'continue', 'for', 'import', 'return', 'var' +] + +constants_go = [ + 'true', 'false', 'iota', 
'nil' +] + +builtins_go = [ + 'new', 'make', 'cap', 'len', 'close', 'append', 'copy', 'delete', + 'complex', 'real', 'imag', 'panic', 'recover', 'print', 'println', + 'bool', 'uint8', 'uint16', 'uint32', 'uint64', + 'int8', 'int16', 'int32', 'int64', + 'float32', 'float64', 'complex64', 'complex128', + 'byte', 'rune', 'uint', 'int', 'uintptr', 'string', 'error', + 'comparable' +] + file = open('keywords.h', 'w') file.write('''// keywords for all languages ted supports // This file was auto-generated by keywords.py @@ -325,6 +347,8 @@ output_keywords(file, cpp_things, 'cpp') output_keywords(file, label(keywords_rust, SYNTAX_KEYWORD) + label(builtins_rust, SYNTAX_BUILTIN) + label(constants_rust, SYNTAX_CONSTANT), 'rust') output_keywords(file, label(keywords_javascript, SYNTAX_KEYWORD) + label(builtins_javascript, SYNTAX_BUILTIN) + label(constants_javascript, SYNTAX_CONSTANT), 'javascript') +output_keywords(file, label(keywords_go, SYNTAX_KEYWORD) + label(builtins_go, SYNTAX_BUILTIN) + + label(constants_go, SYNTAX_CONSTANT), 'go') output_keywords(file, label(keywords_java, SYNTAX_KEYWORD) + label(constants_java, SYNTAX_CONSTANT), 'java') output_keywords(file, label(keywords_python, SYNTAX_KEYWORD) + label(builtins_python, SYNTAX_BUILTIN), 'python') output_keywords(file, label(builtins_html, SYNTAX_BUILTIN), 'html') diff --git a/menu.c b/menu.c index 6977552..dfb3db9 100644 --- a/menu.c +++ b/menu.c @@ -244,7 +244,9 @@ static void menu_update(Ted *ted) { char *end; long line_number = strtol(contents, &end, 0); TextBuffer *buffer = ted->prev_active_buffer; - if (line_number > 0 && *end == '\0' && line_number <= (long)buffer->nlines) { + if (*contents != '\0' && *end == '\0') { + if (line_number < 1) line_number = 1; + if (line_number > buffer->nlines) line_number = (long)buffer->nlines; BufferPos pos = {(u32)line_number - 1, 0}; if (line_buffer->line_buffer_submitted) { diff --git a/syntax.c b/syntax.c index 99aae45..e26560d 100644 --- a/syntax.c +++ b/syntax.c @@ 
-1169,7 +1169,7 @@ static void syntax_highlight_javascript(SyntaxState *state, char32_t const *line
 static void syntax_highlight_java(SyntaxState *state_ptr, char32_t const *line, u32 line_len, SyntaxCharType *char_types) {
 	SyntaxState state = *state_ptr;
 	bool in_string = false;
-	bool in_multiline_comment = (state & SYNTAX_STATE_CPP_MULTI_LINE_COMMENT) != 0;
+	bool in_multiline_comment = (state & SYNTAX_STATE_JAVA_MULTILINE_COMMENT) != 0;
 	bool in_char = false;
 	bool in_number = false;
 
@@ -1291,6 +1291,148 @@ static void syntax_highlight_java(SyntaxState *state_ptr, char32_t const *line,
 	);
 }
 
+// Go highlighter: classifies each character of `line` into char_types (skipped if NULL) and carries the comment / raw-string state across lines via *state_ptr.
+static void syntax_highlight_go(SyntaxState *state_ptr, char32_t const *line, u32 line_len, SyntaxCharType *char_types) {
+	SyntaxState state = *state_ptr;
+	bool string_is_raw = (state & SYNTAX_STATE_GO_RAW_STRING) != 0; // previous line ended inside a backtick string
+	bool in_string = string_is_raw; // only the raw-string flag is carried across lines
+	bool in_multiline_comment = (state & SYNTAX_STATE_GO_MULTILINE_COMMENT) != 0;
+	bool in_char = false;
+	bool in_number = false;
+	// number of consecutive backslashes immediately before the current character
+	int backslashes = 0;
+	for (u32 i = 0; i < line_len; ++i) {
+
+		// is there at least one more character after this one?
+		bool has_1_char = i + 1 < line_len;
+
+		bool dealt_with = false; // if set, the generic type assignment at the bottom of the loop is skipped
+
+		char32_t c = line[i];
+
+		switch (c) {
+		case '\\':
+			++backslashes;
+			break;
+		case '/':
+			if (!in_multiline_comment && !in_string && !in_char && has_1_char) {
+				if (line[i + 1] == '/') {
+					if (char_types) memset(&char_types[i], SYNTAX_COMMENT, line_len - i);
+					i = line_len - 1; // skip the rest of the line
+					dealt_with = true;
+				} else if (line[i + 1] == '*') {
+					in_multiline_comment = true; // /*
+				}
+			} else if (in_multiline_comment) {
+				if (i > 0 && line[i - 1] == '*') {
+					// */
+					in_multiline_comment = false;
+					if (char_types) {
+						dealt_with = true;
+						char_types[i] = SYNTAX_COMMENT;
+					}
+				}
+			}
+			break;
+		case '"':
+			if (in_string && !string_is_raw && backslashes % 2 == 0) {
+				in_string = false;
+				if (char_types) {
+					dealt_with = true;
+					char_types[i] = SYNTAX_STRING;
+				}
+			} else if (!in_string && !in_multiline_comment && !in_char) { // !in_string: a " inside a raw string must not turn it into an interpreted string
+				in_string = true;
+				string_is_raw = false;
+			}
+			break;
+		case '`':
+			if (in_string && string_is_raw) {
+				// end of raw string
+				in_string = false;
+				string_is_raw = false;
+				if (char_types) char_types[i] = SYNTAX_STRING;
+				dealt_with = true;
+			} else if (!in_string && !in_multiline_comment && !in_char) {
+				// start of raw string
+				in_string = true;
+				string_is_raw = true;
+			}
+			break;
+		case '\'':
+			if (in_char && backslashes % 2 == 0) {
+				in_char = false;
+				if (char_types) {
+					dealt_with = true;
+					char_types[i] = SYNTAX_CHARACTER;
+				}
+			} else if (!in_multiline_comment && !in_string) {
+				in_char = true;
+			}
+			break;
+		case ANY_DIGIT:
+			// a number!
+			if (char_types && !in_multiline_comment && !in_string && !in_number && !in_char) {
+				in_number = true;
+				if (i) {
+					if (line[i - 1] == '.') {
+						// support .6, for example
+						char_types[i - 1] = SYNTAX_CONSTANT;
+					} else if (is32_ident(line[i - 1])) {
+						// actually, this isn't a number. it's something like a*6* or u3*2*.
+						in_number = false;
+					}
+				}
+			}
+			break;
+		default: {
+			if ((i && is32_ident(line[i - 1])) || !is32_ident(c))
+				break; // can't be a keyword on its own.
+
+			// keywords don't matter for advancing the state
+			if (char_types && !in_multiline_comment && !in_number && !in_string && !in_char) {
+				u32 keyword_len = syntax_keyword_len(LANG_GO, line, i, line_len);
+				Keyword const *keyword = syntax_keyword_lookup(syntax_all_keywords_go, arr_count(syntax_all_keywords_go),
+					&line[i], keyword_len);
+
+
+				if (keyword) {
+					SyntaxCharType type = keyword->type;
+					for (size_t j = 0; j < keyword_len; ++j) {
+						char_types[i++] = type;
+					}
+					--i; // we'll increment i from the for loop
+					dealt_with = true;
+					break;
+				}
+			}
+		} break;
+		}
+		if (c != '\\') backslashes = 0;
+		if (in_number && !syntax_number_continues(line, line_len, i)) {
+			in_number = false;
+		}
+
+		if (char_types && !dealt_with) {
+			SyntaxCharType type = SYNTAX_NORMAL;
+			if (in_multiline_comment)
+				type = SYNTAX_COMMENT;
+			else if (in_string)
+				type = SYNTAX_STRING;
+			else if (in_char)
+				type = SYNTAX_CHARACTER;
+			else if (in_number)
+				type = SYNTAX_CONSTANT;
+
+			char_types[i] = type;
+		}
+	}
+	*state_ptr = (SyntaxState)(
+		  (in_multiline_comment * SYNTAX_STATE_GO_MULTILINE_COMMENT)
+		| ((in_string && string_is_raw) * SYNTAX_STATE_GO_RAW_STRING)
+	);
+}
+
 // This is the main syntax highlighting function. It will determine which colors to use for each character.
 // Rather than returning colors, it returns a character type (e.g. comment) which can be converted to a color.
 // To highlight multiple lines, start out with a zeroed SyntaxState, and pass a pointer to it each time.
@@ -1331,6 +1473,9 @@ void syntax_highlight(SyntaxState *state, Language lang, char32_t const *line, u case LANG_JAVA: syntax_highlight_java(state, line, line_len, char_types); break; + case LANG_GO: + syntax_highlight_go(state, line, line_len, char_types); + break; case LANG_COUNT: assert(0); break; } } diff --git a/ted.cfg b/ted.cfg index 27b33e5..fa37904 100644 --- a/ted.cfg +++ b/ted.cfg @@ -229,3 +229,4 @@ HTML = .html, .php, .xml, .xhtml Config = .cfg Javascript = .js Java = .java +Go = .go diff --git a/ted.h b/ted.h index 2400aa0..01ad115 100644 --- a/ted.h +++ b/ted.h @@ -50,6 +50,11 @@ enum { SYNTAX_STATE_JAVA_MULTILINE_COMMENT = 0x01u }; +enum { + SYNTAX_STATE_GO_RAW_STRING = 0x01u, // backtick-enclosed string + SYNTAX_STATE_GO_MULTILINE_COMMENT = 0x02u +}; + typedef u8 SyntaxState; // If you are adding new languages, DO NOT change the constant values @@ -66,6 +71,7 @@ ENUM_U16 { LANG_CONFIG = 8, // .cfg files, e.g. ted.cfg LANG_JAVASCRIPT = 9, LANG_JAVA = 10, + LANG_GO = 11, LANG_COUNT } ENUM_U16_END(Language); @@ -86,6 +92,7 @@ static LanguageName const language_names[] = { {LANG_CONFIG, "Config"}, {LANG_JAVASCRIPT, "Javascript"}, {LANG_JAVA, "Java"}, + {LANG_GO, "Go"}, }; static_assert_if_possible(arr_count(language_names) == LANG_COUNT) diff --git a/test.go b/test.go new file mode 100644 index 0000000..98eea24 --- /dev/null +++ b/test.go @@ -0,0 +1,18 @@ +package main + +import "fmt" + +/* +what +a +wonderful +day +*///yes + +func main() { + var x []int = make([]int, 10) + fmt.Println(x != nil) + println(`hello + world\`) + println("yes\"\\") +} -- cgit v1.2.3