summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLeo Tenenbaum <pommicket@gmail.com>2021-02-02 14:36:55 -0500
committerLeo Tenenbaum <pommicket@gmail.com>2021-02-02 14:36:55 -0500
commite8b7d01ff68fed675d1397f4556f159f0c32246d (patch)
treeb8ae0532b5801b63f218ca3135997e911e3e2d9c
parent4ff4d669ccb658b8b48785d37946378a5b29688c (diff)
rust syntax highlighting
-rw-r--r--keywords.h24
-rw-r--r--keywords.py18
-rw-r--r--string32.c2
-rw-r--r--syntax.c250
-rw-r--r--ted.cfg3
-rw-r--r--ted.h3
-rw-r--r--test.rs8
7 files changed, 270 insertions, 38 deletions
diff --git a/keywords.h b/keywords.h
index c563316..4f4b1e6 100644
--- a/keywords.h
+++ b/keywords.h
@@ -62,3 +62,27 @@ static char const *const *const syntax_all_keywords_cpp[] = {
['a'] = syntax_keywords_cpp_a, ['b'] = syntax_keywords_cpp_b, ['c'] = syntax_keywords_cpp_c, ['d'] = syntax_keywords_cpp_d, ['e'] = syntax_keywords_cpp_e, ['f'] = syntax_keywords_cpp_f, ['m'] = syntax_keywords_cpp_m, ['n'] = syntax_keywords_cpp_n, ['o'] = syntax_keywords_cpp_o, ['p'] = syntax_keywords_cpp_p, ['r'] = syntax_keywords_cpp_r, ['s'] = syntax_keywords_cpp_s, ['t'] = syntax_keywords_cpp_t, ['u'] = syntax_keywords_cpp_u, ['v'] = syntax_keywords_cpp_v, ['x'] = syntax_keywords_cpp_x
};
+static char const *const syntax_keywords_rust_S[2] = {"Self"};
+static char const *const syntax_keywords_rust_a[9] = {"abstract","as","asm!","assert!","assert_eq!","assert_ne!","async","await"};
+static char const *const syntax_keywords_rust_b[4] = {"become","box","break"};
+static char const *const syntax_keywords_rust_c[9] = {"cfg!","column!","compile_error!","concat!","concat_idents!","const","continue","crate"};
+static char const *const syntax_keywords_rust_d[7] = {"dbg!","debug_assert!","debug_assert_eq!","debug_assert_ne!","do","dyn"};
+static char const *const syntax_keywords_rust_e[7] = {"else","enum","env!","eprint!","eprintln!","extern"};
+static char const *const syntax_keywords_rust_f[9] = {"false","file!","final","fn","for","format!","format_args!","format_args_nl!"};
+static char const *const syntax_keywords_rust_g[2] = {"global_asm!"};
+static char const *const syntax_keywords_rust_i[14] = {"if","impl","in","include!","include_bytes!","include_str!","is_aarch64_feature_detected!","is_arm_feature_detected!","is_mips64_feature_detected!","is_mips_feature_detected!","is_powerpc64_feature_detected!","is_powerpc_feature_detected!","is_x86_feature_detected!"};
+static char const *const syntax_keywords_rust_l[6] = {"let","line!","llvm_asm!","log_syntax!","loop"};
+static char const *const syntax_keywords_rust_m[8] = {"macro","match","matches!","mod","module_path!","move","mut"};
+static char const *const syntax_keywords_rust_o[3] = {"option_env!","override"};
+static char const *const syntax_keywords_rust_p[6] = {"panic!","print!","println!","priv","pub"};
+static char const *const syntax_keywords_rust_r[3] = {"ref","return"};
+static char const *const syntax_keywords_rust_s[6] = {"self","static","stringify!","struct","super"};
+static char const *const syntax_keywords_rust_t[10] = {"thread_local!","todo!","trace_macros!","trait","true","try","try!","type","typeof"};
+static char const *const syntax_keywords_rust_u[7] = {"unimplemented!","union","unreachable!","unsafe","unsized","use"};
+static char const *const syntax_keywords_rust_v[3] = {"vec!","virtual"};
+static char const *const syntax_keywords_rust_w[5] = {"where","while","write!","writeln!"};
+static char const *const syntax_keywords_rust_y[2] = {"yield"};
+static char const *const *const syntax_all_keywords_rust[] = {
+ ['S'] = syntax_keywords_rust_S, ['a'] = syntax_keywords_rust_a, ['b'] = syntax_keywords_rust_b, ['c'] = syntax_keywords_rust_c, ['d'] = syntax_keywords_rust_d, ['e'] = syntax_keywords_rust_e, ['f'] = syntax_keywords_rust_f, ['g'] = syntax_keywords_rust_g, ['i'] = syntax_keywords_rust_i, ['l'] = syntax_keywords_rust_l, ['m'] = syntax_keywords_rust_m, ['o'] = syntax_keywords_rust_o, ['p'] = syntax_keywords_rust_p, ['r'] = syntax_keywords_rust_r, ['s'] = syntax_keywords_rust_s, ['t'] = syntax_keywords_rust_t, ['u'] = syntax_keywords_rust_u, ['v'] = syntax_keywords_rust_v, ['w'] = syntax_keywords_rust_w, ['y'] = syntax_keywords_rust_y
+};
+
diff --git a/keywords.py b/keywords.py
index b8c5223..49c68bf 100644
--- a/keywords.py
+++ b/keywords.py
@@ -135,8 +135,26 @@ keywords_cpp = [
'xor', 'xor_eq',
]
assert not set(keywords_c).intersection(keywords_cpp)
+
+keywords_rust = [
+ "as", "break", "const", "continue", "crate", "else", "enum", "extern", "false", "fn", "for",
+ "if", "impl", "in", "let", "loop", "match", "mod", "move", "mut", "pub", "ref", "return",
+ "self", "Self", "static", "struct", "super", "trait", "true", "type", "unsafe", "use",
+ "where", "while", "async", "await", "dyn", "abstract", "become", "box", "do", "final",
+ "macro", "override", "priv", "typeof", "unsized", "virtual", "yield", "try", "union",
+ "asm!","concat_idents!","format_args_nl!","global_asm!","is_aarch64_feature_detected!",
+ "is_arm_feature_detected!","is_mips64_feature_detected!","is_mips_feature_detected!",
+ "is_powerpc64_feature_detected!","is_powerpc_feature_detected!","llvm_asm!","log_syntax!",
+ "trace_macros!","assert!","assert_eq!","assert_ne!","cfg!","column!","compile_error!",
+ "concat!","dbg!","debug_assert!","debug_assert_eq!","debug_assert_ne!","env!","eprint!",
+ "eprintln!","file!","format!","format_args!","include!","include_bytes!","include_str!",
+ "is_x86_feature_detected!","line!","matches!","module_path!","option_env!","panic!",
+ "print!","println!","stringify!","thread_local!","todo!","try!","unimplemented!",
+ "unreachable!","vec!","write!","writeln!",
+]
file = open('keywords.h', 'w')
output_keywords(file, keywords_c, 'c')
output_keywords(file, keywords_cpp, 'cpp')
+output_keywords(file, keywords_rust, 'rust')
file.close()
diff --git a/string32.c b/string32.c
index 387d15c..a6f446a 100644
--- a/string32.c
+++ b/string32.c
@@ -130,4 +130,4 @@ bool is32_digit(char32_t c) {
// could this character appear in a C-style identifier?
// returns true for '_' and for any character iswalnum() classifies as alphanumeric.
// the c <= WINT_MAX test comes first so that the cast to wint_t is only evaluated
// for values that passed that range check.
bool is32_ident(char32_t c) {
return c <= WINT_MAX && (iswalnum((wint_t)c) || c == '_');
-}
+} \ No newline at end of file
diff --git a/syntax.c b/syntax.c
index 4f3864c..d75412c 100644
--- a/syntax.c
+++ b/syntax.c
@@ -1,5 +1,9 @@
#include "keywords.h"
+// all characters that can appear in a number
+#define SYNTAX_DIGITS "0123456789.xXoObBlLuUiIabcdefABCDEF_"
+
+
// returns the language this string is referring to, or LANG_NONE if it's invalid.
Language language_from_str(char const *str) {
for (int i = 0; i < LANG_COUNT; ++i) {
@@ -40,6 +44,25 @@ static inline bool keyword_matches(char32_t *text, size_t len, char const *keywo
}
}
+// Does the character at index i continue a number literal that includes index i-1?
+// line, line_len: the line being highlighted. i must be less than line_len.
+// Returns true if line[i] is one of the characters in SYNTAX_DIGITS, or if it is a
+// '+' or '-' directly after an exponent marker (e.g. the '+' in 1e+5).
+// Returns false for a '.' that is next to another '.' (e.g. the dots in 0..5).
+static inline bool syntax_number_continues(char32_t *line, u32 line_len, u32 i) {
+ char32_t c = line[i];
+ char32_t prev = i ? line[i-1] : 0;
+ // i + 1 < line_len rather than i < line_len - 1, so there is no unsigned subtraction
+ if (c == '.' && (prev == '.' || (i + 1 < line_len && line[i+1] == '.')))
+  return false; // can't have two .s in a row
+ // c != 0 is needed because strchr also matches the terminating null of SYNTAX_DIGITS,
+ // which would make a null character in the line count as part of a number.
+ if (c != 0 && c < CHAR_MAX && strchr(SYNTAX_DIGITS, (char)c))
+  return true;
+ // the sign of an exponent; both e and E are accepted as the exponent marker
+ return (prev == 'e' || prev == 'E') && (c == '+' || c == '-');
+}
+
+// Measures the run of identifier characters starting at line[i], i.e. the length a
+// keyword beginning there would have. For Rust, '!' is accepted as well so that
+// builtin macro names such as println! are measured as one word.
+// Returns 0 if line[i] cannot be part of a keyword (or if i is not less than line_len).
+static inline u32 syntax_keyword_len(Language lang, char32_t *line, u32 i, u32 line_len) {
+ u32 end = i;
+ while (end < line_len) {
+  char32_t ch = line[end];
+  bool part_of_word = is32_ident(ch)
+   || (lang == LANG_RUST && ch == '!'); // for rust builtin macros
+  if (!part_of_word)
+   break;
+  ++end;
+ }
+ return end - i;
+}
+
static void syntax_highlight_c_cpp(SyntaxState *state_ptr, bool cpp, char32_t *line, u32 line_len, SyntaxCharType *char_types) {
SyntaxState state = *state_ptr;
bool in_preprocessor = (state & SYNTAX_STATE_CPP_PREPROCESSOR) != 0;
@@ -53,10 +76,6 @@ static void syntax_highlight_c_cpp(SyntaxState *state_ptr, bool cpp, char32_t *l
int backslashes = 0;
for (u32 i = 0; i < line_len; ++i) {
- // necessary for the final " of a string to be highlighted
- bool in_string_now = in_string;
- bool in_char_now = in_char;
- bool in_multi_line_comment_now = in_multi_line_comment;
// are there 1/2 characters left in the line?
bool has_1_char = i + 1 < line_len;
@@ -88,25 +107,39 @@ static void syntax_highlight_c_cpp(SyntaxState *state_ptr, bool cpp, char32_t *l
if (line[i + 1] == '/')
in_single_line_comment = true; // //
else if (line[i + 1] == '*')
- in_multi_line_comment = in_multi_line_comment_now = true; // /*
+ in_multi_line_comment = true; // /*
} else if (in_multi_line_comment) {
if (i && line[i - 1] == '*') {
// */
in_multi_line_comment = false;
+ if (char_types) {
+ dealt_with = true;
+ char_types[i] = SYNTAX_COMMENT;
+ }
}
}
break;
case '"':
- if (in_string && backslashes % 2 == 0)
+ if (in_string && backslashes % 2 == 0) {
in_string = false;
- else if (!in_multi_line_comment && !in_single_line_comment && !in_char)
- in_string = in_string_now = true;
+ if (char_types) {
+ dealt_with = true;
+ char_types[i] = SYNTAX_STRING;
+ }
+ } else if (!in_multi_line_comment && !in_single_line_comment && !in_char) {
+ in_string = true;
+ }
break;
case '\'':
- if (in_char && backslashes % 2 == 0)
+ if (in_char && backslashes % 2 == 0) {
in_char = false;
- else if (!in_multi_line_comment && !in_single_line_comment && !in_string)
- in_char = in_char_now = true;
+ if (char_types) {
+ dealt_with = true;
+ char_types[i] = SYNTAX_CHARACTER;
+ }
+ } else if (!in_multi_line_comment && !in_single_line_comment && !in_string) {
+ in_char = true;
+ }
break;
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': // don't you wish C had case ranges...
// a number!
@@ -127,7 +160,7 @@ static void syntax_highlight_c_cpp(SyntaxState *state_ptr, bool cpp, char32_t *l
if ((i && is32_ident(line[i - 1])) || !is32_ident(c))
break; // can't be a keyword on its own.
- if (c == 'R' && has_2_chars && line[i + 1] == '"' && line[i + 2] == '(') {
+ if (!in_single_line_comment && !in_multi_line_comment && !in_string && c == 'R' && has_2_chars && line[i + 1] == '"' && line[i + 2] == '(') {
// raw string
in_raw_string = true;
raw_string_ending = false;
@@ -136,24 +169,28 @@ static void syntax_highlight_c_cpp(SyntaxState *state_ptr, bool cpp, char32_t *l
// keywords don't matter for advancing the state
if (char_types && !in_single_line_comment && !in_multi_line_comment && !in_string && !in_preprocessor && !in_char) {
- u32 keyword_end;
- // find where this keyword would end (if this is a keyword)
- for (keyword_end = i; keyword_end < line_len && is32_ident(line[keyword_end]); ++keyword_end);
-
- u32 keyword_len = keyword_end - i;
+ u32 keyword_len = syntax_keyword_len(cpp ? LANG_CPP : LANG_C, line, i, line_len);
char const *const *keywords = c < arr_count(syntax_all_keywords_c) ? syntax_all_keywords_c[c] : NULL;
char const *keyword = NULL;
- if (keywords)
- for (size_t k = 0; keywords[k]; ++k)
- if (keyword_matches(&line[i], keyword_len, keywords[k]))
+ if (keywords) {
+ for (size_t k = 0; keywords[k]; ++k) {
+ if (keyword_matches(&line[i], keyword_len, keywords[k])) {
keyword = keywords[k];
+ break;
+ }
+ }
+ }
if (cpp && !keyword) {
// check C++'s keywords too!
keywords = c < arr_count(syntax_all_keywords_cpp) ? syntax_all_keywords_cpp[c] : NULL;
- if (keywords)
- for (size_t k = 0; keywords[k]; ++k)
- if (keyword_matches(&line[i], keyword_len, keywords[k]))
+ if (keywords) {
+ for (size_t k = 0; keywords[k]; ++k) {
+ if (keyword_matches(&line[i], keyword_len, keywords[k])) {
keyword = keywords[k];
+ break;
+ }
+ }
+ }
}
if (keyword) {
@@ -177,20 +214,18 @@ static void syntax_highlight_c_cpp(SyntaxState *state_ptr, bool cpp, char32_t *l
}
} break;
}
- if (line[i] != '\\') backslashes = 0;
- if (in_number && !(is32_digit(line[i]) || line[i] == '.'
- || (line[i] < CHAR_MAX && strchr("xXoObBlLuUabcdefABCDEF", (char)line[i]))
- || (i && line[i-1] == 'e' && (line[i] == '+' || line[i] == '-')))) {
+ if (c != '\\') backslashes = 0;
+ if (in_number && !syntax_number_continues(line, line_len, i)) {
in_number = false;
}
if (char_types && !dealt_with) {
SyntaxCharType type = SYNTAX_NORMAL;
- if (in_single_line_comment || in_multi_line_comment_now)
+ if (in_single_line_comment || in_multi_line_comment)
type = SYNTAX_COMMENT;
- else if (in_string_now)
+ else if (in_string)
type = SYNTAX_STRING;
- else if (in_char_now)
+ else if (in_char)
type = SYNTAX_CHARACTER;
else if (in_number)
type = SYNTAX_CONSTANT;
@@ -210,24 +245,166 @@ static void syntax_highlight_c_cpp(SyntaxState *state_ptr, bool cpp, char32_t *l
}
static void syntax_highlight_rust(SyntaxState *state, char32_t *line, u32 line_len, SyntaxCharType *char_types) {
- u8 comment_depth = (u8)((*state & SYNTAX_STATE_RUST_COMMENT_DEPTH_MASK) / SYNTAX_STATE_RUST_COMMENT_DEPTH_MUL);
+ u32 comment_depth = (((u32)*state & SYNTAX_STATE_RUST_COMMENT_DEPTH_MASK) / SYNTAX_STATE_RUST_COMMENT_DEPTH_MUL);
+ bool in_string = (*state & SYNTAX_STATE_RUST_STRING) != 0;
+ bool string_is_raw = (*state & SYNTAX_STATE_RUST_STRING_IS_RAW) != 0;
+ bool in_number = false;
+ uint backslashes = 0;
+
for (u32 i = 0; i < line_len; ++i) {
char32_t c = line[i];
bool dealt_with = false;
- switch (c) {
+ bool has_1_char = i + 1 < line_len;
+ bool has_2_chars = i + 2 < line_len;
+ switch (c) {
+ case '/':
+ if (!in_string) {
+ if (i && line[i-1] == '*') {
+ // */
+ if (comment_depth)
+ --comment_depth;
+ if (char_types) {
+ char_types[i] = SYNTAX_COMMENT;
+ dealt_with = true;
+ }
+ } else if (has_1_char && line[i+1] == '*') {
+ // /*
+ ++comment_depth;
+ } else if (!comment_depth && has_1_char && line[i+1] == '/') {
+ // //
+ // just handle it all now
+ if (char_types) {
+ for (u32 j = i; j < line_len; ++j)
+ char_types[j] = SYNTAX_COMMENT;
+ }
+ i = line_len - 1;
+ dealt_with = true;
+ break;
+ }
+ }
+ break;
+ case '"':
+ if (!comment_depth) {
+ if (in_string) {
+ if (backslashes % 2 == 0) {
+ if (!string_is_raw || (has_1_char && line[i+1] == '#')) {
+ // end of string literal
+ in_string = false;
+ if (char_types) {
+ char_types[i] = SYNTAX_STRING;
+ dealt_with = true;
+ }
+ string_is_raw = false;
+ }
+ }
+ } else {
+ // start of string literal
+ in_string = true;
+ if (i && line[i-1] == '#')
+ string_is_raw = true;
+ }
+ }
+ break;
+ case '\'': {
+ if (!comment_depth && !in_string && has_2_chars) {
+ // figure out if this is a character or a lifetime
+ u32 char_end;
+ backslashes = line[i+1] == '\\';
+ for (char_end = i + 2; char_end < line_len; ++char_end) {
+ if (line[char_end] == '\'' && backslashes % 2 == 0) {
+ break;
+ }
+ if (line[char_end] < CHAR_MAX
+ && line[char_end - 1] != '\\'
+ && !strchr("abcdefABCDEF0123456789", (char)line[char_end]))
+ break;
+ }
+ if (char_end < line_len && line[char_end] == '\'') {
+ // a character literal
+ if (char_types)
+ for (u32 j = i; j <= char_end; ++j)
+ char_types[j] = SYNTAX_CHARACTER;
+ dealt_with = true;
+ i = char_end;
+ } else {
+ // a lifetime or something else
+ }
+ }
+ } break;
+ case '\\':
+ ++backslashes;
+ break;
+ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': // don't you wish C had case ranges...
+ // a number!
+ if (char_types && !comment_depth && !in_string && !in_number) {
+ in_number = true;
+ if (i && (is32_ident(line[i - 1])
+ || (line[i-1] == '.' && !(i >= 2 && line[i-2] == '.')))
+ ) {
+ // actually, this isn't a number. it's something like a*6* or u3*2*.
+ // also, don't highlight the 0 in tuple.0
+ in_number = false;
+ }
+ }
+ break;
+ default: {
+ if ((i && is32_ident(line[i - 1])) || !is32_ident(c))
+ break; // can't be a keyword on its own.
+
+ if (char_types && !in_string && !comment_depth && !in_number) {
+ u32 keyword_len = syntax_keyword_len(LANG_RUST, line, i, line_len);
+ char const *keyword = NULL;
+ char const *const *keywords = c < arr_count(syntax_all_keywords_rust) ? syntax_all_keywords_rust[c] : NULL;
+ if (keywords) {
+ for (size_t k = 0; keywords[k]; ++k) {
+ if (keyword_matches(&line[i], keyword_len, keywords[k])) {
+ keyword = keywords[k];
+ break;
+ }
+ }
+ if (keyword) {
+ SyntaxCharType type = SYNTAX_KEYWORD;
+ if ((keyword_len == 4 && streq(keyword, "true"))
+ || (keyword_len == 5 && streq(keyword, "false"))
+ ) {
+ type = SYNTAX_CONSTANT; // these are constants, not keywords
+ }
+ for (size_t j = 0; keyword[j]; ++j) {
+ char_types[i++] = type;
+ }
+ --i; // we'll increment i from the for loop
+ dealt_with = true;
+ }
+ }
+ }
+ } break;
}
+ if (c != '\\') backslashes = 0;
+ if (in_number && !syntax_number_continues(line, line_len, i))
+ in_number = false;
+
if (char_types && !dealt_with) {
SyntaxCharType type = SYNTAX_NORMAL;
+ if (comment_depth) {
+ type = SYNTAX_COMMENT;
+ } else if (in_string) {
+ type = SYNTAX_STRING;
+ } else if (in_number) {
+ type = SYNTAX_CONSTANT;
+ }
char_types[i] = type;
}
+
}
- uint max_comment_depth = (1u<<SYNTAX_STATE_RUST_COMMENT_DEPTH_BITS);
+ u32 max_comment_depth = ((u32)1<<SYNTAX_STATE_RUST_COMMENT_DEPTH_BITS);
if (comment_depth >= max_comment_depth)
- comment_depth = (u8)max_comment_depth;
+ comment_depth = max_comment_depth;
*state = (SyntaxState)(
- (comment_depth * SYNTAX_STATE_RUST_COMMENT_DEPTH_MUL)
+ (comment_depth * SYNTAX_STATE_RUST_COMMENT_DEPTH_MUL)
+ | (in_string * SYNTAX_STATE_RUST_STRING)
+ | (string_is_raw * SYNTAX_STATE_RUST_STRING_IS_RAW)
);
}
@@ -247,6 +424,9 @@ void syntax_highlight(SyntaxState *state, Language lang, char32_t *line, u32 lin
case LANG_CPP:
syntax_highlight_c_cpp(state, true, line, line_len, char_types);
break;
+ case LANG_RUST:
+ syntax_highlight_rust(state, line, line_len, char_types);
+ break;
case LANG_COUNT: assert(0); break;
}
}
diff --git a/ted.cfg b/ted.cfg
index 6ca9562..5158517 100644
--- a/ted.cfg
+++ b/ted.cfg
@@ -141,4 +141,5 @@ constant = #8ff
[extensions]
C = .c, .h
-C++ = .cpp, .hpp, .C, .H, .cxx, .hxx, .cc, .hh \ No newline at end of file
+C++ = .cpp, .hpp, .C, .H, .cxx, .hxx, .cc, .hh
+Rust = .rs \ No newline at end of file
diff --git a/ted.h b/ted.h
index 5018879..97a8c64 100644
--- a/ted.h
+++ b/ted.h
@@ -18,6 +18,7 @@ enum {
SYNTAX_STATE_RUST_COMMENT_DEPTH_MUL = 0x1u,
SYNTAX_STATE_RUST_COMMENT_DEPTH_BITS = 4, // number of bits we allocate for the comment depth.
SYNTAX_STATE_RUST_STRING = 0x10u,
+ SYNTAX_STATE_RUST_STRING_IS_RAW = 0x20u,
};
typedef u8 SyntaxState;
@@ -26,6 +27,7 @@ ENUM_U16 {
LANG_NONE,
LANG_C,
LANG_CPP,
+ LANG_RUST,
LANG_COUNT
} ENUM_U16_END(Language);
@@ -38,6 +40,7 @@ static LanguageName const language_names[] = {
{LANG_NONE, "None"},
{LANG_C, "C"},
{LANG_CPP, "C++"},
+ {LANG_RUST, "Rust"},
};
ENUM_U8 {
diff --git a/test.rs b/test.rs
index 56e8bf4..6c852d9 100644
--- a/test.rs
+++ b/test.rs
@@ -8,13 +8,19 @@ use std::io::{Result, BufRead, BufReader};
fn main() -> Result<()> {
let file = File::open("test.rs")?;
let mut reader = BufReader::new(file);
+ let mut lines = vec![];
+
loop {
let mut line = String::new();
if reader.read_line(&mut line)? == 0 {
// reached end of file
break;
}
- print!("{}", line);
+ line.pop();
+ lines.push(line);
+ }
+ for line in lines {
+ println!("{}", line);
}
print!("
string