summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Leo Tenenbaum <pommicket@gmail.com> 2021-04-19 10:07:07 -0400
committer Leo Tenenbaum <pommicket@gmail.com> 2021-04-19 10:07:07 -0400
commit 61cd687d858b7a3811a168393f6b698c24cfff17 (patch)
tree d4046d72fd49543238b64d1f411419e33db197cd
parent d1f3f28288dc35e515b5fe947e952e7e22c90fab (diff)
tex syntax highlighting
-rw-r--r-- README.md 2
-rw-r--r-- string32.c 42
-rw-r--r-- syntax.c 110
-rw-r--r-- ted.cfg 1
-rw-r--r-- ted.h 12
5 files changed, 164 insertions, 3 deletions
diff --git a/README.md b/README.md
index 280c1f3..2ef6bea 100644
--- a/README.md
+++ b/README.md
@@ -23,7 +23,7 @@ in other editors.
- Multiple tabs, each with a different file
- Split screen (default: Ctrl+\\, Ctrl+Shift+\\)
- Auto-indent
-- Syntax highlighting for C, C++, Rust, and Python.
+- Syntax highlighting for C, C++, Rust, Python, and LaTeX.
- Find and replace (with regular expressions!)
- Run build command (F4), go to errors
- Run any shell command (Ctrl+!)
diff --git a/string32.c b/string32.c
index 09874bc..9b88f0e 100644
--- a/string32.c
+++ b/string32.c
@@ -76,6 +76,43 @@ static char *str32_to_utf8_cstr(String32 s) {
return utf8;
}
+// Compare s to the NUL-terminated ASCII string `ascii`, in the style of strcmp.
+// Returns -1 if s sorts before ascii, +1 if s sorts after ascii, and 0 if they are equal.
+// A proper prefix sorts before the longer string, so that the result for strings of
+// different lengths agrees with the character-by-character comparison.
+// Every character of `ascii` that gets examined must be < 128 (checked with assert).
+static int str32_cmp_ascii(String32 s, char const *ascii) {
+	for (size_t i = 0; i < s.len; ++i) {
+		assert((char32_t)ascii[i] < 128);
+		if ((char32_t)ascii[i] == '\0')
+			return +1; // ascii is a proper prefix of s, so s is the greater string
+		if (s.str[i] > (char32_t)ascii[i])
+			return +1;
+		if (s.str[i] < (char32_t)ascii[i])
+			return -1;
+	}
+	if (ascii[s.len] != '\0') {
+		// s is a proper prefix of ascii, so s is the smaller string
+		return -1;
+	}
+	return 0;
+}
+
+// Does s begin with the NUL-terminated ASCII string `ascii`?
+// Returns true if so (this includes the case where s is exactly `ascii`), false otherwise.
+// Every character of `ascii` that gets examined must be < 128 (checked with assert).
+static int str32_has_ascii_prefix(String32 s, char const *ascii) {
+	size_t pos = 0;
+	while (pos < s.len) {
+		char32_t expected = (char32_t)ascii[pos];
+		assert(expected < 128);
+		if (expected == '\0')
+			return true; // reached the end of ascii without a mismatch
+		if (s.str[pos] != expected)
+			return false; // the strings differ at this position
+		++pos;
+	}
+	// we ran out of s; it only counts as a match if ascii ends here too
+	// (otherwise s is a proper prefix of ascii, not the other way around)
+	return ascii[s.len] == '\0';
+}
+
// returns the index of the given character in the string, or the length of the string if it's not found.
size_t str32chr(String32 s, char32_t c) {
for (size_t i = 0; i < s.len; ++i) {
@@ -127,8 +164,11 @@ bool is32_digit(char32_t c) {
return c <= WINT_MAX && iswdigit((wint_t)c);
}
+// is this a graphical character, according to iswgraph?
+bool is32_graph(char32_t c) {
+	// values above WINT_MAX can't be passed to iswgraph, so they are never graphical
+	if (c > WINT_MAX)
+		return false;
+	return iswgraph((wint_t)c) != 0;
+}
+
// could this character appear in a C-style identifier?
bool is32_ident(char32_t c) {
return c <= WINT_MAX && (iswalnum((wint_t)c) || c == '_');
}
-
diff --git a/syntax.c b/syntax.c
index b8efa93..fdb2d6a 100644
--- a/syntax.c
+++ b/syntax.c
@@ -13,13 +13,14 @@ Language language_from_str(char const *str) {
return LANG_NONE;
}
-// start of single line comment for language l
+// start of single line comment for language l -- used for comment/uncomment selection
char const *language_comment_start(Language l) {
switch (l) {
case LANG_C: return "/* ";
case LANG_RUST:
case LANG_CPP: return "// ";
case LANG_PYTHON: return "# ";
+ case LANG_TEX: return "% ";
case LANG_NONE:
case LANG_COUNT:
break;
@@ -552,6 +553,110 @@ static void syntax_highlight_python(SyntaxState *state, char32_t *line, u32 line
}
}
+// could this character appear in the name of a tex command?
+static bool is_tex_ident(char32_t c) {
+	if (!is32_ident(c))
+		return false;
+	// unlike C identifiers, tex identifiers cannot contain digits
+	return !is32_digit(c);
+}
+
+// Syntax-highlight one line of tex.
+// state      - in/out: the SYNTAX_STATE_TEX_* flags left over from the previous line.
+//              It is overwritten with the flags to use for the next line.
+// line       - the characters of the line; line_len is how many there are.
+// char_types - if non-NULL, char_types[i] is set to the type of line[i] for each i < line_len.
+//              if NULL, only *state is updated.
+static void syntax_highlight_tex(SyntaxState *state, char32_t *line, u32 line_len, SyntaxCharType *char_types) {
+	bool dollar = (*state & SYNTAX_STATE_TEX_DOLLAR) != 0;
+	bool dollardollar = (*state & SYNTAX_STATE_TEX_DOLLARDOLLAR) != 0;
+	bool verbatim = (*state & SYNTAX_STATE_TEX_VERBATIM) != 0;
+
+	for (u32 i = 0; i < line_len; ++i) {
+		char32_t c = line[i];
+		bool has_1_char = i + 1 < line_len;
+
+		// default type for this character; the cases below override it where needed
+		if (char_types)
+			char_types[i] = dollar || dollardollar ? SYNTAX_MATH : SYNTAX_NORMAL;
+		switch (c) {
+		case '\\':
+			if (has_1_char) {
+				if (is32_graph(line[i+1])) {
+					if (is_tex_ident(line[i+1])) {
+						// command, e.g. \begin
+						String32 command_str = {
+							.str = line + i+1,
+							.len = line_len - (i+1),
+						};
+						// entering verbatim is delayed until after the command is highlighted,
+						// so that \begin itself still gets highlighted
+						bool new_verbatim = false;
+						if (!dollar && !dollardollar) {
+							if (!verbatim && str32_has_ascii_prefix(command_str, "begin{verbatim}")) {
+								new_verbatim = true;
+							} else if (verbatim && str32_has_ascii_prefix(command_str, "end{verbatim}")) {
+								// leave verbatim right away, so that \end gets highlighted below
+								verbatim = false;
+							}
+						}
+
+						if (!verbatim) {
+							if (char_types) char_types[i] = SYNTAX_KEYWORD;
+							for (++i; i < line_len; ++i) {
+								if (is_tex_ident(line[i])) {
+									if (char_types) char_types[i] = SYNTAX_KEYWORD;
+								} else {
+									// not part of the command; let the outer loop deal with it
+									--i;
+									break;
+								}
+							}
+							verbatim = new_verbatim;
+						}
+					} else if (!verbatim) {
+						// something like \\, \%, etc.
+						if (char_types) char_types[i] = SYNTAX_KEYWORD;
+						++i;
+						if (char_types) char_types[i] = SYNTAX_KEYWORD;
+					}
+				}
+			}
+			break;
+		case '%':
+			// comment
+			if (!verbatim) {
+				// the rest of the line is part of the comment
+				for (; i < line_len; ++i) {
+					if (char_types)
+						char_types[i] = SYNTAX_COMMENT;
+				}
+			}
+			break;
+		case '&':
+			// table/matrix/etc. separator
+			if (char_types && !verbatim)
+				char_types[i] = SYNTAX_BUILTIN;
+			break;
+		case '$':
+			if (!verbatim) {
+				if (!dollar && has_1_char && line[i+1] == '$') {
+					// $$
+					// consume both dollar signs, whether this opens or closes the math.
+					// (if the second $ of an opening $$ were left for the next iteration,
+					// it could be paired with a following $ and misread as a closing $$.)
+					if (char_types) char_types[i] = SYNTAX_MATH;
+					++i;
+					if (char_types) char_types[i] = SYNTAX_MATH;
+					dollardollar = !dollardollar;
+				} else if (!dollardollar) {
+					// single $
+					if (dollar) {
+						// the closing $ already got SYNTAX_MATH from the default above
+						dollar = false;
+					} else {
+						dollar = true;
+						if (char_types) char_types[i] = SYNTAX_MATH;
+					}
+				}
+			}
+			break;
+		default:
+			break;
+		}
+	}
+
+	*state = (SyntaxState)(
+		  (dollar * SYNTAX_STATE_TEX_DOLLAR)
+		| (dollardollar * SYNTAX_STATE_TEX_DOLLARDOLLAR)
+		| (verbatim * SYNTAX_STATE_TEX_VERBATIM)
+	);
+}
+
// This is the main syntax highlighting function. It will determine which colors to use for each character.
// Rather than returning colors, it returns a character type (e.g. comment) which can be converted to a color.
// To highlight multiple lines, start out with a zeroed SyntaxState, and pass a pointer to it each time.
@@ -574,6 +679,9 @@ void syntax_highlight(SyntaxState *state, Language lang, char32_t *line, u32 lin
case LANG_PYTHON:
syntax_highlight_python(state, line, line_len, char_types);
break;
+ case LANG_TEX:
+ syntax_highlight_tex(state, line, line_len, char_types);
+ break;
case LANG_COUNT: assert(0); break;
}
}
diff --git a/ted.cfg b/ted.cfg
index 8d6b251..1e01cc3 100644
--- a/ted.cfg
+++ b/ted.cfg
@@ -207,3 +207,4 @@ C = .c, .h
C++ = .cpp, .hpp, .C, .H, .cxx, .hxx, .cc, .hh
Rust = .rs
Python = .py
+Tex = .tex
diff --git a/ted.h b/ted.h
index 5410398..66648f5 100644
--- a/ted.h
+++ b/ted.h
@@ -27,6 +27,12 @@ enum {
SYNTAX_STATE_PYTHON_STRING_DBL_QUOTED = 0x02u, // is this a """ string, as opposed to a ''' string?
};
+// flags kept in a SyntaxState while highlighting tex
+enum {
+	SYNTAX_STATE_TEX_DOLLAR = 1u << 0, // inside math $ ... $
+	SYNTAX_STATE_TEX_DOLLARDOLLAR = 1u << 1, // inside math $$ ... $$
+	SYNTAX_STATE_TEX_VERBATIM = 1u << 2, // inside \begin{verbatim} ... \end{verbatim}
+};
+
typedef u8 SyntaxState;
ENUM_U16 {
@@ -35,6 +41,7 @@ ENUM_U16 {
LANG_CPP,
LANG_RUST,
LANG_PYTHON,
+ LANG_TEX,
LANG_COUNT
} ENUM_U16_END(Language);
@@ -49,8 +56,11 @@ static LanguageName const language_names[] = {
{LANG_CPP, "C++"},
{LANG_RUST, "Rust"},
{LANG_PYTHON, "Python"},
+ {LANG_TEX, "Tex"},
};
+static_assert_if_possible(arr_count(language_names) == LANG_COUNT)
+
ENUM_U8 {
SYNTAX_NORMAL,
SYNTAX_KEYWORD,
@@ -62,6 +72,8 @@ ENUM_U8 {
SYNTAX_CONSTANT,
} ENUM_U8_END(SyntaxCharType);
+#define SYNTAX_MATH SYNTAX_STRING // for tex
+
typedef struct {
float cursor_blink_time_on, cursor_blink_time_off;
u32 colors[COLOR_COUNT];