summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLeo Tenenbaum <pommicket@gmail.com>2021-04-19 22:51:33 -0400
committerLeo Tenenbaum <pommicket@gmail.com>2021-04-19 22:53:35 -0400
commitda61efabb1b28c5500824a560c960a720a628de0 (patch)
tree618127733dd0a2923536f52b920671723e6471db
parent61cd687d858b7a3811a168393f6b698c24cfff17 (diff)
markdown highlighting
-rw-r--r--README.md4
-rw-r--r--base.h4
-rw-r--r--main.c1
-rw-r--r--string32.c19
-rw-r--r--syntax.c191
-rw-r--r--ted.cfg1
-rw-r--r--ted.h8
7 files changed, 226 insertions, 2 deletions
diff --git a/README.md b/README.md
index 2ef6bea..04eb29b 100644
--- a/README.md
+++ b/README.md
@@ -2,8 +2,8 @@
A text editor.
-**ted is still very new. There is no nice installer yet (if you want ted, you'll have to build it from source).
-I'll release installers after testing it a bit more to try to find any remaining bugs.**
+**ted is still very new. There is no nice installer yet (if you want ted, you'll have to build it from source).**
+**I'll release installers after testing it a bit more to try to find any remaining bugs.**
<img src="ted.png">
diff --git a/base.h b/base.h
index 94ff2bf..9e39f93 100644
--- a/base.h
+++ b/base.h
@@ -9,6 +9,10 @@
#define _GNU_SOURCE
#endif
+// Marks an intentional fall-through from one switch case into the next.
+// The GNU version already contains its own semicolon, so write `FALLTHROUGH` with no `;` after it.
+// On compilers which don't define __GNUC__ it expands to nothing, so code using it still compiles.
+#if __GNUC__
+#define FALLTHROUGH __attribute__((fallthrough));
+#else
+#define FALLTHROUGH
+#endif
+
#if _WIN32
#include <windows.h>
#include <shlobj.h>
diff --git a/main.c b/main.c
index 8cf3997..64fc96d 100644
--- a/main.c
+++ b/main.c
@@ -1,3 +1,4 @@
+// HTML highlighting
#include "base.h"
no_warn_start
#if _WIN32
diff --git a/string32.c b/string32.c
index 9b88f0e..1b4f1ac 100644
--- a/string32.c
+++ b/string32.c
@@ -148,6 +148,25 @@ size_t str32_remove_all_instances_of_char(String32 *s, char32_t c) {
return ndeleted;
}
+// like strspn, but for a String32: counts how many characters at the start of `s`
+// are ASCII characters which appear in the C-string `charset`.
+// every character of `charset` must itself be ASCII (this is checked with assert).
+size_t str32_ascii_spn(String32 s, char const *charset) {
+	u32 count = 0;
+	while (count < s.len) {
+		char32_t c = s.str[count];
+		if (c >= 128)
+			break; // a non-ASCII character can never be in charset.
+		char const *member = charset;
+		while (*member) {
+			assert((char32_t)*member < 128);
+			if ((char32_t)*member == c)
+				break;
+			++member;
+		}
+		if (!*member)
+			break; // reached the end of charset without finding c.
+		++count;
+	}
+	return count;
+}
+
bool is32_space(char32_t c) {
return c <= WINT_MAX && iswspace((wint_t)c);
}
diff --git a/syntax.c b/syntax.c
index fdb2d6a..0a4805d 100644
--- a/syntax.c
+++ b/syntax.c
@@ -22,6 +22,7 @@ char const *language_comment_start(Language l) {
case LANG_PYTHON: return "# ";
case LANG_TEX: return "% ";
case LANG_NONE:
+ case LANG_MARKDOWN:
case LANG_COUNT:
break;
}
@@ -657,6 +658,193 @@ static void syntax_highlight_tex(SyntaxState *state, char32_t *line, u32 line_le
);
}
+// Highlights one line of markdown.
+// `state` carries SYNTAX_STATE_MARKDOWN_CODE from one line to the next, which is how ``` code
+// sections spanning several lines are tracked. It is updated even if `char_types` is NULL.
+// If `char_types` is not NULL, an entry is written for each of the `line_len` characters of `line`.
+static void syntax_highlight_markdown(SyntaxState *state, char32_t *line, u32 line_len, SyntaxCharType *char_types) {
+	bool multiline_code = (*state & SYNTAX_STATE_MARKDOWN_CODE) != 0;
+
+	*state = (multiline_code * SYNTAX_STATE_MARKDOWN_CODE);
+
+	if (line_len >= 3 && line[0] == '`' && line[1] == '`' && line[2] == '`') {
+		if (multiline_code) {
+			// end of multi-line code
+			// (multiline_code is left set, so this closing line is still highlighted as code)
+			*state = 0;
+		} else {
+			// start of multi-line code
+			multiline_code = true;
+			*state = SYNTAX_STATE_MARKDOWN_CODE;
+		}
+	}
+
+	if (!char_types) {
+		// the caller only wanted the state to be updated
+		return;
+	}
+
+	if (multiline_code) {
+		static_assert_if_possible(sizeof *char_types == 1)
+		memset(char_types, SYNTAX_CODE, line_len);
+		return;
+	}
+
+	bool start_of_line = true; // is this the start of the line (not counting whitespace)
+	int backslashes = 0; // number of consecutive backslashes right before the current character
+	char const *format_ending = NULL; // "**" if we are inside **bold**, etc.
+
+	for (u32 i = 0; i < line_len; ++i) {
+		char32_t c = line[i];
+		bool next_sol = start_of_line && is32_space(c);
+		bool has_1_char = i+1 < line_len;
+		bool next_is_space = has_1_char && is32_space(line[i+1]);
+
+		char_types[i] = SYNTAX_NORMAL;
+		if (format_ending) {
+			if (streq(format_ending, "`"))
+				char_types[i] = SYNTAX_CODE;
+			else
+				char_types[i] = SYNTAX_STRING;
+		}
+
+		String32 remains = {
+			.str = line + i,
+			.len = line_len - i
+		};
+		if (!format_ending && str32_has_ascii_prefix(remains, "http")) {
+			if (str32_has_ascii_prefix(remains, "http://")
+				|| str32_has_ascii_prefix(remains, "https://")) {
+				// a link!
+				u32 link_end = i; // will be one past the last character of the link
+				for (; link_end < line_len; ++link_end) {
+					if (is32_space(line[link_end]))
+						break;
+					char_types[link_end] = SYNTAX_LINK;
+				}
+				if (line[link_end-1] < 128 && strchr(".!,", (char)line[link_end-1])) {
+					// punctuation after URLs
+					char_types[link_end-1] = SYNTAX_NORMAL;
+				}
+				// stop on the last character of the link, so that the loop's ++i moves on to
+				// the character right after the link rather than skipping over it.
+				i = link_end - 1;
+				goto bottom;
+			}
+		}
+
+		switch (c) {
+		case '#':
+			if (start_of_line) {
+				// heading: the rest of the line gets the same type
+				memset(char_types + i, SYNTAX_STRING, line_len - i);
+				i = line_len;
+			}
+			break;
+		case '*':
+			if (start_of_line && next_is_space) {
+				// bullet list item
+				char_types[i] = SYNTAX_BUILTIN;
+			}
+			FALLTHROUGH
+		case '_':
+			if (backslashes % 2 == 1) {
+				// \* or \_
+			} else if (has_1_char && line[i+1] == c) {
+				// **bold** or __bold__
+				char const *end = c == '*' ? "**" : "__";
+				if (format_ending) {
+					if (streq(format_ending, end)) {
+						char_types[i++] = SYNTAX_STRING;
+						char_types[i] = SYNTAX_STRING;
+						format_ending = NULL;
+					}
+				} else if (!next_is_space) {
+					char_types[i++] = SYNTAX_STRING;
+					char_types[i] = SYNTAX_STRING;
+					format_ending = end;
+				}
+			} else {
+				// *italics* or _italics_
+				char const *end = c == '*' ? "*" : "_";
+				if (format_ending) {
+					if (streq(format_ending, end))
+						format_ending = NULL;
+				} else if (!next_is_space) {
+					char_types[i] = SYNTAX_STRING;
+					format_ending = end;
+				}
+			}
+			break;
+		case '`':
+			if (backslashes % 2 == 1) {
+				// \`
+			} else if (format_ending) {
+				if (streq(format_ending, "`"))
+					format_ending = NULL;
+			} else {
+				char_types[i] = SYNTAX_CODE;
+				format_ending = "`";
+			}
+			break;
+		case '-':
+		case '>':
+			if (start_of_line && next_is_space) {
+				// list item/blockquote
+				char_types[i] = SYNTAX_BUILTIN;
+			}
+			break;
+		case ANY_DIGIT:
+			if (start_of_line) {
+				size_t spn = str32_ascii_spn(remains, "0123456789");
+				size_t end = i + spn; // index of the character right after the digits
+				if (end < line_len && line[end] == '.') {
+					// numbered list item
+					for (; i < end; ++i) {
+						char_types[i] = SYNTAX_BUILTIN;
+					}
+					// i is now on the '.'. it is left there, so that the loop's ++i moves on to
+					// the character right after the '.' rather than skipping over it.
+					char_types[i] = SYNTAX_BUILTIN;
+				}
+			}
+			break;
+		case '[': {
+			if (backslashes % 2 == 0) {
+				// [URLS](like-this.com)
+				u32 j;
+				for (j = i+1; j < line_len; ++j) {
+					if (line[j] == ']' && backslashes % 2 == 0)
+						break;
+					if (line[j] == '\\')
+						++backslashes;
+					else
+						backslashes = 0;
+				}
+				backslashes = 0;
+				u32 closing_bracket = j;
+				if (closing_bracket+2 < line_len && line[closing_bracket+1] == '(') {
+					for (j = closing_bracket+2; j < line_len; ++j) {
+						if (line[j] == ')' && backslashes % 2 == 0)
+							break;
+						if (line[j] == '\\')
+							++backslashes;
+						else
+							backslashes = 0;
+					}
+					u32 closing_parenthesis = j;
+					if (closing_parenthesis < line_len) {
+						// hooray!
+						if (i > 0 && line[i-1] == '!')
+							--i; // images are links, but with ! before them
+						memset(&char_types[i], SYNTAX_LINK, closing_parenthesis+1 - i);
+						i = closing_parenthesis;
+					}
+					backslashes = 0;
+
+				}
+			}
+			} break;
+		}
+	bottom:
+		// some of the cases above move i forwards; it might now be past the end of the line.
+		if (i >= line_len) break;
+
+		if (line[i] != '\\')
+			backslashes = 0;
+		else
+			++backslashes;
+
+		start_of_line = next_sol;
+	}
+
+}
+
// This is the main syntax highlighting function. It will determine which colors to use for each character.
// Rather than returning colors, it returns a character type (e.g. comment) which can be converted to a color.
// To highlight multiple lines, start out with a zeroed SyntaxState, and pass a pointer to it each time.
@@ -682,6 +870,9 @@ void syntax_highlight(SyntaxState *state, Language lang, char32_t *line, u32 lin
case LANG_TEX:
syntax_highlight_tex(state, line, line_len, char_types);
break;
+ case LANG_MARKDOWN:
+ syntax_highlight_markdown(state, line, line_len, char_types);
+ break;
case LANG_COUNT: assert(0); break;
}
}
diff --git a/ted.cfg b/ted.cfg
index 1e01cc3..1ed74cc 100644
--- a/ted.cfg
+++ b/ted.cfg
@@ -208,3 +208,4 @@ C++ = .cpp, .hpp, .C, .H, .cxx, .hxx, .cc, .hh
Rust = .rs
Python = .py
Tex = .tex
+Markdown = .md
diff --git a/ted.h b/ted.h
index 66648f5..c05f8d3 100644
--- a/ted.h
+++ b/ted.h
@@ -33,6 +33,10 @@ enum {
SYNTAX_STATE_TEX_VERBATIM = 0x04u, // inside \begin{verbatim} ... \end{verbatim}
};
+// flags stored in a SyntaxState while highlighting markdown
+enum {
+ SYNTAX_STATE_MARKDOWN_CODE = 0x01u, // inside ``` ``` code section (carried over from one line to the next)
+};
+
typedef u8 SyntaxState;
ENUM_U16 {
@@ -42,6 +46,7 @@ ENUM_U16 {
LANG_RUST,
LANG_PYTHON,
LANG_TEX,
+ LANG_MARKDOWN,
LANG_COUNT
} ENUM_U16_END(Language);
@@ -57,6 +62,7 @@ static LanguageName const language_names[] = {
{LANG_RUST, "Rust"},
{LANG_PYTHON, "Python"},
{LANG_TEX, "Tex"},
+ {LANG_MARKDOWN, "Markdown"},
};
static_assert_if_possible(arr_count(language_names) == LANG_COUNT)
@@ -73,6 +79,8 @@ ENUM_U8 {
} ENUM_U8_END(SyntaxCharType);
#define SYNTAX_MATH SYNTAX_STRING // for tex
+#define SYNTAX_CODE SYNTAX_PREPROCESSOR // for markdown: `inline code` and ``` code sections
+#define SYNTAX_LINK SYNTAX_CONSTANT // for markdown: http(s):// URLs and [links](like-this.com)
typedef struct {
float cursor_blink_time_on, cursor_blink_time_off;