summary | refs | log | tree | commit | diff
path: root/syntax.c
diff options
context:
space:
mode:
author: Leo Tenenbaum <pommicket@gmail.com> 2021-04-19 22:51:33 -0400
committer: Leo Tenenbaum <pommicket@gmail.com> 2021-04-19 22:53:35 -0400
commit: da61efabb1b28c5500824a560c960a720a628de0 (patch)
tree: 618127733dd0a2923536f52b920671723e6471db /syntax.c
parent: 61cd687d858b7a3811a168393f6b698c24cfff17 (diff)
markdown highlighting
Diffstat (limited to 'syntax.c')
-rw-r--r-- syntax.c 191
1 files changed, 191 insertions, 0 deletions
diff --git a/syntax.c b/syntax.c
index fdb2d6a..0a4805d 100644
--- a/syntax.c
+++ b/syntax.c
@@ -22,6 +22,7 @@ char const *language_comment_start(Language l) {
case LANG_PYTHON: return "# ";
case LANG_TEX: return "% ";
case LANG_NONE:
+ case LANG_MARKDOWN:
case LANG_COUNT:
break;
}
@@ -657,6 +658,193 @@ static void syntax_highlight_tex(SyntaxState *state, char32_t *line, u32 line_le
);
}
+// Syntax-highlights a single line of Markdown.
+//
+// state      - in/out state carried from one line to the next. On return it is either
+//              SYNTAX_STATE_MARKDOWN_CODE (the next line is inside a ``` fenced code block)
+//              or 0; no other bits are kept.
+// line       - the characters of the line
+// line_len   - number of characters in line
+// char_types - if non-NULL, receives one character type for each of the line_len characters.
+//              If NULL, only *state is updated.
+static void syntax_highlight_markdown(SyntaxState *state, char32_t *line, u32 line_len, SyntaxCharType *char_types) {
+	bool multiline_code = (*state & SYNTAX_STATE_MARKDOWN_CODE) != 0;
+
+	*state = (multiline_code * SYNTAX_STATE_MARKDOWN_CODE);
+
+	if (line_len >= 3 && line[0] == '`' && line[1] == '`' && line[2] == '`') {
+		if (multiline_code) {
+			// end of multi-line code
+			// (multiline_code stays true so that the closing fence itself is highlighted as code)
+			*state = 0;
+		} else {
+			// start of multi-line code
+			multiline_code = true;
+			*state = SYNTAX_STATE_MARKDOWN_CODE;
+		}
+	}
+
+	if (!char_types) {
+		return;
+	}
+
+	if (multiline_code) {
+		// the memset below is only correct if each character type is a single byte
+		static_assert_if_possible(sizeof *char_types == 1)
+		memset(char_types, SYNTAX_CODE, line_len);
+		return;
+	}
+
+	bool start_of_line = true; // is this the start of the line (not counting whitespace)
+	int backslashes = 0; // number of consecutive backslashes immediately before the current character
+	char const *format_ending = NULL; // "**" if we are inside **bold**, etc.
+
+	for (u32 i = 0; i < line_len; ++i) {
+		char32_t c = line[i];
+		bool next_sol = start_of_line && is32_space(c);
+		bool has_1_char = i+1 < line_len;
+		bool next_is_space = has_1_char && is32_space(line[i+1]);
+
+		// default type for this character; the cases below may override it
+		char_types[i] = SYNTAX_NORMAL;
+		if (format_ending) {
+			if (streq(format_ending, "`"))
+				char_types[i] = SYNTAX_CODE;
+			else
+				char_types[i] = SYNTAX_STRING;
+		}
+
+		String32 remains = {
+			.str = line + i,
+			.len = line_len - i
+		};
+		if (!format_ending && str32_has_ascii_prefix(remains, "http")) {
+			if (str32_has_ascii_prefix(remains, "http://")
+				|| str32_has_ascii_prefix(remains, "https://")) {
+				// a link!
+				for (; i < line_len; ++i) {
+					if (is32_space(line[i]))
+						break;
+					char_types[i] = SYNTAX_LINK;
+				}
+				// strchr also matches the string's terminating NUL, so exclude 0 explicitly
+				if (line[i-1] != 0 && line[i-1] < 128 && strchr(".!,", (char)line[i-1])) {
+					// punctuation after URLs
+					char_types[i-1] = SYNTAX_NORMAL;
+				}
+				if (i < line_len) {
+					// the loop increment will step over the whitespace which ended the URL,
+					// so give it a type here rather than leaving it unset.
+					char_types[i] = SYNTAX_NORMAL;
+				}
+				goto bottom;
+			}
+		}
+
+		switch (c) {
+		case '#':
+			if (start_of_line) {
+				// # heading: highlight the rest of the line
+				memset(char_types + i, SYNTAX_STRING, line_len - i);
+				i = line_len;
+			}
+			break;
+		case '*':
+			if (start_of_line && next_is_space) {
+				// bullet list item
+				char_types[i] = SYNTAX_BUILTIN;
+			}
+			FALLTHROUGH
+		case '_':
+			if (backslashes % 2 == 1) {
+				// \* or \_
+			} else if (has_1_char && line[i+1] == c) {
+				// **bold** or __bold__
+				char const *end = c == '*' ? "**" : "__";
+				if (format_ending) {
+					if (streq(format_ending, end)) {
+						char_types[i++] = SYNTAX_STRING;
+						char_types[i] = SYNTAX_STRING;
+						format_ending = NULL;
+					}
+				} else {
+					// line[i+1] is the second delimiter character, so the character which
+					// must not be a space for bold to start is line[i+2].
+					bool space_follows = i+2 < line_len && is32_space(line[i+2]);
+					if (!space_follows) {
+						char_types[i++] = SYNTAX_STRING;
+						char_types[i] = SYNTAX_STRING;
+						format_ending = end;
+					}
+				}
+			} else {
+				// *italics* or _italics_
+				char const *end = c == '*' ? "*" : "_";
+				if (format_ending) {
+					if (streq(format_ending, end))
+						format_ending = NULL;
+				} else if (!next_is_space) {
+					char_types[i] = SYNTAX_STRING;
+					format_ending = end;
+				}
+			}
+			break;
+		case '`':
+			if (backslashes % 2 == 1) {
+				// \`
+			} else if (format_ending) {
+				if (streq(format_ending, "`"))
+					format_ending = NULL;
+			} else {
+				char_types[i] = SYNTAX_CODE;
+				format_ending = "`";
+			}
+			break;
+		case '-':
+		case '>':
+			if (start_of_line && next_is_space) {
+				// list item/blockquote
+				char_types[i] = SYNTAX_BUILTIN;
+			}
+			break;
+		case ANY_DIGIT:
+			if (start_of_line) {
+				size_t spn = str32_ascii_spn(remains, "0123456789");
+				size_t end = i + spn;
+				if (end < line_len && line[end] == '.') {
+					// numbered list item
+					for (size_t k = i; k <= end; ++k) {
+						char_types[k] = SYNTAX_BUILTIN;
+					}
+					// stop on the '.' so that the loop increment moves on to the character
+					// after it, instead of skipping that character without giving it a type.
+					i = (u32)end;
+				}
+			}
+			break;
+		case '[': {
+			if (backslashes % 2 == 0) {
+				// [URLS](like-this.com)
+				u32 j;
+				// look for the first unescaped ]
+				for (j = i+1; j < line_len; ++j) {
+					if (line[j] == ']' && backslashes % 2 == 0)
+						break;
+					if (line[j] == '\\')
+						++backslashes;
+					else
+						backslashes = 0;
+				}
+				backslashes = 0;
+				u32 closing_bracket = j; // == line_len if there was no ]
+				if (closing_bracket+2 < line_len && line[closing_bracket+1] == '(') {
+					// look for the first unescaped )
+					for (j = closing_bracket+2; j < line_len; ++j) {
+						if (line[j] == ')' && backslashes % 2 == 0)
+							break;
+						if (line[j] == '\\')
+							++backslashes;
+						else
+							backslashes = 0;
+					}
+					u32 closing_parenthesis = j; // == line_len if there was no )
+					if (closing_parenthesis < line_len) {
+						// hooray!
+						if (i > 0 && line[i-1] == '!')
+							--i; // images are links, but with ! before them
+						memset(&char_types[i], SYNTAX_LINK, closing_parenthesis+1 - i);
+						i = closing_parenthesis;
+					}
+					backslashes = 0;
+
+				}
+			}
+			} break;
+		}
+	bottom:
+		if (i >= line_len) break;
+
+		// keep count of the run of backslashes ending at the current character
+		if (line[i] != '\\')
+			backslashes = 0;
+		else
+			++backslashes;
+
+		start_of_line = next_sol;
+	}
+
+}
+
// This is the main syntax highlighting function. It will determine which colors to use for each character.
// Rather than returning colors, it returns a character type (e.g. comment) which can be converted to a color.
// To highlight multiple lines, start out with a zeroed SyntaxState, and pass a pointer to it each time.
@@ -682,6 +870,9 @@ void syntax_highlight(SyntaxState *state, Language lang, char32_t *line, u32 lin
case LANG_TEX:
syntax_highlight_tex(state, line, line_len, char_types);
break;
+ case LANG_MARKDOWN:
+ syntax_highlight_markdown(state, line, line_len, char_types);
+ break;
case LANG_COUNT: assert(0); break;
}
}