C♯ syntax highlighting

author: pommicket <pommicket@gmail.com> 2025-06-12 14:52:54 -0400
committer: pommicket <pommicket@gmail.com> 2025-06-12 14:52:54 -0400
commit: 828741f64d04cde5cd013597e17e1fcc4c05bc9d (patch)
tree: 507fdf440445a95c26ce8cdba542dd66c56ac148
parent: 218b0f332bc7944483c6eaa944f61a34ed5ced80 (diff)
1 files changed, 176 insertions, 5 deletions
diff --git a/syntax.c b/syntax.c
index 6139ed5..82a7ede 100644
--- a/syntax.c
+++ b/syntax.c
@@ -75,6 +75,12 @@ enum {
 	SYNTAX_STATE_CSS_IN_BRACES = 0x02,
 };
 
+enum {
+	SYNTAX_STATE_CSHARP_STRING_RAW = 0x01, // line ended inside a """raw""" string
+	SYNTAX_STATE_CSHARP_STRING_VERBATIM = 0x02, // line ended inside a @"verbatim" string
+	SYNTAX_STATE_CSHARP_MULTILINE_COMMENT = 0x04, // line ended inside a /* ... */ comment
+};
+
 typedef struct {
 	Language lang;
 	char *name;
@@ -280,6 +286,9 @@ static bool syntax_number_continues(Language lang, const char32_t *line, u32 lin
 	case LANG_TYPESCRIPT:
 		digits = "0123456789.xXoObBabcdefABCDEFn_";
 		break;
+	case LANG_CSHARP:
+		digits = "0123456789.xXbBabcdefABCDEF_ulULdDmM";
+		break;
 	default:
 		digits = "0123456789.xXoObBabcdefABCDEF_";
 		break;
@@ -2273,12 +2282,174 @@ static void syntax_highlight_gdscript(SyntaxState *state, const char32_t *line,
 
 
 static void syntax_highlight_csharp(SyntaxState *state, const char32_t *line, u32 line_len, SyntaxCharType *char_types) {
-	(void)state;
-	(void)line;
-	(void)line_len;
-	if (char_types) {
-		memset(char_types, 0, line_len);
+	bool string_is_raw = (*state & SYNTAX_STATE_CSHARP_STRING_RAW) != 0;
+	bool string_is_verbatim = (*state & SYNTAX_STATE_CSHARP_STRING_VERBATIM) != 0;
+	bool in_multiline_comment = (*state & SYNTAX_STATE_CSHARP_MULTILINE_COMMENT) != 0;
+	bool in_string = string_is_raw || string_is_verbatim;
+	bool in_number = false;
+	bool in_preprocessor = false;
+	u32 backslashes = 0;
+	
+	for (u32 i = 0; i < line_len; ++i) {
+		char32_t c = line[i];
+		bool dealt_with = false;
+		if (in_multiline_comment) {
+			if (char_types) char_types[i] = SYNTAX_COMMENT;
+			if (line[i] == '*' && i + 1 < line_len && line[i + 1] == '/') {
+				if (char_types)
+					char_types[i+1] = SYNTAX_COMMENT;
+				i++;
+				in_multiline_comment = false;
+			}
+			continue;
+		}
+		switch (c) {
+		case '#':
+			if (!in_string && !in_preprocessor) {
+				in_preprocessor = true;
+				for (i64 j = (i64)i-1; j >= 0; j--) {
+					if (!is32_space(line[j])) {
+						in_preprocessor = false;
+						break;
+					}
+				}
+			}
+			break;
+		case '@':
+			if (i + 1 >= line_len || in_string || in_preprocessor) break;
+			if (char_types && (is32_alpha(line[i+1]) || line[i+1] == '_')) {
+				// "verbatim identifier"
+				char_types[i] = SYNTAX_NORMAL;
+				char_types[i+1] = SYNTAX_NORMAL;
+				for (i += 2; is32_word(line[i]); i++)
+					char_types[i] = SYNTAX_NORMAL;
+				i--; // we'll increment i at the end of the for loop
+				dealt_with = true;
+			}
+			if (line[i+1] == '"') {
+				string_is_verbatim = true;
+				if (char_types) char_types[i] = SYNTAX_STRING;
+				dealt_with = true;
+			}
+			break;
+		case '"': {
+			if (in_string && string_is_raw) {
+				if (i + 2 < line_len && line[i+1] == '"' && line[i+2] == '"') {
+					in_string = false;
+					string_is_raw = string_is_verbatim = false;
+					if (char_types) char_types[i] = char_types[i+1] = char_types[i+2] = SYNTAX_STRING;
+					i += 2;
+					dealt_with = true;
+				}
+			} else if (in_string) {
+				if (backslashes % 2 == 0 || string_is_verbatim) {
+					in_string = false;
+					if (char_types) char_types[i] = SYNTAX_STRING;
+					dealt_with = true;
+				}
+			} else {
+				in_string = true;
+				if (!in_preprocessor && i + 2 < line_len && line[i+1] == '"' && line[i+2] == '"') {
+					string_is_raw = true;
+					if (char_types) char_types[i] = char_types[i+1] = SYNTAX_STRING;
+					i += 2;
+				}
+			}
+		} break;
+		case '\'':
+			if (!in_string) {
+				if (char_types) char_types[i] = SYNTAX_CHARACTER;
+				i++;
+				backslashes = 0;
+				for (; i < line_len; i++) {
+					if (line[i] == '\'' && backslashes % 2 == 0) {
+						break;
+					}
+					if (char_types) char_types[i] = SYNTAX_CHARACTER;
+					backslashes = line[i] == '\\' ? backslashes + 1 : 0;
+				}
+				if (i >= line_len) continue; // break out of for loop
+				if (char_types) char_types[i] = SYNTAX_CHARACTER;
+				dealt_with = true;
+			}
+			break;
+		case ANY_DIGIT:
+			if (char_types && !in_string && !in_number) {
+				in_number = true;
+				if (i) {
+					if (line[i - 1] == '.') {
+						// support .6, for example
+						char_types[i - 1] = SYNTAX_CONSTANT;
+					} else if (is32_word(line[i - 1])) {
+						// actually, this isn't a number. it's something like a*6* or u3*2*.
+						in_number = false;
+					}
+				}
+			}
+			break;
+		case '/':
+			if (!in_string && i + 1 < line_len) {
+				if (line[i+1] == '/') {
+					// single-line comment
+					if (char_types) {
+						for (; i < line_len; i++)
+							char_types[i] = SYNTAX_COMMENT;
+					}
+					i = line_len;
+					continue;
+				} else if (line[i+1] == '*') {
+					// multi-line comment
+					in_multiline_comment = true;
+					if (char_types) {
+						char_types[i] = char_types[i+1] = SYNTAX_COMMENT;
+					}
+					i++;
+					continue;
+				}
+			}
+			break;
+		case '\\':
+			++backslashes;
+			break;
+		default:
+			if ((i && is32_word(line[i - 1])) || !is32_word(c))
+				break; // can't be a keyword on its own.
+			
+			if (char_types && !in_string && !in_number && !in_preprocessor) {
+				u32 keyword_len = syntax_keyword_len(LANG_CSHARP, line, i, line_len);
+				Keyword const *keyword = syntax_keyword_lookup(syntax_all_keywords_csharp, &line[i], keyword_len);
+				if (keyword) {
+					SyntaxCharType type = keyword->type;
+					for (size_t j = 0; j < keyword_len; ++j) {
+						char_types[i++] = type;
+					}
+					--i; // we'll increment i from the for loop
+					dealt_with = true;
+					break;
+				}
+			}
+			break;
+		}
+		if (c != '\\') backslashes = 0;
+		if (in_number && !syntax_number_continues(LANG_CSHARP, line, line_len, i))
+			in_number = false;
+		
+		if (char_types && !dealt_with) {
+			SyntaxCharType type = SYNTAX_NORMAL;
+			if (in_string)
+				type = SYNTAX_STRING;
+			else if (in_number)
+				type = SYNTAX_CONSTANT;
+			else if (in_preprocessor)
+				type = SYNTAX_PREPROCESSOR;
+			char_types[i] = type;
+		}
 	}
+	*state = (SyntaxState)(
+		(SYNTAX_STATE_CSHARP_STRING_RAW * (in_string && string_is_raw))
+		| (SYNTAX_STATE_CSHARP_STRING_VERBATIM * (in_string && string_is_verbatim))
+		| (SYNTAX_STATE_CSHARP_MULTILINE_COMMENT * in_multiline_comment)
+	);
 }
 
 typedef struct {
author	pommicket <pommicket@gmail.com>	2025-06-12 14:52:54 -0400
committer	pommicket <pommicket@gmail.com>	2025-06-12 14:52:54 -0400
commit	828741f64d04cde5cd013597e17e1fcc4c05bc9d (patch)
tree	507fdf440445a95c26ce8cdba542dd66c56ac148
parent	218b0f332bc7944483c6eaa944f61a34ed5ced80 (diff)