summary | refs | log | tree | commit | diff
path: root/syntax.c
diff options
context:
space:
mode:
author: pommicket <pommicket@gmail.com> 2025-06-12 10:56:16 -0400
committer: pommicket <pommicket@gmail.com> 2025-06-12 11:08:54 -0400
commit: 45849c1a279c48a553654af3d55e9f5f52c027ef (patch)
tree: 40b8994cc34e56c6f47082ee8898cc229fda0a33 /syntax.c
parent: 0befcc8fd8015301242bd2e48a23f5d0157ba1de (diff)
Fix highlighting of f-strings in Python
Exceptions: set literals and string literals inside an interpolation are still not highlighted correctly.
Diffstat (limited to 'syntax.c')
-rw-r--r-- syntax.c 126
1 files changed, 90 insertions, 36 deletions
diff --git a/syntax.c b/syntax.c
index 956f653..1a83ff5 100644
--- a/syntax.c
+++ b/syntax.c
@@ -9,67 +9,68 @@
// these all say "CPP" but really they're C/C++
enum {
- SYNTAX_STATE_CPP_MULTI_LINE_COMMENT = 0x1u, // are we in a multi-line comment? (delineated by /* */)
- SYNTAX_STATE_CPP_SINGLE_LINE_COMMENT = 0x2u, // if you add a \ to the end of a single-line comment, it is continued to the next line.
- SYNTAX_STATE_CPP_PREPROCESSOR = 0x4u, // similar to above
- SYNTAX_STATE_CPP_STRING = 0x8u,
- SYNTAX_STATE_CPP_RAW_STRING = 0x10u,
+ SYNTAX_STATE_CPP_MULTI_LINE_COMMENT = 0x1, // are we in a multi-line comment? (delineated by /* */)
+ SYNTAX_STATE_CPP_SINGLE_LINE_COMMENT = 0x2, // if you add a \ to the end of a single-line comment, it is continued to the next line.
+ SYNTAX_STATE_CPP_PREPROCESSOR = 0x4, // similar to above
+ SYNTAX_STATE_CPP_STRING = 0x8,
+ SYNTAX_STATE_CPP_RAW_STRING = 0x10,
};
enum {
- SYNTAX_STATE_RUST_COMMENT_DEPTH_MASK = 0xfu, // in rust, /* */ comments can nest.
- SYNTAX_STATE_RUST_COMMENT_DEPTH_MUL = 0x1u,
+ SYNTAX_STATE_RUST_COMMENT_DEPTH_MASK = 0xf, // in rust, /* */ comments can nest.
+ SYNTAX_STATE_RUST_COMMENT_DEPTH_MUL = 0x1,
SYNTAX_STATE_RUST_COMMENT_DEPTH_BITS = 4, // number of bits we allocate for the comment depth.
- SYNTAX_STATE_RUST_STRING = 0x10u,
- SYNTAX_STATE_RUST_STRING_IS_RAW = 0x20u,
+ SYNTAX_STATE_RUST_STRING = 0x10,
+ SYNTAX_STATE_RUST_STRING_IS_RAW = 0x20,
};
enum {
- SYNTAX_STATE_PYTHON_STRING = 0x01u, // multiline strings (''' and """)
- SYNTAX_STATE_PYTHON_STRING_DBL_QUOTED = 0x02u, // is this a """ string, as opposed to a ''' string?
+ SYNTAX_STATE_PYTHON_STRING = 0x01, // multiline strings (''' and """)
+ SYNTAX_STATE_PYTHON_STRING_DBL_QUOTED = 0x02, // is this a """ string, as opposed to a ''' string?
+ SYNTAX_STATE_PYTHON_FSTRING = 0x04, // is this a f''' or f""" string?
};
enum {
- SYNTAX_STATE_GDSCRIPT_STRING = 0x01u, // multiline strings (''' and """)
- SYNTAX_STATE_GDSCRIPT_STRING_DBL_QUOTED = 0x02u, // is this a """ string, as opposed to a ''' string?
+ SYNTAX_STATE_GDSCRIPT_STRING = 0x01, // multiline strings (''' and """)
+ SYNTAX_STATE_GDSCRIPT_STRING_DBL_QUOTED = 0x02, // is this a """ string, as opposed to a ''' string?
};
enum {
- SYNTAX_STATE_TEX_DOLLAR = 0x01u, // inside math $ ... $
- SYNTAX_STATE_TEX_DOLLARDOLLAR = 0x02u, // inside math $$ ... $$
- SYNTAX_STATE_TEX_VERBATIM = 0x04u, // inside \begin{verbatim} ... \end{verbatim}
+ SYNTAX_STATE_TEX_DOLLAR = 0x01, // inside math $ ... $
+ SYNTAX_STATE_TEX_DOLLARDOLLAR = 0x02, // inside math $$ ... $$
+ SYNTAX_STATE_TEX_VERBATIM = 0x04, // inside \begin{verbatim} ... \end{verbatim}
};
enum {
- SYNTAX_STATE_MARKDOWN_CODE = 0x01u, // inside ``` ``` code section
+ SYNTAX_STATE_MARKDOWN_CODE = 0x01, // inside ``` ``` code section
};
enum {
- SYNTAX_STATE_HTML_COMMENT = 0x01u
+ SYNTAX_STATE_HTML_COMMENT = 0x01
};
enum {
- SYNTAX_STATE_JAVASCRIPT_TEMPLATE_STRING = 0x01u,
- SYNTAX_STATE_JAVASCRIPT_MULTILINE_COMMENT = 0x02u,
+ SYNTAX_STATE_JAVASCRIPT_TEMPLATE_STRING = 0x01,
+ SYNTAX_STATE_JAVASCRIPT_MULTILINE_COMMENT = 0x02,
};
enum {
- SYNTAX_STATE_JAVA_MULTILINE_COMMENT = 0x01u
+ SYNTAX_STATE_JAVA_MULTILINE_COMMENT = 0x01,
};
enum {
- SYNTAX_STATE_GO_RAW_STRING = 0x01u, // backtick-enclosed string
- SYNTAX_STATE_GO_MULTILINE_COMMENT = 0x02u
+ SYNTAX_STATE_GO_RAW_STRING = 0x01, // backtick-enclosed string
+ SYNTAX_STATE_GO_MULTILINE_COMMENT = 0x02,
};
enum {
- SYNTAX_STATE_TED_CFG_STRING = 0x01u, // ` or "-delimited string
- SYNTAX_STATE_TED_CFG_STRING_BACKTICK = 0x02u, // `-delimited string
+ SYNTAX_STATE_TED_CFG_STRING = 0x01, // ` or "-delimited string
+ SYNTAX_STATE_TED_CFG_STRING_BACKTICK = 0x02, // `-delimited string
};
enum {
- SYNTAX_STATE_CSS_COMMENT = 0x01u,
- SYNTAX_STATE_CSS_IN_BRACES = 0x02u,
+ SYNTAX_STATE_CSS_COMMENT = 0x01,
+ SYNTAX_STATE_CSS_IN_BRACES = 0x02,
};
typedef struct {
@@ -740,9 +741,11 @@ static void syntax_highlight_python(SyntaxState *state, const char32_t *line, u3
(void)state;
bool in_string = (*state & SYNTAX_STATE_PYTHON_STRING) != 0;
bool string_is_dbl_quoted = (*state & SYNTAX_STATE_PYTHON_STRING_DBL_QUOTED) != 0;
+ bool string_is_fstring = (*state & SYNTAX_STATE_PYTHON_FSTRING) != 0;
+ bool interpolating = false;
bool string_is_multiline = true;
bool in_number = false;
- u32 backslashes = 0;
+ u32 backslashes = 0, lbraces = 0;
for (u32 i = 0; i < line_len; ++i) {
char32_t c = line[i];
@@ -759,18 +762,63 @@ static void syntax_highlight_python(SyntaxState *state, const char32_t *line, u3
i = line_len - 1;
}
break;
- case 'f':
- case 'r':
- case 'b':
- if (char_types && i+1 < line_len && (line[i+1] == '\'' || line[i+1] == '"')) {
- // format/raw/byte string
- // @TODO(eventually): we don't handle raw string highlighting correctly.
- char_types[i] = SYNTAX_STRING;
- dealt_with = true;
+ // format/raw/byte/unicode string
+ case 'f': case 'r': case 'b': case 'u':
+ case 'F': case 'R': case 'B': case 'U':
+ if (!in_string && !interpolating) {
+ bool is_string = true;
+ u32 j;
+ bool fstring = c == 'f' || c == 'F';
+ for (j = i+1; j < line_len; j++) {
+ char32_t d = line[j];
+ if (d == '\'' || d == '"')
+ break;
+ if (d == 'r' || d == 'f' || d == 'b' || d == 'u'
+ || d == 'R' || d == 'F' || d == 'B' || d == 'U') {
+ fstring |= d == 'f' || d == 'F';
+ continue;
+ }
+ is_string = false;
+ break;
+ }
+ is_string &= j < line_len;
+ if (is_string) {
+ if (char_types) {
+ for (; i < j; i++)
+ char_types[i] = SYNTAX_STRING;
+ }
+ i = j - 1; // NB: will increment i later due to for loop
+ string_is_fstring = fstring;
+ dealt_with = true;
+ break;
+ }
}
goto keyword_check;
+ case '{':
+ if (in_string && string_is_fstring) {
+ lbraces++;
+ if (lbraces % 2 == 1) {
+ in_string = false;
+ interpolating = true;
+ if (char_types) char_types[i] = SYNTAX_STRING;
+ dealt_with = true;
+ }
+ }
+ break;
+ case '}':
+ if (interpolating) {
+ in_string = true;
+ interpolating = false;
+ }
+ break;
case '\'':
case '"': {
+ if (interpolating) {
+ // string literal in interpolation - give up on rest of interpolation
+ in_string = true;
+ interpolating = false;
+ break;
+ }
bool dbl_quoted = c == '"';
bool is_triple = i+2 < line_len &&
line[i+1] == c && line[i+2] == c;
@@ -779,6 +827,9 @@ static void syntax_highlight_python(SyntaxState *state, const char32_t *line, u3
if (string_is_dbl_quoted == dbl_quoted && backslashes % 2 == 0) {
// end of string
in_string = false;
+ string_is_fstring =
+ string_is_dbl_quoted =
+ string_is_multiline = false;
if (char_types) {
char_types[i] = SYNTAX_STRING;
if (string_is_multiline) {
@@ -835,6 +886,7 @@ static void syntax_highlight_python(SyntaxState *state, const char32_t *line, u3
break;
}
if (c != '\\') backslashes = 0;
+ if (c != '{') lbraces = 0;
if (in_number && !syntax_number_continues(LANG_PYTHON, line, line_len, i))
in_number = false;
@@ -848,10 +900,12 @@ static void syntax_highlight_python(SyntaxState *state, const char32_t *line, u3
}
}
*state = 0;
+ in_string |= interpolating; // give up on highlighting rest of interpolation correctly
if (in_string && string_is_multiline) {
*state |= (SyntaxState)(
SYNTAX_STATE_PYTHON_STRING
| (SYNTAX_STATE_PYTHON_STRING_DBL_QUOTED * string_is_dbl_quoted)
+ | (SYNTAX_STATE_PYTHON_FSTRING * string_is_fstring)
);
}
}