diff options
author | Leo Tenenbaum <pommicket@gmail.com> | 2021-02-01 12:33:27 -0500 |
---|---|---|
committer | Leo Tenenbaum <pommicket@gmail.com> | 2021-02-01 12:33:27 -0500 |
commit | 11b50663dc489b7ec8cb1c8774d9bda2c7acac3b (patch) | |
tree | aa604b767286c86b21b6fef851f0c74e8dfc0acb /syntax.c | |
parent | dba51f5c1b7ac895239d2909358b323a92b77644 (diff) |
full C syntax highlighting
Diffstat (limited to 'syntax.c')
-rw-r--r-- | syntax.c | 151 |
1 files changed, 96 insertions, 55 deletions
@@ -20,7 +20,7 @@ ENUM_U8 { SYNTAX_PREPROCESSOR, SYNTAX_STRING, SYNTAX_CHARACTER, - SYNTAX_NUMBER + SYNTAX_CONSTANT } ENUM_U8_END(SyntaxCharType); // NOTE: returns the color setting, not the color @@ -32,8 +32,9 @@ ColorSetting syntax_char_type_to_color(SyntaxCharType t) { case SYNTAX_PREPROCESSOR: return COLOR_PREPROCESSOR; case SYNTAX_STRING: return COLOR_STRING; case SYNTAX_CHARACTER: return COLOR_CHARACTER; - case SYNTAX_NUMBER: return COLOR_NUMBER; + case SYNTAX_CONSTANT: return COLOR_CONSTANT; } + return COLOR_TEXT; } static void syntax_highlight_c(SyntaxStateC *state, char32_t *line, u32 line_len, SyntaxCharType *char_types) { @@ -60,14 +61,14 @@ static void syntax_highlight_c(SyntaxStateC *state, char32_t *line, u32 line_len switch (line[i]) { case '#': - if (!in_single_line_comment && !in_multi_line_comment) + if (!in_single_line_comment && !in_multi_line_comment && !in_char && !in_string) in_preprocessor = true; break; case '\\': ++backslashes; break; case '/': - if (!in_multi_line_comment && !in_single_line_comment && !in_string && has_1_char) { + if (!in_multi_line_comment && !in_single_line_comment && !in_string && !in_char && has_1_char) { if (line[i + 1] == '/') in_single_line_comment = true; // // else if (line[i + 1] == '*') @@ -82,17 +83,17 @@ static void syntax_highlight_c(SyntaxStateC *state, char32_t *line, u32 line_len case '"': if (in_string && backslashes % 2 == 0) in_string = false; - else if (!in_multi_line_comment && !in_single_line_comment) + else if (!in_multi_line_comment && !in_single_line_comment && !in_char) in_string = in_string_now = true; break; case '\'': if (in_char && backslashes % 2 == 0) in_char = false; - else if (!in_multi_line_comment && !in_single_line_comment) + else if (!in_multi_line_comment && !in_single_line_comment && !in_string) in_char = in_char_now = true; break; case '<': // preprocessor string, e.g. <stdio.h> - if (in_preprocessor) + if (in_preprocessor && !in_char) in_string = in_string_now = true; break; case '>': @@ -101,66 +102,105 @@ static void syntax_highlight_c(SyntaxStateC *state, char32_t *line, u32 line_len break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': // don't you wish C had case ranges... // a number! - if (!in_single_line_comment && !in_multi_line_comment && !in_string) { + if (!in_single_line_comment && !in_multi_line_comment && !in_string && !in_number && !in_char) { in_number = true; - if (i && line[i - 1] == '.') { - // support .6, for example - char_types[i - 1] = SYNTAX_NUMBER; + if (i) { + if (line[i - 1] == '.') { + // support .6, for example + char_types[i - 1] = SYNTAX_CONSTANT; + } else if (is32_ident(line[i - 1])) { + // actually, this isn't a number. it's something like a*6* or u3*2*. + in_number = false; + } } } break; default: { - // split keywords by starting letter to speed this up - static char const *const all_keywords[][10] = { - ['a'] = {"auto"}, - ['b'] = {"break", "bool"}, - ['c'] = {"case", "char", "const", "continue", "char8_t", "char16_t", "char32_t"}, - ['d'] = {"default", "do", "double"}, - ['e'] = {"else", "enum", "extern"}, - ['f'] = {"float", "for", "false"}, - ['g'] = {"goto"}, - ['i'] = {"if", "inline", "int", "int8_t", "int16_t", "int32_t", "int64_t"}, - ['l'] = {"long"}, - ['r'] = {"register", "restrict", "return"}, - ['s'] = {"short", "signed", "sizeof", "static", "struct", "switch"}, - ['t'] = {"typedef", "true"}, - ['u'] = {"union", "unsigned", "uint8_t", "uint16_t", "uint32_t", "uint64_t"}, - ['v'] = {"void", "volatile"}, - ['w'] = {"while", "wchar_t", "wint_t"}, - ['_'] = {"_Alignas", "_Alignof", "_Atomic", "_Bool", "_Complex", "_Generic", - "_Imaginary", "_Noreturn", "_Static_assert", "_Thread_local"}, + if ((i && is32_ident(line[i - 1])) || !is32_ident(line[i])) + break; // can't be a keyword on its own. + + // here are all the keywords! + static char const *const keywords_A[11] = {"ATOMIC_ADDRESS_LOCK_FREE","ATOMIC_CHAR16_T_LOCK_FREE","ATOMIC_CHAR32_T_LOCK_FREE","ATOMIC_CHAR_LOCK_FREE","ATOMIC_FLAG_LOCK_FREE","ATOMIC_INT_LOCK_FREE","ATOMIC_LLONG_LOCK_FREE","ATOMIC_LONG_LOCK_FREE","ATOMIC_SHORT_LOCK_FREE","ATOMIC_WCHAR_T_LOCK_FREE"}; + static char const *const keywords_B[2] = {"BUFSIZ"}; + static char const *const keywords_C[5] = {"CHAR_BIT","CHAR_MAX","CHAR_MIN","CLOCKS_PER_SEC"}; + static char const *const keywords_D[12] = {"DBL_DIG","DBL_EPSILON","DBL_HAS_SUBNORM","DBL_MANT_DIG","DBL_MAX","DBL_MAX_10_EXP","DBL_MAX_EXP","DBL_MIN","DBL_MIN_EXP","DBL_TRUE_MIN","DECIMAL_DIG"}; + static char const *const keywords_E[128] = {"E2BIG","EACCES","EADDRINUSE","EADDRNOTAVAIL","EADV","EAFNOSUPPORT","EAGAIN","EALREADY","EBADE","EBADF","EBADFD","EBADMSG","EBADR","EBADRQC","EBADSLT","EBFONT","EBUSY","ECHILD","ECHRNG","ECOMM","ECONNABORTED","ECONNREFUSED","ECONNRESET","EDEADLK","EDEADLOCK","EDESTADDRREQ","EDOM","EDOTDOT","EDQUOT","EEXIST","EFAULT","EFBIG","EHOSTDOWN","EHOSTUNREACH","EIDRM","EILSEQ","EINPROGRESS","EINTR","EINVAL","EIO","EISCONN","EISDIR","EISNAM","EL2HLT","EL2NSYNC","EL3HLT","EL3RST","ELIBACC","ELIBBAD","ELIBEXEC","ELIBMAX","ELIBSCN","ELNRNG","ELOOP","EMEDIUMTYPE","EMFILE","EMLINK","EMSGSIZE","EMULTIHOP","ENAMETOOLONG","ENAVAIL","ENETDOWN","ENETRESET","ENETUNREACH","ENFILE","ENOANO","ENOBUFS","ENOCSI","ENODATA","ENODEV","ENOENT","ENOEXEC","ENOLCK","ENOLINK","ENOMEDIUM","ENOMEM","ENOMSG","ENONET","ENOPKG","ENOPROTOOPT","ENOSPC","ENOSR","ENOSTR","ENOSYS","ENOTBLK","ENOTCONN","ENOTDIR","ENOTEMPTY","ENOTNAM","ENOTSOCK","ENOTTY","ENOTUNIQ","ENXIO","EOF","EOPNOTSUPP","EOVERFLOW","EPERM","EPFNOSUPPORT","EPIPE","EPROTO","EPROTONOSUPPORT","EPROTOTYPE","ERANGE","EREMCHG","EREMOTE","EREMOTEIO","ERESTART","EROFS","ESHUTDOWN","ESOCKTNOSUPPORT","ESPIPE","ESRCH","ESRMNT","ESTALE","ESTRPIPE","ETIME","ETIMEDOUT","ETOOMANYREFS","ETXTBSY","EUCLEAN","EUNATCH","EUSERS","EWOULDBLOCK","EXDEV","EXFULL","EXIT_FAILURE","EXIT_SUCCESS"}; + static char const *const keywords_F[39] = {"FE_ALL_EXCEPT","FE_DFL_ENV","FE_DIVBYZERO","FE_DOWNWARD","FE_INEXACT","FE_INVALID","FE_OVERFLOW","FE_TONEAREST","FE_TOWARDZERO","FE_UNDERFLOW","FE_UPWARD","FILE","FILENAME_MAX","FLT_DECIMAL_DIG","FLT_DIG","FLT_EVAL_METHOD","FLT_HAS_SUBNORM","FLT_MANT_DIG","FLT_MAX","FLT_MAX_10_EXP","FLT_MAX_EXP","FLT_MIN","FLT_MIN_10_EXP","FLT_MIN_EXP","FLT_RADIX","FLT_ROUNDS","FLT_TRUE_MIN","FOPEN_MAX","FP_FAST_FMA","FP_FAST_FMAF","FP_FAST_FMAL","FP_ILOGB0","FP_ILOGBNAN","FP_INFINITE","FP_NAN","FP_NORMAL","FP_SUBNORMAL","FP_ZERO"}; + static char const *const keywords_H[4] = {"HUGE_VAL","HUGE_VALF","HUGE_VALL"}; + static char const *const keywords_I[33] = {"I","INFINITY","INT16_MAX","INT16_MIN","INT32_MAX","INT32_MIN","INT64_MAX","INT64_MIN","INT8_MAX","INT8_MIN","INTMAX_MAX","INTMAX_MIN","INTPTR_MAX","INTPTR_MIN","INT_FAST16_MAX","INT_FAST16_MIN","INT_FAST32_MAX","INT_FAST32_MIN","INT_FAST64_MAX","INT_FAST64_MIN","INT_FAST8_MAX","INT_FAST8_MIN","INT_LEAST16_MAX","INT_LEAST16_MIN","INT_LEAST32_MAX","INT_LEAST32_MIN","INT_LEAST64_MAX","INT_LEAST64_MIN","INT_LEAST8_MAX","INT_LEAST8_MIN","INT_MAX","INT_MIN"}; + static char const *const keywords_L[23] = {"LC_ALL","LC_COLLATE","LC_CTYPE","LC_MONETARY","LC_NUMERIC","LC_TIME","LDBL_DECIMAL_DIG","LDBL_DIG","LDBL_EPSILON","LDBL_MANT_DIG","LDBL_MAX","LDBL_MAX_10_EXP","LDBL_MAX_EXP","LDBL_MIN","LDBL_MIN_10_EXP","LDBL_MIN_EXP","LDBL_TRUE_MIN","LLONG_MAX","LLONG_MIN","LONG_MAX","LONG_MIN","L_tmpnam"}; + static char const *const keywords_M[5] = {"MATH_ERREXCEPT","MATH_ERRNO","MB_CUR_MAX","MB_LEN_MAX"}; + static char const *const keywords_N[4] = {"NAN","NDEBUG","NULL"}; + static char const *const keywords_O[2] = {"ONCE_FLAG_INIT"}; + static char const *const keywords_P[87] = {"PRIX16","PRIX32","PRIX64","PRIX8","PRIXFAST16","PRIXFAST32","PRIXFAST64","PRIXFAST8","PRIXLEAST16","PRIXLEAST32","PRIXLEAST64","PRIXLEAST8","PRIXMAX","PRIXPTR","PRId16","PRId32","PRId64","PRId8","PRIdFAST16","PRIdFAST32","PRIdFAST64","PRIdFAST8","PRIdLEAST16","PRIdLEAST32","PRIdLEAST64","PRIdLEAST8","PRIdMAX","PRIdPTR","PRIi16","PRIi32","PRIi64","PRIi8","PRIiFAST16","PRIiFAST32","PRIiFAST64","PRIiFAST8","PRIiLEAST16","PRIiLEAST32","PRIiLEAST64","PRIiLEAST8","PRIiMAX","PRIiPTR","PRIo16","PRIo32","PRIo64","PRIo8","PRIoFAST16","PRIoFAST32","PRIoFAST64","PRIoFAST8","PRIoLEAST16","PRIoLEAST32","PRIoLEAST64","PRIoLEAST8","PRIoMAX","PRIoPTR","PRIu16","PRIu32","PRIu64","PRIu8","PRIuFAST16","PRIuFAST32","PRIuFAST64","PRIuFAST8","PRIuLEAST16","PRIuLEAST32","PRIuLEAST64","PRIuLEAST8","PRIuMAX","PRIuPTR","PRIx16","PRIx32","PRIx64","PRIx8","PRIxFAST16","PRIxFAST32","PRIxFAST64","PRIxFAST8","PRIxLEAST16","PRIxLEAST32","PRIxLEAST64","PRIxLEAST8","PRIxMAX","PRIxPTR","PTRDIFF_MAX","PTRDIFF_MIN"}; + static char const *const keywords_R[2] = {"RSIZE_MAX"}; + static char const *const keywords_S[122] = {"SCHAR_MAX","SCHAR_MIN","SCNd16","SCNd32","SCNd64","SCNd8","SCNdFAST16","SCNdFAST32","SCNdFAST64","SCNdFAST8","SCNdLEAST16","SCNdLEAST32","SCNdLEAST64","SCNdLEAST8","SCNdMAX","SCNdPTR","SCNi16","SCNi32","SCNi64","SCNi8","SCNiFAST16","SCNiFAST32","SCNiFAST64","SCNiFAST8","SCNiLEAST16","SCNiLEAST32","SCNiLEAST64","SCNiLEAST8","SCNiMAX","SCNiPTR","SCNo16","SCNo32","SCNo64","SCNo8","SCNoFAST16","SCNoFAST32","SCNoFAST64","SCNoFAST8","SCNoLEAST16","SCNoLEAST32","SCNoLEAST64","SCNoLEAST8","SCNoMAX","SCNoPTR","SCNu16","SCNu32","SCNu64","SCNu8","SCNuFAST16","SCNuFAST32","SCNuFAST64","SCNuFAST8","SCNuLEAST16","SCNuLEAST32","SCNuLEAST64","SCNuLEAST8","SCNuMAX","SCNuPTR","SCNx16","SCNx32","SCNx64","SCNx8","SCNxFAST16","SCNxFAST32","SCNxFAST64","SCNxFAST8","SCNxLEAST16","SCNxLEAST32","SCNxLEAST64","SCNxLEAST8","SCNxMAX","SCNxPTR","SEEK_CUR","SEEK_END","SEEK_SET","SHRT_MAX","SHRT_MIN","SIGABRT","SIGALRM","SIGBUS","SIGCHLD","SIGCLD","SIGCONT","SIGEMT","SIGFPE","SIGHUP","SIGILL","SIGINFO","SIGINT","SIGIO","SIGIOT","SIGKILL","SIGLOST","SIGPIPE","SIGPOLL","SIGPROF","SIGPWR","SIGQUIT","SIGSEGV","SIGSTKFLT","SIGSTOP","SIGSYS","SIGTERM","SIGTRAP","SIGTSTP","SIGTTIN","SIGTTOU","SIGUNUSED","SIGURG","SIGUSR1","SIGUSR2","SIGVTALRM","SIGWINCH","SIGXCPU","SIGXFSZ","SIG_ATOMIC_MAX","SIG_ATOMIC_MIN","SIG_DFL","SIG_ERR","SIG_IGN","SIZE_MAX"}; + static char const *const keywords_T[5] = {"TIME_UTC","TMP_MAX","TMP_MAX_S","TSS_DTOR_ITERATIONS"}; + static char const *const keywords_U[20] = {"UCHAR_MAX","UINT16_MAX","UINT32_MAX","UINT64_MAX","UINT8_MAX","UINTMAX_MAX","UINTPTR_MAX","UINT_FAST16_MAX","UINT_FAST32_MAX","UINT_FAST64_MAX","UINT_FAST8_MAX","UINT_LEAST16_MAX","UINT_LEAST32_MAX","UINT_LEAST64_MAX","UINT_LEAST8_MAX","UINT_MAX","ULLONG_MAX","ULONG_MAX","USHRT_MAX"}; + static char const *const keywords_W[6] = {"WCHAR_MAX","WCHAR_MIN","WEOF","WINT_MAX","WINT_MIN"}; + static char const *const keywords__[14] = {"_Alignas","_Alignof","_Atomic","_Bool","_Complex","_Generic","_IOFBF","_IOLBF","_IONBF","_Imaginary","_Noreturn","_Static_assert","_Thread_local"}; + static char const *const keywords_a[43] = {"alignas","alignof","atomic_address","atomic_bool","atomic_char","atomic_char16_t","atomic_char32_t","atomic_flag","atomic_int","atomic_int_fast16_t","atomic_int_fast32_t","atomic_int_fast64_t","atomic_int_fast8_t","atomic_int_least16_t","atomic_int_least32_t","atomic_int_least64_t","atomic_int_least8_t","atomic_intmax_t","atomic_intptr_t","atomic_llong","atomic_long","atomic_ptrdiff_t","atomic_schar","atomic_short","atomic_size_t","atomic_uchar","atomic_uint","atomic_uint_fast16_t","atomic_uint_fast32_t","atomic_uint_fast64_t","atomic_uint_fast8_t","atomic_uint_least16_t","atomic_uint_least32_t","atomic_uint_least64_t","atomic_uint_least8_t","atomic_uintmax_t","atomic_uintptr_t","atomic_ullong","atomic_ulong","atomic_ushort","atomic_wchar_t","auto"}; + static char const *const keywords_b[3] = {"bool","break"}; + static char const *const keywords_c[12] = {"case","char","char16_t","char32_t","char8_t","clock_t","cnd_t","complex","const","constraint_handler_t","continue"}; + static char const *const keywords_d[6] = {"default","div_t","do","double","double_t"}; + static char const *const keywords_e[5] = {"else","enum","errno_t","extern"}; + static char const *const keywords_f[8] = {"false","fenv_t","fexcept_t","float","float_t","for","fpos_t"}; + static char const *const keywords_g[2] = {"goto"}; + static char const *const keywords_i[11] = {"if","imaxdiv_t","inline","int","int16_t","int32_t","int64_t","int8_t","intmax_t","intptr_t"}; + static char const *const keywords_j[2] = {"jmp_buf"}; + static char const *const keywords_l[4] = {"ldiv_t","lldiv_t","long"}; + static char const *const keywords_m[16] = {"math_errhandling","max_align_t","mbstate_t","memory_order","memory_order_acq_rel","memory_order_acquire","memory_order_consume","memory_order_relaxed","memory_order_release","memory_order_seq_cst","mtx_plain","mtx_recursive","mtx_t","mtx_timed","mtx_try"}; + static char const *const keywords_n[2] = {"noreturn"}; + static char const *const keywords_o[3] = {"offsetof","once_flag"}; + static char const *const keywords_p[2] = {"ptrdiff_t"}; + static char const *const keywords_r[5] = {"register","restrict","return","rsize_t"}; + static char const *const keywords_s[13] = {"short","sig_atomic_t","signed","size_t","sizeof","static","static_assert","stderr","stdin","stdout","struct","switch"}; + static char const *const keywords_t[13] = {"thrd_busy","thrd_error","thrd_nomem","thrd_start_t","thrd_success","thrd_t","thrd_timeout","time_t","true","tss_dtor_t","tss_t","typedef"}; + static char const *const keywords_u[9] = {"uint16_t","uint32_t","uint64_t","uint8_t","uintmax_t","uintptr_t","union","unsigned"}; + static char const *const keywords_v[4] = {"va_list","void","volatile"}; + static char const *const keywords_w[6] = {"wchar_t","wctrans_t","wctype_t","while","wint_t"}; + static char const *const keywords_x[2] = {"xtime"}; + static char const *const *const all_keywords[] = { + ['A'] = keywords_A, ['B'] = keywords_B, ['C'] = keywords_C, ['D'] = keywords_D, ['E'] = keywords_E, ['F'] = keywords_F, ['H'] = keywords_H, ['I'] = keywords_I, ['L'] = keywords_L, ['M'] = keywords_M, ['N'] = keywords_N, ['O'] = keywords_O, ['P'] = keywords_P, ['R'] = keywords_R, ['S'] = keywords_S, ['T'] = keywords_T, ['U'] = keywords_U, ['W'] = keywords_W, ['_'] = keywords__, ['a'] = keywords_a, ['b'] = keywords_b, ['c'] = keywords_c, ['d'] = keywords_d, ['e'] = keywords_e, ['f'] = keywords_f, ['g'] = keywords_g, ['i'] = keywords_i, ['j'] = keywords_j, ['l'] = keywords_l, ['m'] = keywords_m, ['n'] = keywords_n, ['o'] = keywords_o, ['p'] = keywords_p, ['r'] = keywords_r, ['s'] = keywords_s, ['t'] = keywords_t, ['u'] = keywords_u, ['v'] = keywords_v, ['w'] = keywords_w, ['x'] = keywords_x }; - // keywords don't matter for advancing the state - if (char_types && !in_single_line_comment && !in_multi_line_comment && !in_string && !in_preprocessor) { + if (char_types && !in_single_line_comment && !in_multi_line_comment && !in_string && !in_preprocessor && !in_char) { + u32 keyword_end; + // find where this keyword would end (if this is a keyword) + for (keyword_end = i; keyword_end < line_len && is32_ident(line[keyword_end]); ++keyword_end); + + u32 keyword_len = keyword_end - i; char const *const *keywords = line[i] < arr_count(all_keywords) ? all_keywords[line[i]] : NULL; if (keywords) { for (size_t k = 0; keywords[k]; ++k) { bool matches = true; char const *keyword = keywords[k]; - size_t keyword_len = strlen(keyword); - if (i + keyword_len <= line_len) { - // make sure we don't catch "print" as containing the keyword "int" - bool separated = (i == 0 || !is32_ident(line[i-1])) && (i + keyword_len == line_len || !is32_ident(line[i + keyword_len])); - if (separated) { - char32_t *p = &line[i]; - // check if `p` starts with `keyword` - for (char const *q = keyword; *q; ++p, ++q) { - if (*p != (char32_t)*q) { - matches = false; - break; - } + if (keyword_len == strlen(keyword)) { + char32_t *p = &line[i]; + // check if `p` starts with `keyword` + for (char const *q = keyword; *q; ++p, ++q) { + if (*p != (char32_t)*q) { + matches = false; + break; } - if (matches) { - // it's a keyword - // let's highlight all of it now - for (size_t c = 0; keyword[c]; ++c) { - char_types[i++] = SYNTAX_KEYWORD; + } + if (matches) { + // it's a keyword + // let's highlight all of it now + type = SYNTAX_KEYWORD; + if (isupper(keyword[0]) || + (keyword_len == 4 && streq(keyword, "true")) || + (keyword_len == 5 && streq(keyword, "false")) || + (keyword_len == 6 && (streq(keyword, "stderr") || streq(keyword, "stdout"))) + ) { + type = SYNTAX_CONSTANT; // these are constants, not keywords } - --i; // we'll increment i from the for loop - dealt_with = true; - break; + for (size_t c = 0; keyword[c]; ++c) { + char_types[i++] = type; } + --i; // we'll increment i from the for loop + dealt_with = true; + break; } } } @@ -169,7 +209,8 @@ static void syntax_highlight_c(SyntaxStateC *state, char32_t *line, u32 line_len } break; } if (line[i] != '\\') backslashes = 0; - if (in_number && !(is32_digit(line[i]) || line[i] == '.' || line[i] == 'e' || (i && line[i-1] == 'e' && (line[i] == '+' || line[i] == '-')))) { + if (in_number && !(is32_digit(line[i]) || line[i] == '.' || line[i] == 'e' || + (i && line[i-1] == 'e' && (line[i] == '+' || line[i] == '-')))) { in_number = false; } @@ -180,10 +221,10 @@ static void syntax_highlight_c(SyntaxStateC *state, char32_t *line, u32 line_len type = SYNTAX_STRING; else if (in_char_now) type = SYNTAX_CHARACTER; + else if (in_number) + type = SYNTAX_CONSTANT; else if (in_preprocessor) type = SYNTAX_PREPROCESSOR; - else if (in_number) - type = SYNTAX_NUMBER; char_types[i] = type; } |