diff options
author | Leo Tenenbaum <pommicket@gmail.com> | 2021-02-02 10:00:29 -0500 |
---|---|---|
committer | Leo Tenenbaum <pommicket@gmail.com> | 2021-02-02 10:00:29 -0500 |
commit | e97adda9758dab0e12e7e54608dbf9557f0928c7 (patch) | |
tree | 79498ed20025f96a7a3e0ee0880361bf37041ea8 /syntax.c | |
parent | 5882fac07ab1215ec4bbc859c693b28b06e6b469 (diff) |
C++ highlighting, except for raw strings
Diffstat (limited to 'syntax.c')
-rw-r--r-- | syntax.c | 140 |
1 files changed, 59 insertions, 81 deletions
@@ -1,3 +1,5 @@ +#include "keywords.h" + // returns the language this string is referring to, or LANG_NONE if it's invalid. Language language_from_str(char const *str) { for (int i = 0; i < LANG_COUNT; ++i) { @@ -21,7 +23,24 @@ ColorSetting syntax_char_type_to_color(SyntaxCharType t) { return COLOR_TEXT; } -static void syntax_highlight_c(SyntaxState *state_ptr, char32_t *line, u32 line_len, SyntaxCharType *char_types) { +static inline bool keyword_matches(char32_t *text, size_t len, char const *keyword) { + if (len == strlen(keyword)) { + bool matches = true; + char32_t *p = text; + // check if `p` starts with `keyword` + for (char const *q = keyword; *q; ++p, ++q) { + if (*p != (char32_t)*q) { + matches = false; + break; + } + } + return matches; + } else { + return false; + } +} + +static void syntax_highlight_c_cpp(SyntaxState *state_ptr, bool cpp, char32_t *line, u32 line_len, SyntaxCharType *char_types) { SyntaxState state = *state_ptr; bool in_preprocessor = (state & SYNTAX_STATE_PREPROCESSOR) != 0; bool in_string = (state & SYNTAX_STATE_STRING) != 0; @@ -42,8 +61,9 @@ static void syntax_highlight_c(SyntaxState *state_ptr, char32_t *line, u32 line_ bool has_1_char = i + 1 < line_len; bool dealt_with = false; - - switch (line[i]) { + + char32_t c = line[i]; + switch (c) { case '#': if (!in_single_line_comment && !in_multi_line_comment && !in_char && !in_string) in_preprocessor = true; @@ -92,53 +112,10 @@ static void syntax_highlight_c(SyntaxState *state_ptr, char32_t *line, u32 line_ } break; default: { - if ((i && is32_ident(line[i - 1])) || !is32_ident(line[i])) + if ((i && is32_ident(line[i - 1])) || !is32_ident(c)) break; // can't be a keyword on its own. - // here are all the keywords! - static char const *const keywords_A[11] = {"ATOMIC_ADDRESS_LOCK_FREE","ATOMIC_CHAR16_T_LOCK_FREE","ATOMIC_CHAR32_T_LOCK_FREE","ATOMIC_CHAR_LOCK_FREE","ATOMIC_FLAG_LOCK_FREE","ATOMIC_INT_LOCK_FREE","ATOMIC_LLONG_LOCK_FREE","ATOMIC_LONG_LOCK_FREE","ATOMIC_SHORT_LOCK_FREE","ATOMIC_WCHAR_T_LOCK_FREE"}; - static char const *const keywords_B[2] = {"BUFSIZ"}; - static char const *const keywords_C[5] = {"CHAR_BIT","CHAR_MAX","CHAR_MIN","CLOCKS_PER_SEC"}; - static char const *const keywords_D[12] = {"DBL_DIG","DBL_EPSILON","DBL_HAS_SUBNORM","DBL_MANT_DIG","DBL_MAX","DBL_MAX_10_EXP","DBL_MAX_EXP","DBL_MIN","DBL_MIN_EXP","DBL_TRUE_MIN","DECIMAL_DIG"}; - static char const *const keywords_E[128] = {"E2BIG","EACCES","EADDRINUSE","EADDRNOTAVAIL","EADV","EAFNOSUPPORT","EAGAIN","EALREADY","EBADE","EBADF","EBADFD","EBADMSG","EBADR","EBADRQC","EBADSLT","EBFONT","EBUSY","ECHILD","ECHRNG","ECOMM","ECONNABORTED","ECONNREFUSED","ECONNRESET","EDEADLK","EDEADLOCK","EDESTADDRREQ","EDOM","EDOTDOT","EDQUOT","EEXIST","EFAULT","EFBIG","EHOSTDOWN","EHOSTUNREACH","EIDRM","EILSEQ","EINPROGRESS","EINTR","EINVAL","EIO","EISCONN","EISDIR","EISNAM","EL2HLT","EL2NSYNC","EL3HLT","EL3RST","ELIBACC","ELIBBAD","ELIBEXEC","ELIBMAX","ELIBSCN","ELNRNG","ELOOP","EMEDIUMTYPE","EMFILE","EMLINK","EMSGSIZE","EMULTIHOP","ENAMETOOLONG","ENAVAIL","ENETDOWN","ENETRESET","ENETUNREACH","ENFILE","ENOANO","ENOBUFS","ENOCSI","ENODATA","ENODEV","ENOENT","ENOEXEC","ENOLCK","ENOLINK","ENOMEDIUM","ENOMEM","ENOMSG","ENONET","ENOPKG","ENOPROTOOPT","ENOSPC","ENOSR","ENOSTR","ENOSYS","ENOTBLK","ENOTCONN","ENOTDIR","ENOTEMPTY","ENOTNAM","ENOTSOCK","ENOTTY","ENOTUNIQ","ENXIO","EOF","EOPNOTSUPP","EOVERFLOW","EPERM","EPFNOSUPPORT","EPIPE","EPROTO","EPROTONOSUPPORT","EPROTOTYPE","ERANGE","EREMCHG","EREMOTE","EREMOTEIO","ERESTART","EROFS","ESHUTDOWN","ESOCKTNOSUPPORT","ESPIPE","ESRCH","ESRMNT","ESTALE","ESTRPIPE","ETIME","ETIMEDOUT","ETOOMANYREFS","ETXTBSY","EUCLEAN","EUNATCH","EUSERS","EWOULDBLOCK","EXDEV","EXFULL","EXIT_FAILURE","EXIT_SUCCESS"}; - static char const *const keywords_F[39] = {"FE_ALL_EXCEPT","FE_DFL_ENV","FE_DIVBYZERO","FE_DOWNWARD","FE_INEXACT","FE_INVALID","FE_OVERFLOW","FE_TONEAREST","FE_TOWARDZERO","FE_UNDERFLOW","FE_UPWARD","FILE","FILENAME_MAX","FLT_DECIMAL_DIG","FLT_DIG","FLT_EVAL_METHOD","FLT_HAS_SUBNORM","FLT_MANT_DIG","FLT_MAX","FLT_MAX_10_EXP","FLT_MAX_EXP","FLT_MIN","FLT_MIN_10_EXP","FLT_MIN_EXP","FLT_RADIX","FLT_ROUNDS","FLT_TRUE_MIN","FOPEN_MAX","FP_FAST_FMA","FP_FAST_FMAF","FP_FAST_FMAL","FP_ILOGB0","FP_ILOGBNAN","FP_INFINITE","FP_NAN","FP_NORMAL","FP_SUBNORMAL","FP_ZERO"}; - static char const *const keywords_H[4] = {"HUGE_VAL","HUGE_VALF","HUGE_VALL"}; - static char const *const keywords_I[33] = {"I","INFINITY","INT16_MAX","INT16_MIN","INT32_MAX","INT32_MIN","INT64_MAX","INT64_MIN","INT8_MAX","INT8_MIN","INTMAX_MAX","INTMAX_MIN","INTPTR_MAX","INTPTR_MIN","INT_FAST16_MAX","INT_FAST16_MIN","INT_FAST32_MAX","INT_FAST32_MIN","INT_FAST64_MAX","INT_FAST64_MIN","INT_FAST8_MAX","INT_FAST8_MIN","INT_LEAST16_MAX","INT_LEAST16_MIN","INT_LEAST32_MAX","INT_LEAST32_MIN","INT_LEAST64_MAX","INT_LEAST64_MIN","INT_LEAST8_MAX","INT_LEAST8_MIN","INT_MAX","INT_MIN"}; - static char const *const keywords_L[23] = {"LC_ALL","LC_COLLATE","LC_CTYPE","LC_MONETARY","LC_NUMERIC","LC_TIME","LDBL_DECIMAL_DIG","LDBL_DIG","LDBL_EPSILON","LDBL_MANT_DIG","LDBL_MAX","LDBL_MAX_10_EXP","LDBL_MAX_EXP","LDBL_MIN","LDBL_MIN_10_EXP","LDBL_MIN_EXP","LDBL_TRUE_MIN","LLONG_MAX","LLONG_MIN","LONG_MAX","LONG_MIN","L_tmpnam"}; - static char const *const keywords_M[5] = {"MATH_ERREXCEPT","MATH_ERRNO","MB_CUR_MAX","MB_LEN_MAX"}; - static char const *const keywords_N[4] = {"NAN","NDEBUG","NULL"}; - static char const *const keywords_O[2] = {"ONCE_FLAG_INIT"}; - static char const *const keywords_P[87] = {"PRIX16","PRIX32","PRIX64","PRIX8","PRIXFAST16","PRIXFAST32","PRIXFAST64","PRIXFAST8","PRIXLEAST16","PRIXLEAST32","PRIXLEAST64","PRIXLEAST8","PRIXMAX","PRIXPTR","PRId16","PRId32","PRId64","PRId8","PRIdFAST16","PRIdFAST32","PRIdFAST64","PRIdFAST8","PRIdLEAST16","PRIdLEAST32","PRIdLEAST64","PRIdLEAST8","PRIdMAX","PRIdPTR","PRIi16","PRIi32","PRIi64","PRIi8","PRIiFAST16","PRIiFAST32","PRIiFAST64","PRIiFAST8","PRIiLEAST16","PRIiLEAST32","PRIiLEAST64","PRIiLEAST8","PRIiMAX","PRIiPTR","PRIo16","PRIo32","PRIo64","PRIo8","PRIoFAST16","PRIoFAST32","PRIoFAST64","PRIoFAST8","PRIoLEAST16","PRIoLEAST32","PRIoLEAST64","PRIoLEAST8","PRIoMAX","PRIoPTR","PRIu16","PRIu32","PRIu64","PRIu8","PRIuFAST16","PRIuFAST32","PRIuFAST64","PRIuFAST8","PRIuLEAST16","PRIuLEAST32","PRIuLEAST64","PRIuLEAST8","PRIuMAX","PRIuPTR","PRIx16","PRIx32","PRIx64","PRIx8","PRIxFAST16","PRIxFAST32","PRIxFAST64","PRIxFAST8","PRIxLEAST16","PRIxLEAST32","PRIxLEAST64","PRIxLEAST8","PRIxMAX","PRIxPTR","PTRDIFF_MAX","PTRDIFF_MIN"}; - static char const *const keywords_R[2] = {"RSIZE_MAX"}; - static char const *const keywords_S[122] = {"SCHAR_MAX","SCHAR_MIN","SCNd16","SCNd32","SCNd64","SCNd8","SCNdFAST16","SCNdFAST32","SCNdFAST64","SCNdFAST8","SCNdLEAST16","SCNdLEAST32","SCNdLEAST64","SCNdLEAST8","SCNdMAX","SCNdPTR","SCNi16","SCNi32","SCNi64","SCNi8","SCNiFAST16","SCNiFAST32","SCNiFAST64","SCNiFAST8","SCNiLEAST16","SCNiLEAST32","SCNiLEAST64","SCNiLEAST8","SCNiMAX","SCNiPTR","SCNo16","SCNo32","SCNo64","SCNo8","SCNoFAST16","SCNoFAST32","SCNoFAST64","SCNoFAST8","SCNoLEAST16","SCNoLEAST32","SCNoLEAST64","SCNoLEAST8","SCNoMAX","SCNoPTR","SCNu16","SCNu32","SCNu64","SCNu8","SCNuFAST16","SCNuFAST32","SCNuFAST64","SCNuFAST8","SCNuLEAST16","SCNuLEAST32","SCNuLEAST64","SCNuLEAST8","SCNuMAX","SCNuPTR","SCNx16","SCNx32","SCNx64","SCNx8","SCNxFAST16","SCNxFAST32","SCNxFAST64","SCNxFAST8","SCNxLEAST16","SCNxLEAST32","SCNxLEAST64","SCNxLEAST8","SCNxMAX","SCNxPTR","SEEK_CUR","SEEK_END","SEEK_SET","SHRT_MAX","SHRT_MIN","SIGABRT","SIGALRM","SIGBUS","SIGCHLD","SIGCLD","SIGCONT","SIGEMT","SIGFPE","SIGHUP","SIGILL","SIGINFO","SIGINT","SIGIO","SIGIOT","SIGKILL","SIGLOST","SIGPIPE","SIGPOLL","SIGPROF","SIGPWR","SIGQUIT","SIGSEGV","SIGSTKFLT","SIGSTOP","SIGSYS","SIGTERM","SIGTRAP","SIGTSTP","SIGTTIN","SIGTTOU","SIGUNUSED","SIGURG","SIGUSR1","SIGUSR2","SIGVTALRM","SIGWINCH","SIGXCPU","SIGXFSZ","SIG_ATOMIC_MAX","SIG_ATOMIC_MIN","SIG_DFL","SIG_ERR","SIG_IGN","SIZE_MAX"}; - static char const *const keywords_T[5] = {"TIME_UTC","TMP_MAX","TMP_MAX_S","TSS_DTOR_ITERATIONS"}; - static char const *const keywords_U[20] = {"UCHAR_MAX","UINT16_MAX","UINT32_MAX","UINT64_MAX","UINT8_MAX","UINTMAX_MAX","UINTPTR_MAX","UINT_FAST16_MAX","UINT_FAST32_MAX","UINT_FAST64_MAX","UINT_FAST8_MAX","UINT_LEAST16_MAX","UINT_LEAST32_MAX","UINT_LEAST64_MAX","UINT_LEAST8_MAX","UINT_MAX","ULLONG_MAX","ULONG_MAX","USHRT_MAX"}; - static char const *const keywords_W[6] = {"WCHAR_MAX","WCHAR_MIN","WEOF","WINT_MAX","WINT_MIN"}; - static char const *const keywords__[14] = {"_Alignas","_Alignof","_Atomic","_Bool","_Complex","_Generic","_IOFBF","_IOLBF","_IONBF","_Imaginary","_Noreturn","_Static_assert","_Thread_local"}; - static char const *const keywords_a[43] = {"alignas","alignof","atomic_address","atomic_bool","atomic_char","atomic_char16_t","atomic_char32_t","atomic_flag","atomic_int","atomic_int_fast16_t","atomic_int_fast32_t","atomic_int_fast64_t","atomic_int_fast8_t","atomic_int_least16_t","atomic_int_least32_t","atomic_int_least64_t","atomic_int_least8_t","atomic_intmax_t","atomic_intptr_t","atomic_llong","atomic_long","atomic_ptrdiff_t","atomic_schar","atomic_short","atomic_size_t","atomic_uchar","atomic_uint","atomic_uint_fast16_t","atomic_uint_fast32_t","atomic_uint_fast64_t","atomic_uint_fast8_t","atomic_uint_least16_t","atomic_uint_least32_t","atomic_uint_least64_t","atomic_uint_least8_t","atomic_uintmax_t","atomic_uintptr_t","atomic_ullong","atomic_ulong","atomic_ushort","atomic_wchar_t","auto"}; - static char const *const keywords_b[3] = {"bool","break"}; - static char const *const keywords_c[12] = {"case","char","char16_t","char32_t","char8_t","clock_t","cnd_t","complex","const","constraint_handler_t","continue"}; - static char const *const keywords_d[6] = {"default","div_t","do","double","double_t"}; - static char const *const keywords_e[5] = {"else","enum","errno_t","extern"}; - static char const *const keywords_f[8] = {"false","fenv_t","fexcept_t","float","float_t","for","fpos_t"}; - static char const *const keywords_g[2] = {"goto"}; - static char const *const keywords_i[11] = {"if","imaxdiv_t","inline","int","int16_t","int32_t","int64_t","int8_t","intmax_t","intptr_t"}; - static char const *const keywords_j[2] = {"jmp_buf"}; - static char const *const keywords_l[4] = {"ldiv_t","lldiv_t","long"}; - static char const *const keywords_m[16] = {"math_errhandling","max_align_t","mbstate_t","memory_order","memory_order_acq_rel","memory_order_acquire","memory_order_consume","memory_order_relaxed","memory_order_release","memory_order_seq_cst","mtx_plain","mtx_recursive","mtx_t","mtx_timed","mtx_try"}; - static char const *const keywords_n[2] = {"noreturn"}; - static char const *const keywords_o[3] = {"offsetof","once_flag"}; - static char const *const keywords_p[2] = {"ptrdiff_t"}; - static char const *const keywords_r[5] = {"register","restrict","return","rsize_t"}; - static char const *const keywords_s[13] = {"short","sig_atomic_t","signed","size_t","sizeof","static","static_assert","stderr","stdin","stdout","struct","switch"}; - static char const *const keywords_t[13] = {"thrd_busy","thrd_error","thrd_nomem","thrd_start_t","thrd_success","thrd_t","thrd_timeout","time_t","true","tss_dtor_t","tss_t","typedef"}; - static char const *const keywords_u[9] = {"uint16_t","uint32_t","uint64_t","uint8_t","uintmax_t","uintptr_t","union","unsigned"}; - static char const *const keywords_v[4] = {"va_list","void","volatile"}; - static char const *const keywords_w[6] = {"wchar_t","wctrans_t","wctype_t","while","wint_t"}; - static char const *const keywords_x[2] = {"xtime"}; - static char const *const *const all_keywords[] = { - ['A'] = keywords_A, ['B'] = keywords_B, ['C'] = keywords_C, ['D'] = keywords_D, ['E'] = keywords_E, ['F'] = keywords_F, ['H'] = keywords_H, ['I'] = keywords_I, ['L'] = keywords_L, ['M'] = keywords_M, ['N'] = keywords_N, ['O'] = keywords_O, ['P'] = keywords_P, ['R'] = keywords_R, ['S'] = keywords_S, ['T'] = keywords_T, ['U'] = keywords_U, ['W'] = keywords_W, ['_'] = keywords__, ['a'] = keywords_a, ['b'] = keywords_b, ['c'] = keywords_c, ['d'] = keywords_d, ['e'] = keywords_e, ['f'] = keywords_f, ['g'] = keywords_g, ['i'] = keywords_i, ['j'] = keywords_j, ['l'] = keywords_l, ['m'] = keywords_m, ['n'] = keywords_n, ['o'] = keywords_o, ['p'] = keywords_p, ['r'] = keywords_r, ['s'] = keywords_s, ['t'] = keywords_t, ['u'] = keywords_u, ['v'] = keywords_v, ['w'] = keywords_w, ['x'] = keywords_x - }; + // keywords don't matter for advancing the state if (char_types && !in_single_line_comment && !in_multi_line_comment && !in_string && !in_preprocessor && !in_char) { u32 keyword_end; @@ -146,40 +123,38 @@ static void syntax_highlight_c(SyntaxState *state_ptr, char32_t *line, u32 line_ for (keyword_end = i; keyword_end < line_len && is32_ident(line[keyword_end]); ++keyword_end); u32 keyword_len = keyword_end - i; - char const *const *keywords = line[i] < arr_count(all_keywords) ? all_keywords[line[i]] : NULL; - if (keywords) { - for (size_t k = 0; keywords[k]; ++k) { - bool matches = true; - char const *keyword = keywords[k]; - if (keyword_len == strlen(keyword)) { - char32_t *p = &line[i]; - // check if `p` starts with `keyword` - for (char const *q = keyword; *q; ++p, ++q) { - if (*p != (char32_t)*q) { - matches = false; - break; - } - } - if (matches) { - // it's a keyword - // let's highlight all of it now - type = SYNTAX_KEYWORD; - if (isupper(keyword[0]) || - (keyword_len == 4 && streq(keyword, "true")) || - (keyword_len == 5 && streq(keyword, "false")) || - (keyword_len == 6 && (streq(keyword, "stderr") || streq(keyword, "stdout"))) - ) { - type = SYNTAX_CONSTANT; // these are constants, not keywords - } - for (size_t c = 0; keyword[c]; ++c) { - char_types[i++] = type; - } - --i; // we'll increment i from the for loop - dealt_with = true; - break; - } + char const *const *keywords = c < arr_count(syntax_all_keywords_c) ? syntax_all_keywords_c[c] : NULL; + char const *keyword = NULL; + if (keywords) + for (size_t k = 0; keywords[k]; ++k) + if (keyword_matches(&line[i], keyword_len, keywords[k])) + keyword = keywords[k]; + if (cpp && !keyword) { + // check C++'s keywords too! + keywords = c < arr_count(syntax_all_keywords_cpp) ? syntax_all_keywords_cpp[c] : NULL; + if (keywords) + for (size_t k = 0; keywords[k]; ++k) + if (keyword_matches(&line[i], keyword_len, keywords[k])) + keyword = keywords[k]; + } + + if (keyword) { + // it's a keyword + // let's highlight all of it now + type = SYNTAX_KEYWORD; + if (isupper(keyword[0]) || + (keyword_len == 4 && streq(keyword, "true")) || + (keyword_len == 5 && streq(keyword, "false")) || + (keyword_len == 6 && (streq(keyword, "stderr") || streq(keyword, "stdout"))) + ) { + type = SYNTAX_CONSTANT; // these are constants, not keywords } + for (size_t j = 0; keyword[j]; ++j) { + char_types[i++] = type; } + --i; // we'll increment i from the for loop + dealt_with = true; + break; } } } break; @@ -224,7 +199,10 @@ void syntax_highlight(SyntaxState *state, Language lang, char32_t *line, u32 lin memset(char_types, 0, line_len * sizeof *char_types); break; case LANG_C: - syntax_highlight_c(state, line, line_len, char_types); + syntax_highlight_c_cpp(state, false, line, line_len, char_types); + break; + case LANG_CPP: + syntax_highlight_c_cpp(state, true, line, line_len, char_types); break; case LANG_COUNT: assert(0); break; } |