summaryrefslogtreecommitdiff
path: root/syntax.c
diff options
context:
space:
mode:
authorLeo Tenenbaum <pommicket@gmail.com>2021-02-02 10:00:29 -0500
committerLeo Tenenbaum <pommicket@gmail.com>2021-02-02 10:00:29 -0500
commite97adda9758dab0e12e7e54608dbf9557f0928c7 (patch)
tree79498ed20025f96a7a3e0ee0880361bf37041ea8 /syntax.c
parent5882fac07ab1215ec4bbc859c693b28b06e6b469 (diff)
C++ highlighting, except for raw strings
Diffstat (limited to 'syntax.c')
-rw-r--r--syntax.c140
1 files changed, 59 insertions, 81 deletions
diff --git a/syntax.c b/syntax.c
index 6ccfc84..2ab12ba 100644
--- a/syntax.c
+++ b/syntax.c
@@ -1,3 +1,5 @@
+#include "keywords.h"
+
// returns the language this string is referring to, or LANG_NONE if it's invalid.
Language language_from_str(char const *str) {
for (int i = 0; i < LANG_COUNT; ++i) {
@@ -21,7 +23,24 @@ ColorSetting syntax_char_type_to_color(SyntaxCharType t) {
return COLOR_TEXT;
}
-static void syntax_highlight_c(SyntaxState *state_ptr, char32_t *line, u32 line_len, SyntaxCharType *char_types) {
+static inline bool keyword_matches(char32_t *text, size_t len, char const *keyword) {
+ if (len == strlen(keyword)) {
+ bool matches = true;
+ char32_t *p = text;
+ // check if `p` starts with `keyword`
+ for (char const *q = keyword; *q; ++p, ++q) {
+ if (*p != (char32_t)*q) {
+ matches = false;
+ break;
+ }
+ }
+ return matches;
+ } else {
+ return false;
+ }
+}
+
+static void syntax_highlight_c_cpp(SyntaxState *state_ptr, bool cpp, char32_t *line, u32 line_len, SyntaxCharType *char_types) {
SyntaxState state = *state_ptr;
bool in_preprocessor = (state & SYNTAX_STATE_PREPROCESSOR) != 0;
bool in_string = (state & SYNTAX_STATE_STRING) != 0;
@@ -42,8 +61,9 @@ static void syntax_highlight_c(SyntaxState *state_ptr, char32_t *line, u32 line_
bool has_1_char = i + 1 < line_len;
bool dealt_with = false;
-
- switch (line[i]) {
+
+ char32_t c = line[i];
+ switch (c) {
case '#':
if (!in_single_line_comment && !in_multi_line_comment && !in_char && !in_string)
in_preprocessor = true;
@@ -92,53 +112,10 @@ static void syntax_highlight_c(SyntaxState *state_ptr, char32_t *line, u32 line_
}
break;
default: {
- if ((i && is32_ident(line[i - 1])) || !is32_ident(line[i]))
+ if ((i && is32_ident(line[i - 1])) || !is32_ident(c))
break; // can't be a keyword on its own.
- // here are all the keywords!
- static char const *const keywords_A[11] = {"ATOMIC_ADDRESS_LOCK_FREE","ATOMIC_CHAR16_T_LOCK_FREE","ATOMIC_CHAR32_T_LOCK_FREE","ATOMIC_CHAR_LOCK_FREE","ATOMIC_FLAG_LOCK_FREE","ATOMIC_INT_LOCK_FREE","ATOMIC_LLONG_LOCK_FREE","ATOMIC_LONG_LOCK_FREE","ATOMIC_SHORT_LOCK_FREE","ATOMIC_WCHAR_T_LOCK_FREE"};
- static char const *const keywords_B[2] = {"BUFSIZ"};
- static char const *const keywords_C[5] = {"CHAR_BIT","CHAR_MAX","CHAR_MIN","CLOCKS_PER_SEC"};
- static char const *const keywords_D[12] = {"DBL_DIG","DBL_EPSILON","DBL_HAS_SUBNORM","DBL_MANT_DIG","DBL_MAX","DBL_MAX_10_EXP","DBL_MAX_EXP","DBL_MIN","DBL_MIN_EXP","DBL_TRUE_MIN","DECIMAL_DIG"};
- static char const *const keywords_E[128] = {"E2BIG","EACCES","EADDRINUSE","EADDRNOTAVAIL","EADV","EAFNOSUPPORT","EAGAIN","EALREADY","EBADE","EBADF","EBADFD","EBADMSG","EBADR","EBADRQC","EBADSLT","EBFONT","EBUSY","ECHILD","ECHRNG","ECOMM","ECONNABORTED","ECONNREFUSED","ECONNRESET","EDEADLK","EDEADLOCK","EDESTADDRREQ","EDOM","EDOTDOT","EDQUOT","EEXIST","EFAULT","EFBIG","EHOSTDOWN","EHOSTUNREACH","EIDRM","EILSEQ","EINPROGRESS","EINTR","EINVAL","EIO","EISCONN","EISDIR","EISNAM","EL2HLT","EL2NSYNC","EL3HLT","EL3RST","ELIBACC","ELIBBAD","ELIBEXEC","ELIBMAX","ELIBSCN","ELNRNG","ELOOP","EMEDIUMTYPE","EMFILE","EMLINK","EMSGSIZE","EMULTIHOP","ENAMETOOLONG","ENAVAIL","ENETDOWN","ENETRESET","ENETUNREACH","ENFILE","ENOANO","ENOBUFS","ENOCSI","ENODATA","ENODEV","ENOENT","ENOEXEC","ENOLCK","ENOLINK","ENOMEDIUM","ENOMEM","ENOMSG","ENONET","ENOPKG","ENOPROTOOPT","ENOSPC","ENOSR","ENOSTR","ENOSYS","ENOTBLK","ENOTCONN","ENOTDIR","ENOTEMPTY","ENOTNAM","ENOTSOCK","ENOTTY","ENOTUNIQ","ENXIO","EOF","EOPNOTSUPP","EOVERFLOW","EPERM","EPFNOSUPPORT","EPIPE","EPROTO","EPROTONOSUPPORT","EPROTOTYPE","ERANGE","EREMCHG","EREMOTE","EREMOTEIO","ERESTART","EROFS","ESHUTDOWN","ESOCKTNOSUPPORT","ESPIPE","ESRCH","ESRMNT","ESTALE","ESTRPIPE","ETIME","ETIMEDOUT","ETOOMANYREFS","ETXTBSY","EUCLEAN","EUNATCH","EUSERS","EWOULDBLOCK","EXDEV","EXFULL","EXIT_FAILURE","EXIT_SUCCESS"};
- static char const *const keywords_F[39] = {"FE_ALL_EXCEPT","FE_DFL_ENV","FE_DIVBYZERO","FE_DOWNWARD","FE_INEXACT","FE_INVALID","FE_OVERFLOW","FE_TONEAREST","FE_TOWARDZERO","FE_UNDERFLOW","FE_UPWARD","FILE","FILENAME_MAX","FLT_DECIMAL_DIG","FLT_DIG","FLT_EVAL_METHOD","FLT_HAS_SUBNORM","FLT_MANT_DIG","FLT_MAX","FLT_MAX_10_EXP","FLT_MAX_EXP","FLT_MIN","FLT_MIN_10_EXP","FLT_MIN_EXP","FLT_RADIX","FLT_ROUNDS","FLT_TRUE_MIN","FOPEN_MAX","FP_FAST_FMA","FP_FAST_FMAF","FP_FAST_FMAL","FP_ILOGB0","FP_ILOGBNAN","FP_INFINITE","FP_NAN","FP_NORMAL","FP_SUBNORMAL","FP_ZERO"};
- static char const *const keywords_H[4] = {"HUGE_VAL","HUGE_VALF","HUGE_VALL"};
- static char const *const keywords_I[33] = {"I","INFINITY","INT16_MAX","INT16_MIN","INT32_MAX","INT32_MIN","INT64_MAX","INT64_MIN","INT8_MAX","INT8_MIN","INTMAX_MAX","INTMAX_MIN","INTPTR_MAX","INTPTR_MIN","INT_FAST16_MAX","INT_FAST16_MIN","INT_FAST32_MAX","INT_FAST32_MIN","INT_FAST64_MAX","INT_FAST64_MIN","INT_FAST8_MAX","INT_FAST8_MIN","INT_LEAST16_MAX","INT_LEAST16_MIN","INT_LEAST32_MAX","INT_LEAST32_MIN","INT_LEAST64_MAX","INT_LEAST64_MIN","INT_LEAST8_MAX","INT_LEAST8_MIN","INT_MAX","INT_MIN"};
- static char const *const keywords_L[23] = {"LC_ALL","LC_COLLATE","LC_CTYPE","LC_MONETARY","LC_NUMERIC","LC_TIME","LDBL_DECIMAL_DIG","LDBL_DIG","LDBL_EPSILON","LDBL_MANT_DIG","LDBL_MAX","LDBL_MAX_10_EXP","LDBL_MAX_EXP","LDBL_MIN","LDBL_MIN_10_EXP","LDBL_MIN_EXP","LDBL_TRUE_MIN","LLONG_MAX","LLONG_MIN","LONG_MAX","LONG_MIN","L_tmpnam"};
- static char const *const keywords_M[5] = {"MATH_ERREXCEPT","MATH_ERRNO","MB_CUR_MAX","MB_LEN_MAX"};
- static char const *const keywords_N[4] = {"NAN","NDEBUG","NULL"};
- static char const *const keywords_O[2] = {"ONCE_FLAG_INIT"};
- static char const *const keywords_P[87] = {"PRIX16","PRIX32","PRIX64","PRIX8","PRIXFAST16","PRIXFAST32","PRIXFAST64","PRIXFAST8","PRIXLEAST16","PRIXLEAST32","PRIXLEAST64","PRIXLEAST8","PRIXMAX","PRIXPTR","PRId16","PRId32","PRId64","PRId8","PRIdFAST16","PRIdFAST32","PRIdFAST64","PRIdFAST8","PRIdLEAST16","PRIdLEAST32","PRIdLEAST64","PRIdLEAST8","PRIdMAX","PRIdPTR","PRIi16","PRIi32","PRIi64","PRIi8","PRIiFAST16","PRIiFAST32","PRIiFAST64","PRIiFAST8","PRIiLEAST16","PRIiLEAST32","PRIiLEAST64","PRIiLEAST8","PRIiMAX","PRIiPTR","PRIo16","PRIo32","PRIo64","PRIo8","PRIoFAST16","PRIoFAST32","PRIoFAST64","PRIoFAST8","PRIoLEAST16","PRIoLEAST32","PRIoLEAST64","PRIoLEAST8","PRIoMAX","PRIoPTR","PRIu16","PRIu32","PRIu64","PRIu8","PRIuFAST16","PRIuFAST32","PRIuFAST64","PRIuFAST8","PRIuLEAST16","PRIuLEAST32","PRIuLEAST64","PRIuLEAST8","PRIuMAX","PRIuPTR","PRIx16","PRIx32","PRIx64","PRIx8","PRIxFAST16","PRIxFAST32","PRIxFAST64","PRIxFAST8","PRIxLEAST16","PRIxLEAST32","PRIxLEAST64","PRIxLEAST8","PRIxMAX","PRIxPTR","PTRDIFF_MAX","PTRDIFF_MIN"};
- static char const *const keywords_R[2] = {"RSIZE_MAX"};
- static char const *const keywords_S[122] = {"SCHAR_MAX","SCHAR_MIN","SCNd16","SCNd32","SCNd64","SCNd8","SCNdFAST16","SCNdFAST32","SCNdFAST64","SCNdFAST8","SCNdLEAST16","SCNdLEAST32","SCNdLEAST64","SCNdLEAST8","SCNdMAX","SCNdPTR","SCNi16","SCNi32","SCNi64","SCNi8","SCNiFAST16","SCNiFAST32","SCNiFAST64","SCNiFAST8","SCNiLEAST16","SCNiLEAST32","SCNiLEAST64","SCNiLEAST8","SCNiMAX","SCNiPTR","SCNo16","SCNo32","SCNo64","SCNo8","SCNoFAST16","SCNoFAST32","SCNoFAST64","SCNoFAST8","SCNoLEAST16","SCNoLEAST32","SCNoLEAST64","SCNoLEAST8","SCNoMAX","SCNoPTR","SCNu16","SCNu32","SCNu64","SCNu8","SCNuFAST16","SCNuFAST32","SCNuFAST64","SCNuFAST8","SCNuLEAST16","SCNuLEAST32","SCNuLEAST64","SCNuLEAST8","SCNuMAX","SCNuPTR","SCNx16","SCNx32","SCNx64","SCNx8","SCNxFAST16","SCNxFAST32","SCNxFAST64","SCNxFAST8","SCNxLEAST16","SCNxLEAST32","SCNxLEAST64","SCNxLEAST8","SCNxMAX","SCNxPTR","SEEK_CUR","SEEK_END","SEEK_SET","SHRT_MAX","SHRT_MIN","SIGABRT","SIGALRM","SIGBUS","SIGCHLD","SIGCLD","SIGCONT","SIGEMT","SIGFPE","SIGHUP","SIGILL","SIGINFO","SIGINT","SIGIO","SIGIOT","SIGKILL","SIGLOST","SIGPIPE","SIGPOLL","SIGPROF","SIGPWR","SIGQUIT","SIGSEGV","SIGSTKFLT","SIGSTOP","SIGSYS","SIGTERM","SIGTRAP","SIGTSTP","SIGTTIN","SIGTTOU","SIGUNUSED","SIGURG","SIGUSR1","SIGUSR2","SIGVTALRM","SIGWINCH","SIGXCPU","SIGXFSZ","SIG_ATOMIC_MAX","SIG_ATOMIC_MIN","SIG_DFL","SIG_ERR","SIG_IGN","SIZE_MAX"};
- static char const *const keywords_T[5] = {"TIME_UTC","TMP_MAX","TMP_MAX_S","TSS_DTOR_ITERATIONS"};
- static char const *const keywords_U[20] = {"UCHAR_MAX","UINT16_MAX","UINT32_MAX","UINT64_MAX","UINT8_MAX","UINTMAX_MAX","UINTPTR_MAX","UINT_FAST16_MAX","UINT_FAST32_MAX","UINT_FAST64_MAX","UINT_FAST8_MAX","UINT_LEAST16_MAX","UINT_LEAST32_MAX","UINT_LEAST64_MAX","UINT_LEAST8_MAX","UINT_MAX","ULLONG_MAX","ULONG_MAX","USHRT_MAX"};
- static char const *const keywords_W[6] = {"WCHAR_MAX","WCHAR_MIN","WEOF","WINT_MAX","WINT_MIN"};
- static char const *const keywords__[14] = {"_Alignas","_Alignof","_Atomic","_Bool","_Complex","_Generic","_IOFBF","_IOLBF","_IONBF","_Imaginary","_Noreturn","_Static_assert","_Thread_local"};
- static char const *const keywords_a[43] = {"alignas","alignof","atomic_address","atomic_bool","atomic_char","atomic_char16_t","atomic_char32_t","atomic_flag","atomic_int","atomic_int_fast16_t","atomic_int_fast32_t","atomic_int_fast64_t","atomic_int_fast8_t","atomic_int_least16_t","atomic_int_least32_t","atomic_int_least64_t","atomic_int_least8_t","atomic_intmax_t","atomic_intptr_t","atomic_llong","atomic_long","atomic_ptrdiff_t","atomic_schar","atomic_short","atomic_size_t","atomic_uchar","atomic_uint","atomic_uint_fast16_t","atomic_uint_fast32_t","atomic_uint_fast64_t","atomic_uint_fast8_t","atomic_uint_least16_t","atomic_uint_least32_t","atomic_uint_least64_t","atomic_uint_least8_t","atomic_uintmax_t","atomic_uintptr_t","atomic_ullong","atomic_ulong","atomic_ushort","atomic_wchar_t","auto"};
- static char const *const keywords_b[3] = {"bool","break"};
- static char const *const keywords_c[12] = {"case","char","char16_t","char32_t","char8_t","clock_t","cnd_t","complex","const","constraint_handler_t","continue"};
- static char const *const keywords_d[6] = {"default","div_t","do","double","double_t"};
- static char const *const keywords_e[5] = {"else","enum","errno_t","extern"};
- static char const *const keywords_f[8] = {"false","fenv_t","fexcept_t","float","float_t","for","fpos_t"};
- static char const *const keywords_g[2] = {"goto"};
- static char const *const keywords_i[11] = {"if","imaxdiv_t","inline","int","int16_t","int32_t","int64_t","int8_t","intmax_t","intptr_t"};
- static char const *const keywords_j[2] = {"jmp_buf"};
- static char const *const keywords_l[4] = {"ldiv_t","lldiv_t","long"};
- static char const *const keywords_m[16] = {"math_errhandling","max_align_t","mbstate_t","memory_order","memory_order_acq_rel","memory_order_acquire","memory_order_consume","memory_order_relaxed","memory_order_release","memory_order_seq_cst","mtx_plain","mtx_recursive","mtx_t","mtx_timed","mtx_try"};
- static char const *const keywords_n[2] = {"noreturn"};
- static char const *const keywords_o[3] = {"offsetof","once_flag"};
- static char const *const keywords_p[2] = {"ptrdiff_t"};
- static char const *const keywords_r[5] = {"register","restrict","return","rsize_t"};
- static char const *const keywords_s[13] = {"short","sig_atomic_t","signed","size_t","sizeof","static","static_assert","stderr","stdin","stdout","struct","switch"};
- static char const *const keywords_t[13] = {"thrd_busy","thrd_error","thrd_nomem","thrd_start_t","thrd_success","thrd_t","thrd_timeout","time_t","true","tss_dtor_t","tss_t","typedef"};
- static char const *const keywords_u[9] = {"uint16_t","uint32_t","uint64_t","uint8_t","uintmax_t","uintptr_t","union","unsigned"};
- static char const *const keywords_v[4] = {"va_list","void","volatile"};
- static char const *const keywords_w[6] = {"wchar_t","wctrans_t","wctype_t","while","wint_t"};
- static char const *const keywords_x[2] = {"xtime"};
- static char const *const *const all_keywords[] = {
- ['A'] = keywords_A, ['B'] = keywords_B, ['C'] = keywords_C, ['D'] = keywords_D, ['E'] = keywords_E, ['F'] = keywords_F, ['H'] = keywords_H, ['I'] = keywords_I, ['L'] = keywords_L, ['M'] = keywords_M, ['N'] = keywords_N, ['O'] = keywords_O, ['P'] = keywords_P, ['R'] = keywords_R, ['S'] = keywords_S, ['T'] = keywords_T, ['U'] = keywords_U, ['W'] = keywords_W, ['_'] = keywords__, ['a'] = keywords_a, ['b'] = keywords_b, ['c'] = keywords_c, ['d'] = keywords_d, ['e'] = keywords_e, ['f'] = keywords_f, ['g'] = keywords_g, ['i'] = keywords_i, ['j'] = keywords_j, ['l'] = keywords_l, ['m'] = keywords_m, ['n'] = keywords_n, ['o'] = keywords_o, ['p'] = keywords_p, ['r'] = keywords_r, ['s'] = keywords_s, ['t'] = keywords_t, ['u'] = keywords_u, ['v'] = keywords_v, ['w'] = keywords_w, ['x'] = keywords_x
- };
+
// keywords don't matter for advancing the state
if (char_types && !in_single_line_comment && !in_multi_line_comment && !in_string && !in_preprocessor && !in_char) {
u32 keyword_end;
@@ -146,40 +123,38 @@ static void syntax_highlight_c(SyntaxState *state_ptr, char32_t *line, u32 line_
for (keyword_end = i; keyword_end < line_len && is32_ident(line[keyword_end]); ++keyword_end);
u32 keyword_len = keyword_end - i;
- char const *const *keywords = line[i] < arr_count(all_keywords) ? all_keywords[line[i]] : NULL;
- if (keywords) {
- for (size_t k = 0; keywords[k]; ++k) {
- bool matches = true;
- char const *keyword = keywords[k];
- if (keyword_len == strlen(keyword)) {
- char32_t *p = &line[i];
- // check if `p` starts with `keyword`
- for (char const *q = keyword; *q; ++p, ++q) {
- if (*p != (char32_t)*q) {
- matches = false;
- break;
- }
- }
- if (matches) {
- // it's a keyword
- // let's highlight all of it now
- type = SYNTAX_KEYWORD;
- if (isupper(keyword[0]) ||
- (keyword_len == 4 && streq(keyword, "true")) ||
- (keyword_len == 5 && streq(keyword, "false")) ||
- (keyword_len == 6 && (streq(keyword, "stderr") || streq(keyword, "stdout")))
- ) {
- type = SYNTAX_CONSTANT; // these are constants, not keywords
- }
- for (size_t c = 0; keyword[c]; ++c) {
- char_types[i++] = type;
- }
- --i; // we'll increment i from the for loop
- dealt_with = true;
- break;
- }
+ char const *const *keywords = c < arr_count(syntax_all_keywords_c) ? syntax_all_keywords_c[c] : NULL;
+ char const *keyword = NULL;
+ if (keywords)
+ for (size_t k = 0; keywords[k]; ++k)
+ if (keyword_matches(&line[i], keyword_len, keywords[k]))
+ keyword = keywords[k];
+ if (cpp && !keyword) {
+ // check C++'s keywords too!
+ keywords = c < arr_count(syntax_all_keywords_cpp) ? syntax_all_keywords_cpp[c] : NULL;
+ if (keywords)
+ for (size_t k = 0; keywords[k]; ++k)
+ if (keyword_matches(&line[i], keyword_len, keywords[k]))
+ keyword = keywords[k];
+ }
+
+ if (keyword) {
+ // it's a keyword
+ // let's highlight all of it now
+ type = SYNTAX_KEYWORD;
+ if (isupper(keyword[0]) ||
+ (keyword_len == 4 && streq(keyword, "true")) ||
+ (keyword_len == 5 && streq(keyword, "false")) ||
+ (keyword_len == 6 && (streq(keyword, "stderr") || streq(keyword, "stdout")))
+ ) {
+ type = SYNTAX_CONSTANT; // these are constants, not keywords
}
+ for (size_t j = 0; keyword[j]; ++j) {
+ char_types[i++] = type;
}
+ --i; // we'll increment i from the for loop
+ dealt_with = true;
+ break;
}
}
} break;
@@ -224,7 +199,10 @@ void syntax_highlight(SyntaxState *state, Language lang, char32_t *line, u32 lin
memset(char_types, 0, line_len * sizeof *char_types);
break;
case LANG_C:
- syntax_highlight_c(state, line, line_len, char_types);
+ syntax_highlight_c_cpp(state, false, line, line_len, char_types);
+ break;
+ case LANG_CPP:
+ syntax_highlight_c_cpp(state, true, line, line_len, char_types);
break;
case LANG_COUNT: assert(0); break;
}