From bbd9bab7ad73b24cb97944649859054411463d81 Mon Sep 17 00:00:00 2001
From: Leo Tenenbaum
Date: Sun, 19 Jan 2020 12:50:58 -0500
Subject: more work on #foreign

---
 tokenizer.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 50 insertions(+), 10 deletions(-)

(limited to 'tokenizer.c')

diff --git a/tokenizer.c b/tokenizer.c
index 1ca3b3b..8c26769 100644
--- a/tokenizer.c
+++ b/tokenizer.c
@@ -130,7 +130,19 @@ static inline void tokr_nextchar(Tokenizer *t) {
 	++t->s;
 }
 
-static char tokr_esc_seq(Tokenizer *t) {
+/* returns -1 if c is not a hex digit (0-9, a-f, A-F), otherwise its value 0-15 */
+static inline int char_as_hex_digit(char c) {
+	if (c >= '0' && c <= '9')
+		return c - '0';
+	if (c >= 'a' && c <= 'f')
+		return 10 + c - 'a';
+	if (c >= 'A' && c <= 'F')
+		return 10 + c - 'A';
+	return -1;
+}
+
+/* parses the escape sequence at t->s (just past the backslash); returns its character value, or -1 if it is invalid (t->s is then left unchanged) */
+static int tokr_esc_seq(Tokenizer *t) {
 	/* TODO: add more of these incl. \x41, \100 */
 	switch (*t->s) {
 	case '\'':
@@ -145,8 +157,21 @@ static char tokr_esc_seq(Tokenizer *t) {
 	case 'n':
 		tokr_nextchar(t);
 		return '\n';
+	case '0':
+		tokr_nextchar(t);
+		return '\0';
+	case 'x': {
+		int c1 = char_as_hex_digit(t->s[1]);
+		if (c1 == -1) return -1; /* 0 would be read by callers as a valid NUL escape */
+		int c2 = char_as_hex_digit(t->s[2]);
+		if (c2 == -1) return -1;
+		tokr_nextchar(t);
+		tokr_nextchar(t);
+		tokr_nextchar(t);
+		return c1 * 16 + c2; /* 0-255: kept as int so \xff cannot turn into the -1 error value where char is signed */
+	}
 	default:
-		return 0;
+		return -1;
 	}
 }
 
@@ -159,8 +184,16 @@ static Location token_location(Token *t) {
 }
 
 /* for use during tokenization */
-static void tokenization_err(Tokenizer *t, const char *fmt, ...) {
+static void tokenization_err_(
+#if ERR_SHOW_SOURCE_LOCATION
+	const char *src_file, int src_line,
+#endif
+	Tokenizer *t, const char *fmt, ...) {
 	va_list args;
+	if (!t->err_ctx->enabled) return;
+#if ERR_SHOW_SOURCE_LOCATION
+	err_fprint("Generated by line %d of %s:\n", src_line, src_file);
+#endif
 	va_start(args, fmt);
 	err_text_err(t->err_ctx, "error");
 	err_fprint(" at line %lu of %s:\n", (unsigned long)t->line, t->err_ctx->filename);
@@ -178,15 +211,21 @@ static void tokenization_err(Tokenizer *t, const char *fmt, ...) {
 	}
 }
 
+#if ERR_SHOW_SOURCE_LOCATION
+#define tokenization_err(...) tokenization_err_(__FILE__, __LINE__, __VA_ARGS__)
+#else
+#define tokenization_err tokenization_err_
+#endif
+
 /* for use after tokenization */
 static void tokr_err_(
 #if ERR_SHOW_SOURCE_LOCATION
 				  const char *src_file, int src_line,
 #endif
 				  Tokenizer *t, const char *fmt, ...) {
+	if (!t->err_ctx->enabled) return;
 #if ERR_SHOW_SOURCE_LOCATION
-	if (!t->token->pos.ctx->enabled) return;
-	err_fprint("At line %d of %s:\n", src_line, src_file);
+	err_fprint("Generated by line %d of %s:\n", src_line, src_file);
 #endif
 	va_list args;
 	va_start(args, fmt);
@@ -454,11 +493,12 @@ static bool tokenize_string(Tokenizer *t, char *str) {
 				if (*t->s == '\\') {
 					/* escape sequence */
 					tokr_nextchar(t);
-					c = tokr_esc_seq(t);
-					if (c == 0) {
+					int e = tokr_esc_seq(t);
+					if (e == -1) {
 						tokenization_err(t, "Unrecognized escape character: '\\%c'.", *t->s);
 						goto err;
 					}
+					c = (char)e;
 				} else {
 					c = *t->s;
 					tokr_nextchar(t);
@@ -502,12 +542,12 @@ static bool tokenize_string(Tokenizer *t, char *str) {
 					assert(*t->s);
 					if (*t->s == '\\') {
 						tokr_nextchar(t);
-						char c = tokr_esc_seq(t);
-						if (c == 0) {
+						int c = tokr_esc_seq(t);
+						if (c == -1) {
 							tokenization_err(t, "Unrecognized escape character: '\\%c'.", *t->s);
 							goto err;
 						}
-						*strptr++ = c;
+						*strptr++ = (char)c;
 					} else {
 						*strptr++ = *t->s;
 						tokr_nextchar(t);
-- 
cgit v1.2.3