summaryrefslogtreecommitdiff
path: root/tokenizer.c
diff options
context:
space:
mode:
authorLeo Tenenbaum <pommicket@gmail.com>2020-01-19 12:50:58 -0500
committerLeo Tenenbaum <pommicket@gmail.com>2020-01-19 12:50:58 -0500
commitbbd9bab7ad73b24cb97944649859054411463d81 (patch)
tree7d4d562632378dbeecec5c73e53e19b7a179ffa8 /tokenizer.c
parenta66d9199eb6169265a103361b624fc1f8cb21364 (diff)
more work on #foreign
Diffstat (limited to 'tokenizer.c')
-rw-r--r--tokenizer.c60
1 files changed, 50 insertions, 10 deletions
diff --git a/tokenizer.c b/tokenizer.c
index 1ca3b3b..8c26769 100644
--- a/tokenizer.c
+++ b/tokenizer.c
@@ -130,7 +130,19 @@ static inline void tokr_nextchar(Tokenizer *t) {
++t->s;
}
-static char tokr_esc_seq(Tokenizer *t) {
+/* Converts one hexadecimal digit character to its numeric value: '0'-'9' give 0-9, 'a'-'f' and 'A'-'F' give 10-15. Returns -1 if c is not a hex digit. */
+static inline int char_as_hex_digit(char c) {
+ if (c >= '0' && c <= '9')
+ return c - '0';
+ if (c >= 'a' && c <= 'f')
+ return 10 + c - 'a'; /* lowercase hex letter */
+ if (c >= 'A' && c <= 'F')
+ return 10 + c - 'A'; /* uppercase hex letter */
+ return -1; /* not a hex digit */
+}
+
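+/* returns -1 if the escape character is not recognized. NOTE(review): the \x case below returns 0 (not -1) when either hex digit is invalid, and its (char) cast can turn values >= 0x80 negative where char is signed (e.g. \xff may come back as -1) -- confirm and fix */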
+static int tokr_esc_seq(Tokenizer *t) {
/* TODO: add more of these incl. \x41, \100 */
switch (*t->s) {
case '\'':
@@ -145,8 +157,21 @@ static char tokr_esc_seq(Tokenizer *t) {
case 'n':
tokr_nextchar(t);
return '\n';
+ case '0':
+ tokr_nextchar(t);
+ return '\0';
+ case 'x': {
+ int c1 = char_as_hex_digit(t->s[1]);
+ if (c1 == -1) return 0;
+ int c2 = char_as_hex_digit(t->s[2]);
+ if (c2 == -1) return 0;
+ tokr_nextchar(t);
+ tokr_nextchar(t);
+ tokr_nextchar(t);
+ return (char)(c1 * 16 + c2);
+ }
default:
- return 0;
+ return -1;
}
}
@@ -159,8 +184,16 @@ static Location token_location(Token *t) {
}
/* for use during tokenization */
-static void tokenization_err(Tokenizer *t, const char *fmt, ...) {
+static void tokenization_err_(
+#if ERR_SHOW_SOURCE_LOCATION
+ const char *src_file, int src_line,
+#endif
+ Tokenizer *t, const char *fmt, ...) {
va_list args;
+ if (!t->err_ctx->enabled) return;
+#if ERR_SHOW_SOURCE_LOCATION
+ err_fprint("Generated by line %d of %s:\n", src_line, src_file);
+#endif
va_start(args, fmt);
err_text_err(t->err_ctx, "error");
err_fprint(" at line %lu of %s:\n", (unsigned long)t->line, t->err_ctx->filename);
@@ -178,15 +211,21 @@ static void tokenization_err(Tokenizer *t, const char *fmt, ...) {
}
}
+#if ERR_SHOW_SOURCE_LOCATION
+#define tokenization_err(...) tokenization_err_(__FILE__, __LINE__, __VA_ARGS__) /* passes the calling file and line ahead of the caller's arguments */
+#else
+#define tokenization_err tokenization_err_ /* plain alias: tokenization_err_ takes no source-location parameters in this configuration */
+#endif /* ERR_SHOW_SOURCE_LOCATION */
+
/* for use after tokenization */
static void tokr_err_(
#if ERR_SHOW_SOURCE_LOCATION
const char *src_file, int src_line,
#endif
Tokenizer *t, const char *fmt, ...) {
+ if (!t->err_ctx->enabled) return;
#if ERR_SHOW_SOURCE_LOCATION
- if (!t->token->pos.ctx->enabled) return;
- err_fprint("At line %d of %s:\n", src_line, src_file);
+ err_fprint("Generated by line %d of %s:\n", src_line, src_file);
#endif
va_list args;
va_start(args, fmt);
@@ -454,11 +493,12 @@ static bool tokenize_string(Tokenizer *t, char *str) {
if (*t->s == '\\') {
/* escape sequence */
tokr_nextchar(t);
- c = tokr_esc_seq(t);
- if (c == 0) {
+ int e = tokr_esc_seq(t);
+ if (e == -1) {
tokenization_err(t, "Unrecognized escape character: '\\%c'.", *t->s);
goto err;
}
+ c = (char)e;
} else {
c = *t->s;
tokr_nextchar(t);
@@ -502,12 +542,12 @@ static bool tokenize_string(Tokenizer *t, char *str) {
assert(*t->s);
if (*t->s == '\\') {
tokr_nextchar(t);
- char c = tokr_esc_seq(t);
- if (c == 0) {
+ int c = tokr_esc_seq(t);
+ if (c == -1) {
tokenization_err(t, "Unrecognized escape character: '\\%c'.", *t->s);
goto err;
}
- *strptr++ = c;
+ *strptr++ = (char)c;
} else {
*strptr++ = *t->s;
tokr_nextchar(t);