diff options
Diffstat (limited to 'tokenizer.c')
-rw-r--r-- | tokenizer.c | 74 |
1 files changed, 48 insertions, 26 deletions
diff --git a/tokenizer.c b/tokenizer.c index 14cfcae..2340496 100644 --- a/tokenizer.c +++ b/tokenizer.c @@ -145,7 +145,33 @@ static char tokr_esc_seq(Tokenizer *t) { } +static Location token_location(Token *t) { + Location loc; + loc.start = t; + loc.end = t + 1; + return loc; +} +/* for use during tokenization */ +static void tokenization_err(Tokenizer *t, const char *fmt, ...) { + va_list args; + va_start(args, fmt); + err_fprint(TEXT_ERROR("error") " at line %lu of %s:\n", (unsigned long)t->line, t->err_ctx->filename); + err_vfprint(fmt, args); + va_end(args); + err_fprint("\n"); + U32 pos = (U32)(t->s - t->err_ctx->str); + err_print_location_text_from_str(t->err_ctx->str, pos, pos + 1); + while (*t->s) { + if (*t->s == '\n') { + tokr_nextchar(t); + break; + } + ++t->s; + } +} + +/* for use after tokenization */ static void tokr_err_( #if ERR_SHOW_SOURCE_LOCATION const char *src_file, int src_line, @@ -155,13 +181,9 @@ static void tokr_err_( if (!t->token->pos.ctx->enabled) return; err_fprint("At line %d of %s:\n", src_line, src_file); #endif - Location where; - where.first = t->token; - where.last = t->token; - va_list args; va_start(args, fmt); - err_vprint(where, fmt, args); + err_vprint(token_location(t->token), fmt, args); va_end(args); } @@ -171,6 +193,7 @@ static void tokr_err_( #define tokr_err tokr_err_ #endif + static void tokr_put_start_pos(Tokenizer *tokr, Token *t) { t->pos.line = tokr->line; t->pos.ctx = tokr->err_ctx; @@ -244,7 +267,7 @@ static bool tokenize_string(Tokenizer *t, char *str) { ++comment_level; } else { if (*t->s == 0) { - tokr_err(t, "End of file reached inside multi-line comment."); + tokenization_err(t, "End of file reached inside multi-line comment."); return false; } @@ -261,16 +284,16 @@ static bool tokenize_string(Tokenizer *t, char *str) { if (*t->s == '#') { /* it's a directive */ - char *start_s = t->s; + Token token; + tokr_put_start_pos(t, &token); ++t->s; /* move past # */ Directive direct = tokenize_direct(&t->s); if (direct != DIRECT_COUNT) { /* it's a directive */ - Token *token = tokr_add(t); - tokr_put_start_pos(t, token); - token->where.pos = (U32)(start_s - token->where.ctx->str); - token->kind = TOKEN_DIRECT; - token->direct = direct; + tokr_put_end_pos(t, &token); + token.kind = TOKEN_DIRECT; + token.direct = direct; + *(Token *)arr_add(&t->tokens) = token; continue; } --t->s; /* go back to # */ @@ -279,15 +302,15 @@ static bool tokenize_string(Tokenizer *t, char *str) { } { - char *start_s = t->s; + Token token; + tokr_put_start_pos(t, &token); Keyword kw = tokenize_kw(&t->s); if (kw != KW_COUNT) { /* it's a keyword */ - Token *token = tokr_add(t); - tokr_put_location(t, token); - token->where.pos = (U32)(start_s - token->where.ctx->str); - token->kind = TOKEN_KW; - token->kw = kw; + tokr_put_end_pos(t, &token); + token.kind = TOKEN_KW; + token.kw = kw; + *(Token *)arr_add(&t->tokens) = token; continue; } } @@ -298,7 +321,6 @@ static bool tokenize_string(Tokenizer *t, char *str) { int base = 10; Floating decimal_pow10 = 0; Token *token = tokr_add(t); - tokr_put_location(t, token); NumLiteral *n = &token->num; n->kind = NUM_LITERAL_INT; n->intval = 0; @@ -412,7 +434,7 @@ static bool tokenize_string(Tokenizer *t, char *str) { } tokr_nextchar(t); } - + tokr_put_end_pos(t, token); token->kind = TOKEN_LITERAL_NUM; continue; } @@ -420,7 +442,6 @@ static bool tokenize_string(Tokenizer *t, char *str) { if (*t->s == '\'') { /* it's a character literal! */ Token *token = tokr_add(t); - tokr_put_location(t, token); tokr_nextchar(t); char c; if (*t->s == '\\') { @@ -442,13 +463,13 @@ static bool tokenize_string(Tokenizer *t, char *str) { tokr_nextchar(t); token->kind = TOKEN_LITERAL_CHAR; token->chr = c; + tokr_put_end_pos(t, token); continue; } if (*t->s == '"') { /* it's a string literal! */ Token *token = tokr_add(t); - tokr_put_location(t, token); tokr_nextchar(t); size_t len = 0; /* counts \n as 2 chars */ size_t backslashes = 0; @@ -456,8 +477,8 @@ static bool tokenize_string(Tokenizer *t, char *str) { if (*t->s == '\\') { ++backslashes; } else if (*t->s == 0) { - /* return t to opening " so that we go to the next line */ - tokr_get_location(t, token); + /* return t to opening " */ + tokr_get_start_pos(t, token); tokenization_err(t, "No matching \" found."); goto err; } else { @@ -468,7 +489,7 @@ static bool tokenize_string(Tokenizer *t, char *str) { } char *strlit = tokr_malloc(t, len + 1); char *strptr = strlit; - tokr_get_location(t, token); + tokr_get_start_pos(t, token); tokr_nextchar(t); /* past opening " */ while (*t->s != '"') { assert(*t->s); @@ -490,16 +511,17 @@ static bool tokenize_string(Tokenizer *t, char *str) { token->str.len = (size_t)(strptr - strlit); token->str.str = strlit; tokr_nextchar(t); /* move past closing " */ + tokr_put_end_pos(t, token); continue; } if (isident(*t->s)) { /* it's an identifier */ Token *token = tokr_add(t); - tokr_put_location(t, token); Identifier ident = ident_insert(t->idents, &t->s); token->kind = TOKEN_IDENT; token->ident = ident; + tokr_put_end_pos(t, token); continue; } tokenization_err(t, "Token not recognized"); |