diff options
author | Leo Tenenbaum <pommicket@gmail.com> | 2019-08-21 20:42:16 -0400 |
---|---|---|
committer | Leo Tenenbaum <pommicket@gmail.com> | 2019-08-21 20:42:16 -0400 |
commit | d63a28aa4d227544b912b3edc2479de622d18a32 (patch) | |
tree | 42aac6845611365d4243dcc7f9b36214da3dcd09 | |
parent | 1c214758924bbfcd0cbafb6a4240210f057de007 (diff) |
started identification
-rw-r--r-- | blocks.c | 49 | ||||
-rw-r--r-- | identifiers.c | 11 | ||||
-rw-r--r-- | parse.c | 71 | ||||
-rw-r--r-- | test.toc | 7 | ||||
-rw-r--r-- | toc.c | 1 | ||||
-rw-r--r-- | tokenizer.c | 15 | ||||
-rw-r--r-- | util/arr.c | 5 | ||||
-rw-r--r-- | util/err.c | 44 |
8 files changed, 165 insertions, 38 deletions
diff --git a/blocks.c b/blocks.c new file mode 100644 index 0000000..5c06a6e --- /dev/null +++ b/blocks.c @@ -0,0 +1,49 @@ +/* identifies identifiers in this block */ +static bool block_enter(Block *b) { + bool ret = true; + arr_foreach(&b->stmts, Statement, stmt) { + if (stmt->kind == STMT_DECL) { + Declaration *decl = &stmt->decl; + arr_foreach(&decl->idents, Identifier, ident) { + Array *decls = &(*ident)->decls; + if (decls->item_sz) { + /* check that it hasn't been declared in this block */ + IdentDecl *prev = decls->last; + if (prev->scope == b) { + err_print(decl->where, "Re-declaration of identifier in the same block."); + info_print(prev->decl->where, "Previous declaration was here."); + ret = false; + continue; + } + } else { + /* array not initialized yet */ + arr_create(&(*ident)->decls, sizeof(IdentDecl)); + } + IdentDecl *ident_decl = arr_add(decls); + ident_decl->decl = decl; + ident_decl->scope = b; + } + } + } + return ret; +} + +/* de-identifies identifiers in this block */ +static bool block_exit(Block *b) { + /* OPTIM: figure out some way of not re-iterating over everything */ + bool ret = true; + arr_foreach(&b->stmts, Statement, stmt) { + if (stmt->kind == STMT_DECL) { + Declaration *decl = &stmt->decl; + arr_foreach(&decl->idents, Identifier, ident) { + Array *decls = &(*ident)->decls; + assert(decls->item_sz); + IdentDecl *last_decl = decls->last; + if (last_decl->scope == b) + arr_remove_last(decls); /* remove that declaration */ + + } + } + } + return ret; +} diff --git a/identifiers.c b/identifiers.c index 572aa4e..30fd755 100644 --- a/identifiers.c +++ b/identifiers.c @@ -16,20 +16,27 @@ static int ident_char_index(int c) { /* can this character be used in an identifier? */ static int isident(int c) { - return ident_char_index(c) != -1; /* OPTIM: Write separate function */ + /* NOTE: . is only used internally in identifiers */ + return ident_char_index(c) != -1 && c != '.'; /* OPTIM: Write separate function */ } /* can this character be used as the first character in an identifier? */ static int isidentstart(int c) { - return isident(c) && c != '.'; + return isident(c); } +typedef struct { + struct Block *scope; /* NULL for file scope */ + struct Declaration *decl; +} IdentDecl; + typedef struct IdentTree { /* zero value is an empty trie */ long id; int len; /* length of identifier = depth in tree */ struct IdentTree *children; struct IdentTree *parent; + Array decls; /* array of declarations of this identifier */ } IdentTree; typedef IdentTree *Identifier; @@ -1,3 +1,9 @@ +/* + TODO: + all of these functions should leave the tokenizer at a "reasonable" place + for parsing to continue. +*/ + typedef enum { TYPE_VOID, TYPE_BUILTIN @@ -33,7 +39,7 @@ typedef struct { Type type; } Param; -typedef struct { +typedef struct Block { Array stmts; } Block; @@ -95,7 +101,9 @@ typedef struct Expression { #define DECL_FLAG_INFER_TYPE 0x01 #define DECL_FLAG_CONST 0x02 #define DECL_FLAG_HAS_EXPR 0x04 -typedef struct { + +/* OPTIM: Instead of using dynamic arrays, do two passes. */ +typedef struct Declaration { Location where; Array idents; Type type; @@ -103,8 +111,6 @@ typedef struct { uint16_t flags; } Declaration; -/* OPTIM: Instead of using dynamic arrays, do two passes. */ - typedef enum { STMT_DECL, STMT_EXPR @@ -126,6 +132,7 @@ typedef struct { typedef struct { Tokenizer *tokr; BlockArr exprs; /* a dynamic array of expressions, so that we don't need to call malloc every time we make an expression */ + Block *block; /* which block are we in? NULL = file scope */ } Parser; /* @@ -218,19 +225,23 @@ static bool stmt_parse(Statement *s, Parser *p); static bool block_parse(Block *b, Parser *p) { Tokenizer *t = p->tokr; + Block *prev_block = p->block; + p->block = b; if (!token_is_kw(t->token, KW_LBRACE)) { tokr_err(t, "Expected '{' to open block."); return false; } t->token++; /* move past { */ arr_create(&b->stmts, sizeof(Statement)); - + bool ret = true; if (!token_is_kw(t->token, KW_RBRACE)) { /* non-empty function body */ while (1) { Statement *stmt = arr_add(&b->stmts); - if (!stmt_parse(stmt, p)) - return false; + if (!stmt_parse(stmt, p)) { + ret = false; + continue; + } if (token_is_kw(t->token, KW_RBRACE)) break; if (t->token->kind == TOKEN_EOF) { tokr_err(t, "Expected '}' to close function body."); @@ -240,7 +251,8 @@ static bool block_parse(Block *b, Parser *p) { } t->token++; /* move past } */ - return true; + p->block = prev_block; + return ret; } static bool fn_expr_parse(FnExpr *f, Parser *p) { @@ -652,6 +664,7 @@ static bool expr_parse(Expression *e, Parser *p, Token *end) { static bool decl_parse(Declaration *d, Parser *p) { Tokenizer *t = p->tokr; /* OPTIM: Maybe don't use a dynamic array or use parser allocator. */ + d->where = t->token->where; arr_create(&d->idents, sizeof(Identifier)); while (1) { @@ -661,6 +674,24 @@ static bool decl_parse(Declaration *d, Parser *p) { return false; } *ident = t->token->ident; + /* + only keep track of file scoped declarations--- + blocks.c will handle the rest + */ + if (p->block == NULL) { + if ((*ident)->decls.len) { + /* this was already declared! */ + IdentDecl *prev = (*ident)->decls.data; + tokr_err(t, "Re-declaration of identifier in global scope."); + info_print(prev->decl->where, "Previous declaration was here."); + return false; + } + assert(!(*ident)->decls.item_sz); + arr_create(&(*ident)->decls, sizeof(IdentDecl)); + IdentDecl *ident_decl = arr_add(&(*ident)->decls); + ident_decl->decl = d; + ident_decl->scope = NULL; + } t->token++; if (token_is_kw(t->token, KW_COMMA)) { t->token++; @@ -670,7 +701,8 @@ static bool decl_parse(Declaration *d, Parser *p) { t->token++; break; } - tokr_err(t, "Expected ',' to continue listing variables or ':' to indicate type."); + tokr_err(t, "Expected ',' to continue listing variables or ':' to indicate type."); + return false; } d->flags = 0; @@ -724,13 +756,29 @@ static bool stmt_parse(Statement *s, Parser *p) { */ if (token_is_kw(t->token + 1, KW_COLON) || token_is_kw(t->token + 1, KW_COMMA)) { s->kind = STMT_DECL; - return decl_parse(&s->decl, p); + if (!decl_parse(&s->decl, p)) { + /* move to next statement */ + /* TODO: This might cause unhelpful errors if the first semicolon is inside a block, etc. */ + while (!token_is_kw(t->token, KW_SEMICOLON)) { + if (t->token->kind == TOKEN_EOF) { + /* don't bother continuing */ + return false; + } + t->token++; + } + return false; + } + return true; } else { s->kind = STMT_EXPR; - if (!expr_parse(&s->expr, p, expr_find_end(p, EXPR_END_SEMICOLON))) + Token *end = expr_find_end(p, EXPR_END_SEMICOLON); + if (!expr_parse(&s->expr, p, end)) { + t->token = end; return false; + } if (!token_is_kw(t->token, KW_SEMICOLON)) { tokr_err(t, "Expected ';' at end of statement."); + t->token = end; return false; } t->token++; /* move past ; */ @@ -740,6 +788,7 @@ static bool stmt_parse(Statement *s, Parser *p) { static void parser_from_tokenizer(Parser *p, Tokenizer *t) { p->tokr = t; + p->block = NULL; block_arr_create(&p->exprs, 10, sizeof(Expression)); /* block size = 1024 */ } @@ -1,3 +1,4 @@ -main :- (fn(){ - f(1,2+3---4-3-34-+a+-234-234); -})(1,2,3,4,5--23423-2342); +main :- fn(){ + printf("Hello!\n"); +}; +main := 7;
\ No newline at end of file @@ -15,3 +15,4 @@ #include "identifiers.c" #include "tokenizer.c" #include "parse.c" +#include "blocks.c" diff --git a/tokenizer.c b/tokenizer.c index 2f022e0..7f6dcf6 100644 --- a/tokenizer.c +++ b/tokenizer.c @@ -76,12 +76,7 @@ typedef struct { size_t len; } StrLiteral; -typedef struct { - LineNo line; - char *code; -} Location; - -/* NOTE: LineNo is typedef'd in util/err.c */ +/* NOTE: Location is typedef'd in util/err.c */ typedef struct { TokenKind kind; Location where; @@ -178,10 +173,10 @@ static char tokr_esc_seq(Tokenizer *t) { /* to be used during tokenization */ static void tokenization_err(Tokenizer *t, const char *fmt, ...) { va_list args; + Location where = {t->line, t->s}; va_start(args, fmt); - err_vprint(t->line, t->s, fmt, args); + err_vprint(where, fmt, args); va_end(args); - char *end_of_line = strchr(t->s, '\n'); if (end_of_line) { t->s = end_of_line; @@ -194,12 +189,12 @@ static void tokenization_err(Tokenizer *t, const char *fmt, ...) { /* to be used after tokenization */ static void tokr_err_(const char *src_file, int src_line, Tokenizer *t, const char *fmt, ...) { - LineNo line = t->token->where.line; err_fprint("At line %d of %s:\n", src_line, src_file); /* RELEASE: Remove this */ va_list args; va_start(args, fmt); - err_vprint(line, t->token->where.code, fmt, args); + err_vprint(t->token->where, fmt, args); va_end(args); + LineNo line = t->token->where.line; while (1) { if (t->token->where.line != line) break; if (t->token->kind == TOKEN_EOF) break; @@ -29,6 +29,11 @@ void *arr_add(Array *arr) { return item; } +void arr_remove_last(Array *arr) { + arr->len--; + /* OPTIM (memory): Shorten array. */ +} + void arr_free(Array *arr) { free(arr->data); } @@ -2,14 +2,21 @@ #if USE_COLORED_TEXT #define TEXT_ERROR(x) "\x1b[91m" x "\x1b[0m" +#define TEXT_INFO(x) "\x1b[94m" x "\x1b[0m" #define TEXT_IMPORTANT(x) "\x1b[1m" x "\x1b[0m" #else #define TEXT_ERROR(x) x +#define TEXT_INFO(x) x #define TEXT_IMPORTANT(x) x #endif typedef uint32_t LineNo; +typedef struct { + LineNo line; + char *code; +} Location; + /* file name of file being processed */ static const char *err_filename; @@ -33,6 +40,10 @@ static void err_print_header_(LineNo line) { err_fprint(TEXT_ERROR("error:") " at line %lu of %s:\n", (unsigned long)line, err_filename); } +static void info_print_header_(LineNo line) { + err_fprint(TEXT_INFO("info:") " at line %lu of %s:\n", (unsigned long)line, err_filename); +} + static void err_print_footer_(const char *context) { err_fprint("\n\there --> "); const char *end = strchr(context, '\n'); @@ -48,21 +59,30 @@ static void err_print_footer_(const char *context) { /* Write nicely-formatted errors to the error file */ -/* static void err_print(LineNo line, const char *context, const char *fmt, ...) { */ -/* err_print_header_(line); */ -/* va_list args; */ -/* va_start(args, fmt); */ -/* err_vfprint(fmt, args); */ -/* va_end(args); */ -/* err_print_footer_(context); */ -/* } */ - -static void err_vprint(LineNo line, const char *context, const char *fmt, va_list args) { - err_print_header_(line); + +static void err_vprint(Location where, const char *fmt, va_list args) { + err_print_header_(where.line); + err_vfprint(fmt, args); + err_print_footer_(where.code); +} + +static void err_print(Location where, const char *fmt, ...) { + va_list args; + va_start(args, fmt); + err_vprint(where, fmt, args); + va_end(args); +} + +static void info_print(Location where, const char *fmt, ...) { + va_list args; + va_start(args, fmt); + info_print_header_(where.line); err_vfprint(fmt, args); - err_print_footer_(context); + err_print_footer_(where.code); + va_end(args); } + static void *err_malloc(size_t size) { void *ret = malloc(size); if (!ret) { |