From 4ab1fd88714fe4314bef7b1cc2429c655e482540 Mon Sep 17 00:00:00 2001 From: Leo Tenenbaum Date: Wed, 24 Jun 2020 20:19:33 -0400 Subject: started moving #include to parsing; not working yet --- cgen.c | 3 - copy.c | 15 +++-- development.md | 1 - eval.c | 3 - main.c | 4 +- misc.c | 5 ++ parse.c | 184 +++++++++++++++++++++++++++++++++++++++++++++++++++------ test.toc | 4 +- types.c | 172 +---------------------------------------------------- types.h | 18 +----- 10 files changed, 187 insertions(+), 222 deletions(-) diff --git a/cgen.c b/cgen.c index 64f0306..ede4604 100644 --- a/cgen.c +++ b/cgen.c @@ -1882,9 +1882,6 @@ static void cgen_stmt(CGenerator *g, Statement *s) { case STMT_BLOCK: cgen_block(g, s->block, 0); break; - case STMT_INCLUDE: - assert(0); - break; } } diff --git a/copy.c b/copy.c index b6716f5..e96b2ac 100644 --- a/copy.c +++ b/copy.c @@ -348,11 +348,6 @@ static void copy_stmt(Copier *c, Statement *out, Statement *in) { if (in->ret->flags & RET_HAS_EXPR) copy_expr(c, &out->ret->expr, &in->ret->expr); break; - case STMT_INCLUDE: - out->inc = copier_malloc(c, sizeof *out->inc); - *out->inc = *in->inc; - copy_expr(c, &out->inc->filename, &in->inc->filename); - break; case STMT_EXPR: out->expr = copy_expr_(c, in->expr); break; @@ -422,9 +417,13 @@ static void copy_stmt(Copier *c, Statement *out, Statement *in) { case STMT_BLOCK: copy_block(c, out->block = copier_malloc(c, sizeof *out->block), in->block, 0); break; - case STMT_INLINE_BLOCK: - assert(0); /* only exists after typing */ - break; + case STMT_INLINE_BLOCK: { + size_t nstmts = arr_len(in->inline_block); + arr_set_lena(out->inline_block, nstmts, c->allocr); + for (size_t i = 0; i < nstmts; ++i) { + copy_stmt(c, &out->inline_block[i], &in->inline_block[i]); + } + } break; } } diff --git a/development.md b/development.md index 6795987..daa0735 100644 --- a/development.md +++ b/development.md @@ -36,4 +36,3 @@ declaration). It is assumed that the number of identifiers in a declaration, or parameters to a function will fit in an int, since a function with (at least) 32768 parameters is ridiculous. - diff --git a/eval.c b/eval.c index 501f267..6e26be9 100644 --- a/eval.c +++ b/eval.c @@ -1691,9 +1691,6 @@ static Status eval_stmt(Evaluator *ev, Statement *stmt) { case STMT_BLOCK: if (!eval_block(ev, stmt->block)) return false; break; - case STMT_INCLUDE: - assert(0); - break; } return true; } diff --git a/main.c b/main.c index e54659d..7dec0fe 100644 --- a/main.c +++ b/main.c @@ -216,7 +216,7 @@ int main(int argc, char **argv) { if (verbose) printf("Parsing...\n"); Parser p; - parser_create(&p, &globals, &t, &main_allocr); + parser_create(&p, &globals, &t, &main_allocr, &file); ParsedFile f; if (!parse_file(&p, &f)) { err_text_important(&err_ctx, "Errors occured during parsing.\n"); @@ -234,7 +234,7 @@ int main(int argc, char **argv) { Typer tr; Evaluator ev; evalr_create(&ev, &tr, &main_allocr); - typer_create(&tr, &ev, &err_ctx, &main_allocr, &globals, &file); + typer_create(&tr, &ev, &err_ctx, &main_allocr, &globals);; if (!types_file(&tr, &f)) { err_text_important(&err_ctx, "Errors occured while determining types.\n"); diff --git a/misc.c b/misc.c index de9a49f..352deb2 100644 --- a/misc.c +++ b/misc.c @@ -66,6 +66,11 @@ static inline char *str_to_cstr(String s) { return cstr(s.str, s.len); } +static void print_str(String s) { + fwrite(s.str, 1, s.len, stdout); + printf("\n"); +} + static inline bool str_eq_cstr(String s, const char *str) { return strncmp(s.str, str, s.len) == 0; } diff --git a/parse.c b/parse.c index d8c20eb..1f0bdd7 100644 --- a/parse.c +++ b/parse.c @@ -3,6 +3,8 @@ This file is part of toc. toc is distributed under version 3 of the GNU General Public License, without any warranty whatsoever. You should have received a copy of the GNU General Public License along with toc. If not, see . */ +static void parser_create(Parser *p, Identifiers *globals, Tokenizer *t, Allocator *allocr, File *main_file); +static Status parse_file(Parser *p, ParsedFile *f); static Status parse_expr(Parser *p, Expression *e, Token *end); static Status parse_stmt(Parser *p, Statement *s, bool *was_a_statement); enum { @@ -338,6 +340,10 @@ static inline void parser_put_end(Parser *p, Location *l) { parser_set_end_to_token(p, l, p->tokr->token); } +static inline Identifiers *parser_get_idents(Parser *p) { + return p->block == NULL ? p->globals : &p->block->idents; +} + #define parser_arr_add_ptr(p, a) arr_adda_ptr(a, p->allocr) #define parser_arr_add(p, a, x) arr_adda(a, x, p->allocr) #define parser_arr_set_len(p, a, l) arr_set_lena(a, l, p->allocr) @@ -1340,8 +1346,13 @@ static Status parse_expr(Parser *p, Expression *e, Token *end) { Namespace *n = e->nms = parser_calloc(p, 1, sizeof *n); e->kind = EXPR_NMS; ++t->token; - if (!parse_block(p, &n->body, 0)) + Namespace *prev = p->nms; + p->nms = n; + if (!parse_block(p, &n->body, 0)) { + p->nms = prev; return false; + } + p->nms = prev; n->body.kind = BLOCK_NMS; goto success; } @@ -2195,6 +2206,33 @@ static bool is_decl(Tokenizer *t) { } } +/* introduce identifiers from stmts into current scope, setting their "nms" field to nms */ +static Status include_stmts_link_to_nms(Parser *p, Namespace *nms, Statement *stmts) { + Identifiers *idents = parser_get_idents(p); + arr_foreach(stmts, Statement, s) { + if (s->kind == STMT_INLINE_BLOCK) { + if (!include_stmts_link_to_nms(p, nms, s->inline_block)) + return false; + } else if (s->kind == STMT_DECL) { + Declaration *d = s->decl; + arr_foreach(d->idents, Identifier, ident) { + /* @OPTIM: only hash once */ + Identifier preexisting = ident_translate(*ident, idents); + if (preexisting && preexisting->decl != d) { + char *istr = ident_to_str(preexisting); + err_print(d->where, "Redeclaration of identifier %s.", istr); + info_print(preexisting->decl->where, "%s was first declared here.", istr); + free(istr); + } + Identifier i = ident_translate_forced(*ident, idents); + i->nms = nms; + i->decl = d; + } + } + } + return true; +} + /* sets *was_a_statement to false if s was not filled, but the token was advanced */ static Status parse_stmt(Parser *p, Statement *s, bool *was_a_statement) { Tokenizer *t = p->tokr; @@ -2452,28 +2490,28 @@ static Status parse_stmt(Parser *p, Statement *s, bool *was_a_statement) { } else if (t->token->kind == TOKEN_DIRECT) { switch (t->token->direct) { case DIRECT_INCLUDE: { - Include *i = s->inc = parser_malloc(p, sizeof *i); ++t->token; - s->kind = STMT_INCLUDE; - i->flags = 0; + s->kind = STMT_INLINE_BLOCK; + bool forced = false; if (token_is_direct(t->token, DIRECT_FORCE)) { - i->flags |= INC_FORCED; + forced = true; ++t->token; } - if (!parse_expr(p, &i->filename, expr_find_end(p, EXPR_CAN_END_WITH_COMMA))) { - tokr_skip_semicolon(t); + if (t->token->kind != TOKEN_LITERAL_STR) { + tokr_err(t, "#include file name must be string literal."); return false; } + char *filename = str_to_cstr(t->token->str); + ++t->token; + Identifier nms_ident = NULL; /* identifier of namespace to include to */ if (token_is_kw(t->token, KW_COMMA)) { ++t->token; if (t->token->kind != TOKEN_IDENT) { tokr_err(t, "Expected identifier after , in #include (to specify include namespace)."); return false; } - i->nms = t->token->ident; + nms_ident = parser_ident_insert(p, t->token->ident); ++t->token; - } else { - i->nms = NULL; } if (!token_is_kw(t->token, KW_SEMICOLON)) { tokr_err(t, "Expected ; after #include directive"); @@ -2481,6 +2519,122 @@ static Status parse_stmt(Parser *p, Statement *s, bool *was_a_statement) { return false; } ++t->token; + + Block *prev_block = p->block; + Namespace *prev_nms = p->nms; + IncludedFile *inc_f = NULL; + Namespace *inc_nms = NULL; /* non-NULL if this is an include to nms */ + bool success = true; + if (nms_ident) { + inc_nms = parser_calloc(p, 1, sizeof *inc_nms); + Block *body = &inc_nms->body; + body->kind = BLOCK_NMS; + body->where = s->where; + idents_create(&body->idents, p->allocr, body); + body->parent = prev_block; + /* turn #include "foo", bar into bar ::= nms { ... } */ + s->kind = STMT_DECL; + Declaration *d = s->decl = parser_calloc(p, 1, sizeof *d); + d->flags = DECL_HAS_EXPR | DECL_IS_CONST; + d->type.kind = TYPE_BUILTIN; + d->type.builtin = BUILTIN_NMS; + Identifier i = nms_ident; + if (i->decl) { + Declaration *d2 = i->decl; + /* maybe they included it twice into one namespace */ + if ((d2->flags & DECL_HAS_EXPR) && (d2->expr.kind == EXPR_NMS) && + (d2->expr.nms->inc_file == inc_f)) { + /* that's okay; get rid of this declaration */ + s->kind = STMT_INLINE_BLOCK; + s->inline_block = NULL; + break; + } else { + char *istr = ident_to_str(i); + err_print(s->where, "Redeclaration of identifier %s.", istr); + info_print(ident_decl_location(i), "Previous declaration was here."); + free(istr); + return false; /* NOT goto inc_fail; */ + } + } + parser_arr_add(p, d->idents, i); + i->decl = d; + d->expr.kind = EXPR_NMS; + d->expr.nms = inc_nms; + d->expr.flags = EXPR_FOUND_TYPE; + d->expr.type = d->type; + d->where = d->expr.where = s->where; + /* we need to be in the block to parse it properly */ + p->block = &inc_nms->body; + p->nms = inc_nms; + } else { + s->kind = STMT_INLINE_BLOCK; + } + + if (!forced) { + size_t filename_len = strlen(filename); + if (streq(filename, p->main_file->filename)) { + err_print(s->where, "Circular #include detected. You can add #force to this #include to force it to be included."); + success = false; goto nms_done; + } + inc_f = str_hash_table_get(&p->included_files, filename, filename_len); + if (inc_f) { + /* has already been included */ + if (inc_f->flags & INC_FILE_INCLUDING) { + err_print(s->where, "Circular #include detected. You can add #force to this #include to force it to be included."); + success = false; goto nms_done; + } + if (s->kind == STMT_INLINE_BLOCK) s->inline_block = NULL; /* nothing needed here */ + /* just set ident declarations */ + if (!include_stmts_link_to_nms(p, inc_f->main_nms, inc_f->stmts)) { + success = false; goto nms_done; + } + goto nms_done; + } + inc_f = str_hash_table_insert(&p->included_files, filename, filename_len); + inc_f->flags |= INC_FILE_INCLUDING; + inc_f->main_nms = p->nms; + } + { + char *contents = read_file_contents(p->allocr, filename, s->where); + if (!contents) { + success = false; goto nms_done; + } + + Tokenizer tokr; + tokr_create(&tokr, p->file->ctx, p->allocr); + File *file = parser_calloc(p, 1, sizeof *file); + file->filename = filename; + file->contents = contents; + file->ctx = p->file->ctx; + + if (!tokenize_file(&tokr, file)) { + success = false; goto nms_done; + } + Parser parser; + parser_create(&parser, p->globals, &tokr, p->allocr, p->main_file); + parser.block = p->block; + parser.nms = p->nms; + ParsedFile parsed_file; + if (!parse_file(&parser, &parsed_file)) { + success = false; goto nms_done; + } + Statement *stmts_inc = parsed_file.stmts; + if (inc_f) { + inc_f->stmts = stmts_inc; + } + if (s->kind == STMT_INLINE_BLOCK) s->inline_block = stmts_inc; + if (inc_nms) { + inc_nms->body.stmts = stmts_inc; + } + } + nms_done: + if (inc_nms) { + p->nms = prev_nms; + p->block = prev_block; + } + if (inc_f) inc_f->flags &= (IncFileFlags)~(IncFileFlags)INC_FILE_INCLUDING; + if (!success) return false; + free(filename); } break; case DIRECT_IF: goto if_stmt; @@ -2585,11 +2739,13 @@ static Status parse_stmt(Parser *p, Statement *s, bool *was_a_statement) { return true; } -static void parser_create(Parser *p, Identifiers *globals, Tokenizer *t, Allocator *allocr) { +static void parser_create(Parser *p, Identifiers *globals, Tokenizer *t, Allocator *allocr, File *main_file) { p->tokr = t; p->block = NULL; p->globals = globals; p->allocr = allocr; + p->main_file = main_file; + str_hash_table_create(&p->included_files, sizeof(IncludedFile), p->allocr); } static Status parse_file(Parser *p, ParsedFile *f) { @@ -2871,12 +3027,6 @@ static void fprint_stmt(FILE *out, Statement *s) { fprint_expr(out, &r->expr); fprintf(out, ";\n"); } break; - case STMT_INCLUDE: { - Include *i = s->inc; - fprintf(out, "#include "); - fprint_expr(out, &i->filename); - fprintf(out, ";\n"); - } break; case STMT_MESSAGE: { Message *m = s->message; switch (m->kind) { diff --git a/test.toc b/test.toc index 438efc3..ca38680 100644 --- a/test.toc +++ b/test.toc @@ -7,8 +7,8 @@ #init(-42) init(4); init ::= fn(a: int) { - //writes("Initializing... #"); - //writei(a); + writes("Initializing... #"); + writei(a); x = 5; } diff --git a/types.c b/types.c index 1ceec1b..da95847 100644 --- a/types.c +++ b/types.c @@ -2977,10 +2977,6 @@ static Status types_block(Typer *tr, Block *b) { arr_foreach(b->stmts, Statement, s) { if (!types_stmt(tr, s)) { success = false; - if (tr->had_include_err) { - /* stop immediately; prevent too many "undeclared identifier" errors */ - break; - } continue; } } @@ -3240,33 +3236,6 @@ static Status fix_ident_decls_inline_block(Typer *tr, Statement *stmts) { return true; } -/* introduce identifiers from stmts into current scope, setting their "nms" field to nms */ -static Status include_stmts_link_to_nms(Typer *tr, Namespace *nms, Statement *stmts) { - Identifiers *idents = typer_get_idents(tr); - arr_foreach(stmts, Statement, s) { - if (s->kind == STMT_INLINE_BLOCK) { - if (!include_stmts_link_to_nms(tr, nms, s->inline_block)) - return false; - } else if (s->kind == STMT_DECL) { - Declaration *d = s->decl; - arr_foreach(d->idents, Identifier, ident) { - /* @OPTIM: only hash once */ - Identifier preexisting = ident_translate(*ident, idents); - if (preexisting && preexisting->decl != d) { - char *istr = ident_to_str(preexisting); - err_print(d->where, "Redeclaration of identifier %s.", istr); - info_print(preexisting->decl->where, "%s was first declared here.", istr); - free(istr); - } - Identifier i = ident_translate_forced(*ident, idents); - i->nms = nms; - i->decl = d; - } - } - } - return true; -} - static Status types_stmt(Typer *tr, Statement *s) { top: if (s->flags & STMT_TYPED) return true; @@ -3752,139 +3721,6 @@ top: } } } break; - case STMT_INCLUDE: { - Include *inc = s->inc; - char *filename = eval_expr_as_cstr(tr, &inc->filename, "import filename"); - if (!filename) - return false; - Namespace *prev_nms = tr->nms; - Block *prev_block = tr->block; - IncludedFile *inc_f = NULL; - Namespace *inc_nms = NULL; /* non-NULL if this is an include to nms */ - bool success = true; - if (inc->nms) { - inc_nms = typer_calloc(tr, 1, sizeof *inc_nms); - - Block *body = &inc_nms->body; - body->kind = BLOCK_NMS; - body->where = s->where; - idents_create(&body->idents, tr->allocr, body); - body->parent = tr->block; - - inc_nms->inc_file = inc_f; - /* turn #include "foo", bar into bar ::= nms { ... } */ - s->kind = STMT_DECL; - Declaration *d = s->decl = typer_calloc(tr, 1, sizeof *d); - d->flags = DECL_FOUND_TYPE | DECL_HAS_EXPR | DECL_IS_CONST | DECL_FOUND_VAL; - construct_resolved_builtin_type(&d->type, BUILTIN_NMS); - char *ident_str = inc->nms; - Identifier i = ident_insert(typer_get_idents(tr), &ident_str); - if (i->decl) { - Declaration *d2 = i->decl; - /* maybe they included it twice into one namespace */ - if ((d2->flags & DECL_HAS_EXPR) && (d2->expr.kind == EXPR_NMS) && - (d2->expr.nms->inc_file == inc_f)) { - /* that's okay; get rid of this declaration */ - s->kind = STMT_INLINE_BLOCK; - s->inline_block = NULL; - break; - } else { - char *istr = ident_to_str(i); - err_print(s->where, "Redeclaration of identifier %s.", istr); - info_print(ident_decl_location(i), "Previous declaration was here."); - free(istr); - return false; /* NOT goto inc_fail; */ - } - } - typer_arr_add(tr, d->idents, i); - i->decl = d; - if (is_at_top_level(tr)) inc_nms->associated_ident = i; - typer_gen_nms_prefix(tr, inc_nms); - - d->expr.kind = EXPR_NMS; - d->expr.nms = inc_nms; - d->expr.flags = EXPR_FOUND_TYPE; - d->expr.type = d->type; - d->val.nms = inc_nms; - d->where = d->expr.where = s->where; - - /* go inside namespace and block (it'll help to be there later on) */ - tr->nms = inc_nms; - typer_block_enter(tr, &inc_nms->body); - } else { - s->kind = STMT_INLINE_BLOCK; - } - - if (!(inc->flags & INC_FORCED)) { - size_t filename_len = strlen(filename); - if (streq(filename, tr->main_file->filename)) { - err_print(s->where, "Circular #include detected. You can add #force to this #include to force it to be included."); - success = false; goto nms_done; - } - inc_f = str_hash_table_get(&tr->included_files, filename, filename_len); - if (inc_f) { - /* has already been included */ - if (inc_f->flags & INC_FILE_INCLUDING) { - err_print(s->where, "Circular #include detected. You can add #force to this #include to force it to be included."); - success = false; goto nms_done; - } - if (s->kind == STMT_INLINE_BLOCK) s->inline_block = NULL; /* nothing needed here */ - /* just set ident declarations */ - if (!include_stmts_link_to_nms(tr, inc_f->main_nms, inc_f->stmts)) { - success = false; goto nms_done; - } - goto nms_done; - } - inc_f = str_hash_table_insert(&tr->included_files, filename, filename_len); - inc_f->flags |= INC_FILE_INCLUDING; - inc_f->main_nms = tr->nms; - } - { - char *contents = read_file_contents(tr->allocr, filename, s->where); - if (!contents) { - tr->had_include_err = true; - success = false; goto nms_done; - } - - Tokenizer tokr; - tokr_create(&tokr, tr->err_ctx, tr->allocr); - File *file = typer_calloc(tr, 1, sizeof *file); - file->filename = filename; - file->contents = contents; - file->ctx = tr->err_ctx; - - if (!tokenize_file(&tokr, file)) { - success = false; goto nms_done; - } - Parser parser; - parser_create(&parser, tr->globals, &tokr, tr->allocr); - parser.block = tr->block; - ParsedFile parsed_file; - if (!parse_file(&parser, &parsed_file)) { - success = false; goto nms_done; - } - Statement *stmts_inc = parsed_file.stmts; - if (inc_f) { - inc_f->stmts = stmts_inc; - } - if (s->kind == STMT_INLINE_BLOCK) s->inline_block = stmts_inc; - arr_foreach(stmts_inc, Statement, s_incd) { - if (!types_stmt(tr, s_incd)) { - success = false; goto nms_done; - } - } - if (inc_nms) { - inc_nms->body.stmts = stmts_inc; - } - } - nms_done: - if (inc_nms) { - tr->nms = prev_nms; - tr->block = prev_block; - } - if (inc_f) inc_f->flags &= (IncFileFlags)~(IncFileFlags)INC_FILE_INCLUDING; - if (!success) return false; - } break; case STMT_MESSAGE: { Message *m = s->message; char *text = eval_expr_as_cstr(tr, &m->text, "message"); @@ -3965,14 +3801,12 @@ success: return true; } -static void typer_create(Typer *tr, Evaluator *ev, ErrCtx *err_ctx, Allocator *allocr, Identifiers *idents, File *main_file) { +static void typer_create(Typer *tr, Evaluator *ev, ErrCtx *err_ctx, Allocator *allocr, Identifiers *idents) { memset(tr, 0, sizeof *tr); tr->evalr = ev; - tr->main_file = main_file; tr->err_ctx = err_ctx; tr->allocr = allocr; tr->globals = idents; - str_hash_table_create(&tr->included_files, sizeof(IncludedFile), tr->allocr); } static int compare_inits(const void *av, const void *bv) { @@ -3998,10 +3832,6 @@ static Status types_file(Typer *tr, ParsedFile *f) { f->inits = NULL; arr_foreach(f->stmts, Statement, s) { if (!types_stmt(tr, s)) { - if (tr->had_include_err) { - /* stop immediately; prevent too many "undeclared identifier" errors */ - return false; - } ret = false; } } diff --git a/types.h b/types.h index 922a7bf..35e0be4 100644 --- a/types.h +++ b/types.h @@ -915,12 +915,6 @@ enum { INC_TO_NMS = 0x02 }; -typedef struct { - U8 flags; - Expression filename; - char *nms; /* NULL if this is just a plain old #include, otherwise string which can be used with ident_get */ -} Include; - typedef enum { MESSAGE_ERROR, MESSAGE_WARN, @@ -943,7 +937,6 @@ typedef enum { STMT_RET, STMT_BREAK, STMT_CONT, - STMT_INCLUDE, /* turns into STMT_INLINE_BLOCK after typing */ STMT_MESSAGE, STMT_DEFER, STMT_USE, @@ -964,7 +957,6 @@ typedef struct Statement { Declaration *decl; /* we want the pointer to be fixed so that we can refer to it from an identifier */ Expression *expr; Return *ret; - Include *inc; Message *message; /* #error, #warn, #info */ Block *referring_to; /* for break/continue; set during typing */ struct Statement *defer; @@ -1005,8 +997,11 @@ typedef struct Parser { Allocator *allocr; Identifiers *globals; File *file; + File *main_file; /* this is the file which the compiler is invoked on. needed for checking for circular includes. */ Block *block; /* which block are we in? NULL = file scope */ + Namespace *nms; ParsedFile *parsed_file; + StrHashTable included_files; /* maps to IncludedFile */ } Parser; typedef struct { @@ -1044,7 +1039,6 @@ typedef struct Typer { Allocator *allocr; Evaluator *evalr; Identifiers *globals; - File *main_file; /* this is the file which the compiler is invoked on. needed for checking for circular includes. */ Use **uses; /* global used things */ Declaration **in_decls; /* array of declarations we are currently inside */ Block *block; @@ -1057,12 +1051,6 @@ typedef struct Typer { DeclWithCtx *all_globals; /* includes stuff in namespaces, as long as it's not in a function */ IdentID lbl_counter; unsigned long nms_counter; /* counter for namespace IDs */ - StrHashTable included_files; /* maps to IncludedFile */ - /* - have we had an error because we couldn't find a file that was #include'd - (so that we can stop compiling immediately) - */ - bool had_include_err; } Typer; typedef struct CGenerator { -- cgit v1.2.3