From cc7d494226f41d76208bd2a0613a26435247cc87 Mon Sep 17 00:00:00 2001 From: Leo Tenenbaum Date: Sun, 1 Sep 2019 15:21:25 -0400 Subject: improved the way identifiers worked (now they use a single block array) --- abbrevs.txt | 1 + base_cgen.c | 23 ++------ build.sh | 15 +++++- cgen.c | 18 +++++++ eval.c | 1 + identifiers.c | 160 ++++++++++++++++++++++++++++++++++---------------------- main.c | 5 +- out.c | 4 +- out.h | 2 +- parse.c | 15 +++--- test.toc | 8 ++- tokenizer.c | 2 +- types.c | 10 ++-- util/blockarr.c | 15 +++--- util/err.c | 16 +++--- 15 files changed, 175 insertions(+), 120 deletions(-) diff --git a/abbrevs.txt b/abbrevs.txt index 89ba9b4..57d5361 100644 --- a/abbrevs.txt +++ b/abbrevs.txt @@ -6,4 +6,5 @@ stmt - statement tokr - tokenizer str - string num - number +fn - function eof - end of file diff --git a/base_cgen.c b/base_cgen.c index 1e79aaa..125146b 100644 --- a/base_cgen.c +++ b/base_cgen.c @@ -12,6 +12,7 @@ typedef struct { int indent_level; bool indent_next; /* should the next thing written be indented? */ CGenWritingTo writing_to; + Identifier main_ident; } CGenerator; static FILE *cgen_writing_to(CGenerator *g) { @@ -79,23 +80,6 @@ static void cgen_write_line_comment(CGenerator *g, const char *fmt, ...) { cgen_write(g, " */\n"); } -static void cgen_create(CGenerator *g, FILE *c_out, FILE *h_out, const char *h_filename) { - g->c_out = c_out; - g->h_out = h_out; - g->anon_fn_count = 0; - g->indent_level = 0; - g->block = NULL; - g->indent_next = true; - - g->writing_to = CGEN_WRITING_TO_H; - cgen_write(g, "#include \n" - "#include \n"); - - g->writing_to = CGEN_WRITING_TO_C; - cgen_write(g, "#include \"%s\"\n", h_filename); - cgen_writeln(g, ""); /* extra newline between includes and code */ -} - /* Pass NULL for where if you don't want to check if it's declared */ static bool cgen_fn_name(CGenerator *g, FnExpr *f, Location *where); @@ -247,10 +231,11 @@ static void cgen_type_post(CGenerator *g, Type *t) { static bool cgen_fn_name(CGenerator *g, FnExpr *f, Location *where) { if (f->name) { - if (ident_eq_str(f->name, "main")) + if (f->name == g->main_ident) { cgen_write(g, "main__"); - else + } else { return cgen_ident(g, f->name, where); + } } else { cgen_write(g, "a___"); } diff --git a/build.sh b/build.sh index 2cce5bb..bf4b3d8 100755 --- a/build.sh +++ b/build.sh @@ -1,3 +1,16 @@ #!/bin/bash CC=gcc -$CC -o toc main.c -O0 -g3 -Wall -Wextra -Wpedantic -Wconversion -Wshadow -std=c11 || exit 1 + +# Possible extra build flags +# these are for compiling the compiler, and NOT for compiling the program itself. +# -DNONZERO_NULL_PTRS +# - must be set if the zero value of a pointer (as might be set by calloc/memset) +# is not the NULL pointer. + +ADDITIONAL_FLAGS= + +WARNINGS='-Wall -Wextra -Wpedantic -Wconversion -Wshadow' +DEBUG_FLAGS="-O0 -g3 $WARNINGS -std=c11" +RELEASE_FLAGS="-O3 -DNDEBUG $WARNINGS -std=c11" + +$CC $DEBUG_FLAGS $ADDITIONAL_FLAGS -o toc main.c || exit 1 diff --git a/cgen.c b/cgen.c index c42cbe4..9353084 100644 --- a/cgen.c +++ b/cgen.c @@ -1,3 +1,21 @@ +static void cgen_create(CGenerator *g, Identifiers *ids, FILE *c_out, FILE *h_out, const char *h_filename) { + g->c_out = c_out; + g->h_out = h_out; + g->anon_fn_count = 0; + g->indent_level = 0; + g->block = NULL; + g->indent_next = true; + g->main_ident = ident_get(ids, "main"); + + g->writing_to = CGEN_WRITING_TO_H; + cgen_write(g, "#include \n" + "#include \n"); + + g->writing_to = CGEN_WRITING_TO_C; + cgen_write(g, "#include \"%s\"\n", h_filename); + cgen_writeln(g, ""); /* extra newline between includes and code */ +} + static bool cgen_expr(CGenerator *g, Expression *e) { switch (e->kind) { case EXPR_INT_LITERAL: diff --git a/eval.c b/eval.c index 6e6195d..a131c03 100644 --- a/eval.c +++ b/eval.c @@ -86,6 +86,7 @@ static bool eval_expr_as_int(Expression *e, Integer *i) { info_print(d->where, "Declaration was here."); return false; } + /* TODO: tuples */ eval_expr_as_int(&d->expr, i); return true; diff --git a/identifiers.c b/identifiers.c index a960fb7..b7a2b3c 100644 --- a/identifiers.c +++ b/identifiers.c @@ -1,121 +1,155 @@ -/* OPTIM: This is not ideal. There should be one dynamic array of tree nodes. */ - typedef struct { struct Block *scope; /* NULL for file scope */ struct Declaration *decl; } IdentDecl; +/* +The way you search an identifier in a tree is: +root.children[low part of 1st char].children[high part of 1st char] +.children[low part of 2nd char]... + +*/ + +#define TREE_NCHILDREN 16 typedef struct IdentTree { /* zero value is an empty trie */ - long id; - int len; /* length of identifier = depth in tree */ - struct IdentTree *children; + uint16_t depth; + unsigned char index_in_parent; /* index of this in .parent.children */ struct IdentTree *parent; + struct IdentTree *children[TREE_NCHILDREN]; Array decls; /* array of declarations of this identifier */ unsigned long c_fn_reps; /* number of repetitions of this identifier in the C output--only used for functions */ - size_t depth; } IdentTree; typedef IdentTree *Identifier; -/* MUST be zero-initialized before use */ typedef struct { - IdentTree tree_root; - long curr_id; + BlockArr trees; + IdentTree *root; } Identifiers; -static const char identifier_chars[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"; - -#define NIDENTIFIER_CHARS ((int)((sizeof identifier_chars) - 1)) /* -1 for null char */ +#if CHAR_MAX - CHAR_MIN > 255 +#error "Currently only systems with 8-bit characters can compile toc." +/* TODO: maybe do a run-time error for large characters? */ +#endif -/* returns -1 if c is not a valid identifier character, its index in identifier_chars otherwise */ -static int ident_char_index(int c) { +/* can this character be used in an identifier? */ +static int isident(int c) { if (c >= 'a' && c <= 'z') - return c - 'a'; + return 1; if (c >= 'A' && c <= 'Z') - return c - 'A' + 26; + return 1; if (c >= '0' && c <= '9') - return c - '0' + 52; - if (c == '_') return 62; - return -1; + return 1; + if (c == '_') return 1; +#if CHAR_MIN < 0 + if (c < 0) /* on systems where char = signed char, UTF-8 characters are probably < 0? */ + return 1; +#endif + if (c > 127) /* UTF-8 */ + return 1; + return 0; } -/* can this character be used in an identifier? */ -static int isident(int c) { - return ident_char_index(c) != -1; /* OPTIM: Write separate function */ + +/* used internally to allocate identifiers */ +static Identifier ident_new(Identifiers *ids, Identifier parent, unsigned char index_in_parent) { + IdentTree *tree = block_arr_add(&ids->trees); + memset(tree, 0, sizeof *tree); /* use zero value of IdentTree */ +#if NONZERO_NULL_PTRS + tree->parent = NULL; + for (size_t i = 0; i < TREE_NCHILDREN; i++) + tree->children[i] = NULL; +#endif + tree->parent = parent; + tree->index_in_parent = index_in_parent; + return tree; +} + +/* Initialize Identifiers. */ +static void idents_create(Identifiers *ids) { + block_arr_create(&ids->trees, 10, sizeof(IdentTree)); /* blocks of 1 << 10 = 1024 */ + ids->root = ident_new(ids, NULL, 0); /* create root tree */ } + /* moves s to the char after the identifier */ /* inserts if does not exist. reads until non-ident char is found. */ /* advances past identifier */ static Identifier ident_insert(Identifiers *ids, char **s) { - IdentTree *t = &ids->tree_root; + IdentTree *tree = ids->root; while (1) { - char c = **s; - if (!isident(c)) { - if (t->id == 0) t->id = ++ids->curr_id; - return t; + if (!isident(**s)) { + return tree; } + int c = (**s) - CHAR_MIN; + assert(c >= 0 && c <= 255); + unsigned char c_low = (unsigned char)(c & 0xf); + unsigned char c_high = (unsigned char)(c >> 4); + if (!tree->children[c_low]) { + tree->children[c_low] = ident_new(ids, tree, c_low); + } + tree = tree->children[c_low]; - if (!t->children) { - /* allocate children */ - t->children = err_calloc(NIDENTIFIER_CHARS, sizeof *t->children); - for (int i = 0; i < NIDENTIFIER_CHARS; i++) { - t->children[i].parent = t; /* child's parent = self */ - t->children[i].depth = t->depth + 1; - } + if (!tree->children[c_high]) { + tree->children[c_high] = ident_new(ids, tree, c_high); } - t = &t->children[ident_char_index(c)]; + tree = tree->children[c_high]; (*s)++; } } static void fprint_ident(FILE *out, Identifier id) { + assert(id); if (id->parent == NULL) return; /* at root */ - /* OPTIM: Use malloc(id->len)???? would probably use less mem for long idents, but - it's on the heap */ - fprint_ident(out, id->parent); - fputc(identifier_chars[id - id->parent->children /* index of self in parent */], out); + fprint_ident(out, id->parent->parent); /* to go up one character, we need to go to the grandparent */ + int c_low = id->parent->index_in_parent; + int c_high = id->index_in_parent; + int c = c_low + (c_high << 4) + CHAR_MIN; + fputc(c, out); } -static bool ident_eq_str(Identifier i, const char *s) { - size_t len = strlen(s); - if (i->depth != len) return false; - s += len - 1; - while (i->parent != NULL) { - if (identifier_chars[i - i->parent->children /* index of self in parent */] != *s) - return false; - i = i->parent; - if (i->parent != NULL) - s--; +/* NULL = no such identifier */ +static Identifier ident_get(Identifiers *ids, const char *s) { + IdentTree *tree = ids->root; + while (*s) { + int c = (*s) - CHAR_MIN; + assert(c >= 0 && c <= 255); + unsigned char c_low = (unsigned char)(c & 0xf); + unsigned char c_high = (unsigned char)(c >> 4); + tree = tree->children[c_low]; + if (!tree) return NULL; + tree = tree->children[c_high]; + if (!tree) return NULL; + s++; } - return true; + return tree; } static char *ident_to_str(Identifier i) { - char *str = malloc(i->depth + 1); - str += i->depth; + size_t i_len = (size_t)(i->depth / 2); /* length = depth / 2 */ + char *str = malloc(i_len + 1); + str += i_len; *str = 0; - while (i->parent != NULL) { + + while (i) { str--; - *str = identifier_chars[i - i->parent->children]; - i = i->parent; + unsigned char c_high = i->index_in_parent; + unsigned char c_low = i->parent->index_in_parent; + char c = (char)(CHAR_MIN + (int)(c_low + (c_high << 4))); + *str = c; + i = i->parent->parent; /* go to grandparent (prev char) */ } + return str; } static IdentDecl *ident_decl(Identifier i) { + assert(i->decls.item_sz); return (IdentDecl*)arr_last(&i->decls); } -static void idents_free_tree(IdentTree *tree) { - if (!tree->children) return; - for (int i = 0; i < NIDENTIFIER_CHARS; i++) - idents_free_tree(&tree->children[i]); - free(tree->children); -} - static void idents_free(Identifiers *ids) { - idents_free_tree(&ids->tree_root); + block_arr_free(&ids->trees); } diff --git a/main.c b/main.c index babecc7..1721269 100644 --- a/main.c +++ b/main.c @@ -36,7 +36,8 @@ int main(int argc, char **argv) { fclose(in); err_filename = in_filename; - Identifiers file_idents = {0}; + Identifiers file_idents; + idents_create(&file_idents); Tokenizer t; tokr_create(&t, &file_idents); if (!tokenize_string(&t, contents)) { @@ -72,7 +73,7 @@ int main(int argc, char **argv) { FILE *c_out = fopen(c_out_filename, "w"); FILE *h_out = fopen(h_out_filename, "w"); CGenerator cgen; - cgen_create(&cgen, c_out, h_out, h_out_filename); + cgen_create(&cgen, &file_idents, c_out, h_out, h_out_filename); if (!cgen_file(&cgen, &f)) { err_fprint(TEXT_IMPORTANT("Errors occured while generating C code.\n")); return EXIT_FAILURE; diff --git a/out.c b/out.c index d3c1aa0..d363960 100644 --- a/out.c +++ b/out.c @@ -1,10 +1,10 @@ #include "out.h" /* toc */ -void foo(float a[10], int64_t (*out__)[3]) { +void foo(void) { } void main__(void) { - void (*asdkofhj)(int64_t (*out__)[3]) = NULL; + foo(); } int main(void) { diff --git a/out.h b/out.h index 0e7be5f..8d1ba2b 100644 --- a/out.h +++ b/out.h @@ -1,4 +1,4 @@ #include #include -void foo(float a[10], int64_t (*out__)[3]); +void foo(void); void main__(void); diff --git a/parse.c b/parse.c index f12c683..e7f1016 100644 --- a/parse.c +++ b/parse.c @@ -561,7 +561,7 @@ static bool parse_fn_expr(Parser *p, FnExpr *f) { Tokenizer *t = p->tokr; /* only called when token is fn */ assert(token_is_kw(t->token, KW_FN)); - f->name = NULL; + f->name = 0; t->token++; if (!token_is_kw(t->token, KW_LPAREN)) { tokr_err(t, "Expected '(' after 'fn'."); @@ -947,17 +947,18 @@ static bool parse_single_type_in_decl(Parser *p, Declaration *d) { only keep track of file scoped declarations--- block enter/exit code will handle the rest */ + IdentTree *ident_info = *ident; if (p->block == NULL) { - if ((*ident)->decls.len) { + if (ident_info->decls.len) { /* this was already declared! */ - IdentDecl *prev = (*ident)->decls.data; + IdentDecl *prev = ident_info->decls.data; tokr_err(t, "Re-declaration of identifier in global scope."); info_print(prev->decl->where, "Previous declaration was here."); return false; } - assert(!(*ident)->decls.item_sz); - arr_create(&(*ident)->decls, sizeof(IdentDecl)); - IdentDecl *ident_decl = arr_add(&(*ident)->decls); + assert(!ident_info->decls.item_sz); /* we shouldn't have created this array already */ + arr_create(&ident_info->decls, sizeof(IdentDecl)); + IdentDecl *ident_decl = arr_add(&ident_info->decls); ident_decl->decl = d; ident_decl->scope = NULL; } @@ -1179,7 +1180,7 @@ static void fprint_param(FILE *out, Param *p) { static void fprint_stmt(FILE *out, Statement *s); -static void fprint_block(FILE *out, Block *b) { +static void fprint_block(FILE *out, Block *b) { fprintf(out, "{\n"); arr_foreach(&b->stmts, Statement, stmt) { fprint_stmt(out, stmt); diff --git a/test.toc b/test.toc index e468430..10e160d 100644 --- a/test.toc +++ b/test.toc @@ -1,7 +1,5 @@ -foo @= fn(a: [10]float) [3]int { +foo @= fn() { }; - -main @ fn() = fn() { -#C #C #C - asdkofhj : fn() [3]int; +main @= fn() { + foo(); }; diff --git a/tokenizer.c b/tokenizer.c index abf6477..032085a 100644 --- a/tokenizer.c +++ b/tokenizer.c @@ -145,7 +145,7 @@ static void fprint_token(FILE *out, Token *t) { fprintf(out, "keyword: %s", keywords[t->kw]); break; case TOKEN_IDENT: - fprintf(out, "identifier: %ld:", t->ident->id); + fprintf(out, "identifier: %p: ", (void*)t->ident); fprint_ident(out, t->ident); break; case TOKEN_NUM_LITERAL: diff --git a/types.c b/types.c index 0b8c052..0bfc3ff 100644 --- a/types.c +++ b/types.c @@ -4,7 +4,8 @@ static bool block_enter(Block *b) { if (stmt->kind == STMT_DECL) { Declaration *decl = &stmt->decl; arr_foreach(&decl->idents, Identifier, ident) { - Array *decls = &(*ident)->decls; + IdentTree *id_info = *ident; + Array *decls = &id_info->decls; if (decls->len) { /* check that it hasn't been declared in this block */ IdentDecl *prev = arr_last(decls); @@ -16,7 +17,7 @@ static bool block_enter(Block *b) { } } else { /* array not initialized yet */ - arr_create(&(*ident)->decls, sizeof(IdentDecl)); + arr_create(decls, sizeof(IdentDecl)); } IdentDecl *ident_decl = arr_add(decls); @@ -35,7 +36,8 @@ static bool block_exit(Block *b) { if (stmt->kind == STMT_DECL) { Declaration *decl = &stmt->decl; arr_foreach(&decl->idents, Identifier, ident) { - Array *decls = &(*ident)->decls; + IdentTree *id_info = *ident; + Array *decls = &id_info->decls; assert(decls->item_sz); IdentDecl *last_decl = arr_last(decls); if (last_decl->scope == b) { @@ -249,7 +251,7 @@ static bool type_of_expr(Expression *e, Type *t) { Type fn_type; if (f->kind == EXPR_IDENT) { /* allow calling a function before declaring it */ - if (!type_of_ident(e->where, e->ident, t, true)) return false; + if (!type_of_ident(f->where, f->ident, &fn_type, true)) return false; } else { if (!type_of_expr(f, &fn_type)) return false; } diff --git a/util/blockarr.c b/util/blockarr.c index c85df5a..03d44b2 100644 --- a/util/blockarr.c +++ b/util/blockarr.c @@ -21,13 +21,13 @@ typedef struct { Note: the block size must be a power of 2, to use right shifting instead of division (for optimization)! */ -void block_arr_create(BlockArr *arr, int lg_block_sz, size_t item_sz) { +static void block_arr_create(BlockArr *arr, int lg_block_sz, size_t item_sz) { arr_create(&arr->blocks, sizeof(ArrBlock)); arr->item_sz = item_sz; arr->lg_block_sz = lg_block_sz; } -void *block_arr_add(BlockArr *arr) { +static void *block_arr_add(BlockArr *arr) { ArrBlock *last_block; last_block = arr_last(&arr->blocks); @@ -46,12 +46,13 @@ void *block_arr_add(BlockArr *arr) { } } -/* Don't need this yet. */ -/* void *block_arr_get(BlockArr *arr, size_t index) { */ - -/* } */ +static inline void *block_arr_get(BlockArr *arr, size_t index) { + size_t block_index = index >> arr->lg_block_sz; + ArrBlock *block = (ArrBlock*)arr->blocks.data + block_index; + return (char*)block->data + arr->item_sz * index; +} -void block_arr_free(BlockArr *arr) { +static void block_arr_free(BlockArr *arr) { arr_foreach(&arr->blocks, ArrBlock, block) { free(block->data); } diff --git a/util/err.c b/util/err.c index e53b16f..1a05e9d 100644 --- a/util/err.c +++ b/util/err.c @@ -111,14 +111,14 @@ static void *err_malloc(size_t size) { return ret; } -static void *err_calloc(size_t n, size_t size) { - void *ret = calloc(n, size); - if (!ret) { - fprintf(stderr, "Error: Out of memory.\n"); - abort(); - } - return ret; -} +/* static void *err_calloc(size_t n, size_t size) { */ +/* void *ret = calloc(n, size); */ +/* if (!ret) { */ +/* fprintf(stderr, "Error: Out of memory.\n"); */ +/* abort(); */ +/* } */ +/* return ret; */ +/* } */ static void *err_realloc(void *data, size_t new_size) { void *ret = realloc(data, new_size); -- cgit v1.2.3