diff options
-rw-r--r-- | blockarr.c | 5 | ||||
-rw-r--r-- | identifiers.c | 255 | ||||
-rw-r--r-- | package.c | 8 | ||||
-rw-r--r-- | scope.c | 2 | ||||
-rw-r--r-- | types.c | 8 | ||||
-rw-r--r-- | types.h | 27 |
6 files changed, 145 insertions, 160 deletions
@@ -30,12 +30,9 @@ static void *block_arr_add(BlockArr *arr) { block->n = 1; size_t bytes = arr->item_sz << arr->lg_block_sz; block->data = err_malloc(bytes); - block->last = block->data; return block->data; } else { - last_block->last = (char*)last_block->last + arr->item_sz; - ++last_block->n; - return last_block->last; + return (char *)last_block->data + arr->item_sz * (last_block->n++); } } diff --git a/identifiers.c b/identifiers.c index c562d52..6c17cd7 100644 --- a/identifiers.c +++ b/identifiers.c @@ -5,7 +5,7 @@ */ #if CHAR_MAX - CHAR_MIN > 255 #error "Currently only systems with 8-bit characters can compile toc." -/* TODO: maybe do a run-time error for large characters? */ +/* TODO: not necessary anymore */ #endif /* can this character be used in an identifier? */ @@ -26,102 +26,134 @@ static int is_ident(int c) { return 0; } - -static Identifier ident_new_anonymous(Identifiers *ids) { - IdentTree *node = block_arr_add(&ids->trees); - memset(node, 0, sizeof *node); - node->anonymous = true; - return node; -} - -/* used internally to allocate identifiers */ -static Identifier ident_new(Identifiers *ids, Identifier parent, unsigned char index_in_parent) { - IdentTree *node = ident_new_anonymous(ids); - node->parent = parent; - if (parent) - node->depth = (uint16_t)(parent->depth + 1); - node->index_in_parent = index_in_parent; - node->anonymous = false; - return node; -} - - /* Initialize Identifiers. */ static void idents_create(Identifiers *ids) { - block_arr_create(&ids->trees, 10, sizeof(IdentTree)); /* blocks of 1 << 10 = 1024 */ - ids->root = ident_new(ids, NULL, 0); /* create root tree */ + ids->slots = NULL; ids->nidents = 0; + ids->rseed = 0x27182818; } #if CHAR_MIN < 0 -#define ident_char_to_uchar(c) ((c) < 0 ? (256 + (c)) : (c)) +#define ident_char_to_uchar(c) (unsigned char)((c) < 0 ? (256 + (c)) : (c)) #else -#define ident_char_to_uchar(c) (c) +#define ident_char_to_uchar(c) (unsigned char)(c) #endif #if CHAR_MIN < 0 -#define ident_uchar_to_char(c) ((c) > 127 ? ((c) - 256) : (c)) +#define ident_uchar_to_char(c) (unsigned char)((c) > 127 ? ((c) - 256) : (c)) #else -#define ident_uchar_to_char(c) (c) +#define ident_uchar_to_char(c) (unsigned char)(c) #endif +static U64 ident_hash(char **s) { + U32 x = 0xabcdef01; + U32 y = 0x31415926; + U64 hash = 0; + while (is_ident(**s)) { + hash += (U64)x * ident_char_to_uchar(**s) + y; + x = rand_u32(x); + y = rand_u32(y); + ++*s; + } + return hash; +} + + +static bool ident_eq_str(Identifier i, const char *s) { + char *t = i->text; + while (is_ident(*s) && is_ident(*t)) { + if (*s != *t) return false; + ++s, ++t; + } + return !is_ident(*s) && !is_ident(*t); +} + + + +static IdentSlot **ident_slots_insert(IdentSlot **slots, char *s, size_t i) { + IdentSlot **slot; + size_t nslots = arr_len(slots); + while (1) { + slot = &slots[i]; + if (!*slot) break; + if (s && ident_eq_str(*slot, s)) + break; + i = (i+1) % nslots; + } + return slot; +} + +static Identifier ident_new_anonymous(Identifiers *ids) { + U32 idx = rand_u32(ids->rseed); + ids->rseed = idx; + IdentSlot **slot = ident_slots_insert(ids->slots, NULL, idx % arr_len(ids->slots)); + *slot = err_calloc(1, sizeof **slot); + ++ids->nidents; + (*slot)->anonymous = true; + (*slot)->len = 3; + (*slot)->text = "???"; + return *slot; +} + /* moves s to the char after the identifier */ /* inserts if does not exist. reads until non-ident char is found. */ /* advances past identifier */ static Identifier ident_insert(Identifiers *ids, char **s) { - IdentTree *tree = ids->root; - while (1) { - if (!is_ident(**s)) { - return tree; - } - int c = ident_char_to_uchar(**s); - assert(c >= 0 && c <= 255); - unsigned char c_low = (unsigned char)(c & 0xf); - unsigned char c_high = (unsigned char)(c >> 4); - if (!tree->children[c_low]) { - tree->children[c_low] = ident_new(ids, tree, c_low); + size_t nslots = arr_len(ids->slots); + if (nslots <= ids->nidents) { + IdentSlot **slots = ids->slots; + /* reserve more space */ + IdentSlot **new_slots = NULL; + size_t new_nslots = nslots * 2 + 10; + arr_set_len(&new_slots, new_nslots); + arr_zero(new_slots); + arr_foreach(slots, IdentSlotPtr, slotp) { + IdentSlot *slot = *slotp; + if (slot) { + char *ptr = slot->text; + U64 new_hash = ident_hash(&ptr); + IdentSlot **new_slot = ident_slots_insert(new_slots, slot->text, new_hash % new_nslots); + *new_slot = slot; + } } - tree = tree->children[c_low]; - - if (!tree->children[c_high]) { - tree->children[c_high] = ident_new(ids, tree, c_high); - } - tree = tree->children[c_high]; - ++(*s); + arr_clear(&slots); + ids->slots = new_slots; + nslots = new_nslots; } -} - -static inline size_t ident_len(Identifier i) { - if (i->anonymous) return 3; - return (size_t)(i->depth / 2); + char *original = *s; + U64 hash = ident_hash(s); + IdentSlot **slot = ident_slots_insert(ids->slots, original, hash % arr_len(ids->slots)); + if (!*slot) { + *slot = err_calloc(1, sizeof **slot); + ++ids->nidents; + (*slot)->text = original; + (*slot)->len = (size_t)(*s - original); + } + return *slot; } static char *ident_to_str(Identifier i) { - size_t i_len = ident_len(i); - char *str = err_malloc(i_len + 1); - if (i->anonymous) { - strcpy(str, "???"); - return str; - } - str += i_len; - *str = 0; - while (i->parent) { - --str; - unsigned char c_high = i->index_in_parent; - unsigned char c_low = i->parent->index_in_parent; - char c = (char)ident_uchar_to_char((int)c_low + ((int)c_high << 4)); - *str = c; - i = i->parent->parent; /* go to grandparent (prev char) */ - } + char *str = err_malloc(i->len + 1); + /* for some reason, GCC thinks that i->len is -1 when this is called from type_to_str_ (in release mode) */ + +#if !defined(__clang__) && defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wstringop-overflow" +#pragma GCC diagnostic ignored "-Wrestrict" +#endif + memcpy(str, i->text, i->len); + +#if !defined(__clang__) && defined(__GNUC__) +#pragma GCC diagnostic pop +#endif + str[i->len] = 0; return str; } static void fprint_ident(FILE *out, Identifier id) { - char *str = ident_to_str(id); - fprintf(out, "%s", str); - free(str); + fwrite(id->text, 1, id->len, out); } static void fprint_ident_debug(FILE *out, Identifier id) { @@ -142,43 +174,29 @@ static void fprint_ident_reduced_charset(FILE *out, Identifier id) { assert(id); if (id->anonymous) { fprintf(out, "x___"); + return; } - if (id->parent == NULL) return; /* at root */ - fprint_ident_reduced_charset(out, id->parent->parent); /* to go up one character, we need to go to the grandparent */ - int c_low = id->parent->index_in_parent; - int c_high = id->index_in_parent; - int c = c_low + (c_high << 4); - if (c > 127) { - fprintf(out, "x__%x",c); - } else { - char chr = (char)ident_uchar_to_char(c); - fputc(chr, out); + for (char *s = id->text; is_ident(*s); ++s) { + int c = ident_char_to_uchar(*s); + if (c > 127) { + fprintf(out, "x__%x", c); + } else { + putc(*s, out); + } } } -/* NULL = no such identifier */ -static Identifier ident_get(Identifiers *ids, const char *s) { - IdentTree *tree = ids->root; - while (*s) { - int c = ident_char_to_uchar(*s); - assert(c >= 0 && c <= 255); - unsigned char c_low = (unsigned char)(c & 0xf); - unsigned char c_high = (unsigned char)(c >> 4); - tree = tree->children[c_low]; - if (!tree) return NULL; - tree = tree->children[c_high]; - if (!tree) return NULL; - ++s; - } - return tree; +/* NULL = no such identifier. returns identifier "foo" for both "foo\0" and "foo+92384324..." */ +static Identifier ident_get(Identifiers *ids, char *s) { + char *ptr = s; + U64 hash = ident_hash(&ptr); + IdentSlot **slot = ident_slots_insert(ids->slots, s, hash % arr_len(ids->slots)); + return *slot; } static Identifier ident_translate(Identifier i, Identifiers *to_idents) { - /* OPTIM */ if (!i || i->anonymous) return NULL; - char *s = ident_to_str(i); - Identifier new_ident = ident_get(to_idents, s); - free(s); + Identifier new_ident = ident_get(to_idents, i->text); return new_ident; } @@ -195,27 +213,30 @@ static IdentDecl *ident_decl(Identifier i) { return (IdentDecl *)arr_last(i->decls); } -static void ident_tree_free(IdentTree *id) { - if (!id) return; - arr_clear(&id->decls); - for (int i = 0; i < TREE_NCHILDREN; ++i) - ident_tree_free(id->children[i]); -} - static void idents_free(Identifiers *ids) { - ident_tree_free(ids->root); - block_arr_free(&ids->trees); + arr_foreach(ids->slots, IdentSlotPtr, slot) { + free(*slot); + } + arr_clear(&ids->slots); } +#ifdef TOC_DEBUG static void idents_test(void) { Identifiers ids; char b[] = "foo_variable bar"; char *s = b; idents_create(&ids); - ident_insert(&ids, &s); + Identifier i1 = ident_insert(&ids, &s); assert(strcmp(s, " bar") == 0); + char b2[] = "foo_variable+6"; + s = b2; + Identifier i2 = ident_insert(&ids, &s); + assert(strcmp(s, "+6") == 0); + assert(i1 == i2); + idents_free(&ids); } +#endif static int ident_index_in_decl(Identifier i, Declaration *d) { int index = 0; @@ -227,24 +248,6 @@ static int ident_index_in_decl(Identifier i, Declaration *d) { return -1; } -static bool ident_eq_str(Identifier i, const char *s) { - const char *t = s + (strlen(s) - 1); - while (1) { - if (!i->parent) { - return false; - } - int c_low = i->parent->index_in_parent; - int c_high = i->index_in_parent; - int c = ident_uchar_to_char(c_low + (c_high << 4)); - if (c != *t) return false; - i = i->parent->parent; - if (t > s) --t; - else break; - } - if (i->parent) return false; /* not at root */ - return true; -} - static Location idecl_where(IdentDecl *id) { switch (id->kind) { @@ -1133,7 +1133,7 @@ static bool exptr_finish(Exporter *ex) { Identifier i = *ident; assert(i->export_name); export_vlq(ex, i->export_id); - export_len(ex, ident_len(i)); + export_len(ex, i->len); fprint_ident(ex->out, i); } @@ -1163,12 +1163,10 @@ static bool import_footer(Importer *im) { for (i = 0; i < n_named_idents; ++i) { U64 id = import_vlq(im); size_t name_len = import_vlq(im); - char *name = err_malloc(name_len+1); + char *name = imptr_malloc(im, name_len+1); name[name_len] = 0; fread(name, 1, name_len, im->in); - char *copy = name; /* don't change the value of name */ - im->ident_map[id] = ident_insert(&im->pkg->idents, ©); - free(name); + im->ident_map[id] = ident_insert(&im->pkg->idents, &name); } for (i = 1; i <= im->max_ident_id; ++i) { if (!im->ident_map[i]) { @@ -42,7 +42,7 @@ static void remove_ident_decls(Block *b, Declaration *d) { U64 i = 0; bool is_tuple = d->type.kind == TYPE_TUPLE; arr_foreach(d->idents, Identifier, ident) { - IdentTree *id_info = *ident; + IdentSlot *id_info = *ident; IdentDecl **decls = &id_info->decls; IdentDecl *last_decl = arr_last(*decls); if (last_decl && last_decl->scope == b) { @@ -978,13 +978,11 @@ static bool types_expr(Typer *tr, Expression *e) { return false; } size_t name_str_len = (size_t)name_str.n; - char *name_cstr = err_malloc(name_str_len + 1); + char *name_cstr = typer_malloc(tr, name_str_len + 1); memcpy(name_cstr, name_str.data, name_str_len); name_cstr[name_str.n] = '\0'; - /* advance copy so we can free the original */ - char *copy = name_cstr; - Identifier name_ident = ident_insert(tr->idents, ©); - free(name_cstr); + Identifier name_ident = ident_insert(tr->idents, &name_cstr); + assert(!*name_cstr); e->pkg.name_ident = name_ident; if (!name_ident->pkg) { char *filename = typer_malloc(tr, name_str_len + 5); @@ -105,7 +105,6 @@ typedef struct Allocator { typedef struct ArrBlock { void *data; size_t n; /* number of things in this block so far */ - void *last; /* last one of them */ } ArrBlock; typedef struct BlockArr { @@ -170,33 +169,23 @@ typedef struct IdentDecl { U16 flags; } IdentDecl; -/* -The way you search an identifier in a tree is: -root.children[low part of 1st char].children[high part of 1st char] -.children[low part of 2nd char]... - -*/ - -#define TREE_NCHILDREN 16 -typedef struct IdentTree { - /* zero value is an empty trie */ - uint16_t depth; - unsigned char index_in_parent; /* index of this in .parent.children */ +typedef struct IdentSlot { bool export_name; /* is this identifier's name important? */ bool anonymous; /* is this identifier not part of a tree? */ + char *text; /* actual name of the identifier */ + size_t len; /* length of name */ U64 export_id; /* 0 if there's no exported identifier here, otherwise unique positive integer associated with this identifier */ struct Package *pkg; /* NULL if this is not associated with a package */ - struct IdentTree *parent; - struct IdentTree *children[TREE_NCHILDREN]; IdentDecl *decls; /* array of declarations of this identifier */ -} IdentTree; +} IdentSlot; -typedef IdentTree *Identifier; +typedef IdentSlot *Identifier; +typedef IdentSlot *IdentSlotPtr; typedef struct Identifiers { - BlockArr trees; - IdentTree *root; + IdentSlot **slots; U64 nidents; + U32 rseed; } Identifiers; typedef enum { |