summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--blockarr.c5
-rw-r--r--identifiers.c255
-rw-r--r--package.c8
-rw-r--r--scope.c2
-rw-r--r--types.c8
-rw-r--r--types.h27
6 files changed, 145 insertions, 160 deletions
diff --git a/blockarr.c b/blockarr.c
index a7340e2..f5e82e8 100644
--- a/blockarr.c
+++ b/blockarr.c
@@ -30,12 +30,9 @@ static void *block_arr_add(BlockArr *arr) {
block->n = 1;
size_t bytes = arr->item_sz << arr->lg_block_sz;
block->data = err_malloc(bytes);
- block->last = block->data;
return block->data;
} else {
- last_block->last = (char*)last_block->last + arr->item_sz;
- ++last_block->n;
- return last_block->last;
+ return (char *)last_block->data + arr->item_sz * (last_block->n++);
}
}
diff --git a/identifiers.c b/identifiers.c
index c562d52..6c17cd7 100644
--- a/identifiers.c
+++ b/identifiers.c
@@ -5,7 +5,7 @@
*/
#if CHAR_MAX - CHAR_MIN > 255
#error "Currently only systems with 8-bit characters can compile toc."
-/* TODO: maybe do a run-time error for large characters? */
+/* TODO: not necessary anymore */
#endif
/* can this character be used in an identifier? */
@@ -26,102 +26,134 @@ static int is_ident(int c) {
return 0;
}
-
-static Identifier ident_new_anonymous(Identifiers *ids) {
- IdentTree *node = block_arr_add(&ids->trees);
- memset(node, 0, sizeof *node);
- node->anonymous = true;
- return node;
-}
-
-/* used internally to allocate identifiers */
-static Identifier ident_new(Identifiers *ids, Identifier parent, unsigned char index_in_parent) {
- IdentTree *node = ident_new_anonymous(ids);
- node->parent = parent;
- if (parent)
- node->depth = (uint16_t)(parent->depth + 1);
- node->index_in_parent = index_in_parent;
- node->anonymous = false;
- return node;
-}
-
-
/* Initialize Identifiers. */
static void idents_create(Identifiers *ids) {
- block_arr_create(&ids->trees, 10, sizeof(IdentTree)); /* blocks of 1 << 10 = 1024 */
- ids->root = ident_new(ids, NULL, 0); /* create root tree */
+ ids->slots = NULL;
ids->nidents = 0;
+ ids->rseed = 0x27182818;
}
#if CHAR_MIN < 0
-#define ident_char_to_uchar(c) ((c) < 0 ? (256 + (c)) : (c))
+#define ident_char_to_uchar(c) (unsigned char)((c) < 0 ? (256 + (c)) : (c))
#else
-#define ident_char_to_uchar(c) (c)
+#define ident_char_to_uchar(c) (unsigned char)(c)
#endif
#if CHAR_MIN < 0
-#define ident_uchar_to_char(c) ((c) > 127 ? ((c) - 256) : (c))
+#define ident_uchar_to_char(c) (unsigned char)((c) > 127 ? ((c) - 256) : (c))
#else
-#define ident_uchar_to_char(c) (c)
+#define ident_uchar_to_char(c) (unsigned char)(c)
#endif
+static U64 ident_hash(char **s) {
+ U32 x = 0xabcdef01;
+ U32 y = 0x31415926;
+ U64 hash = 0;
+ while (is_ident(**s)) {
+ hash += (U64)x * ident_char_to_uchar(**s) + y;
+ x = rand_u32(x);
+ y = rand_u32(y);
+ ++*s;
+ }
+ return hash;
+}
+
+
+static bool ident_eq_str(Identifier i, const char *s) {
+ char *t = i->text;
+ while (is_ident(*s) && is_ident(*t)) {
+ if (*s != *t) return false;
+ ++s, ++t;
+ }
+ return !is_ident(*s) && !is_ident(*t);
+}
+
+
+
+static IdentSlot **ident_slots_insert(IdentSlot **slots, char *s, size_t i) {
+ IdentSlot **slot;
+ size_t nslots = arr_len(slots);
+ while (1) {
+ slot = &slots[i];
+ if (!*slot) break;
+ if (s && ident_eq_str(*slot, s))
+ break;
+ i = (i+1) % nslots;
+ }
+ return slot;
+}
+
+static Identifier ident_new_anonymous(Identifiers *ids) {
+ U32 idx = rand_u32(ids->rseed);
+ ids->rseed = idx;
+ IdentSlot **slot = ident_slots_insert(ids->slots, NULL, idx % arr_len(ids->slots));
+ *slot = err_calloc(1, sizeof **slot);
+ ++ids->nidents;
+ (*slot)->anonymous = true;
+ (*slot)->len = 3;
+ (*slot)->text = "???";
+ return *slot;
+}
+
/* moves s to the char after the identifier */
/* inserts if does not exist. reads until non-ident char is found. */
/* advances past identifier */
static Identifier ident_insert(Identifiers *ids, char **s) {
- IdentTree *tree = ids->root;
- while (1) {
- if (!is_ident(**s)) {
- return tree;
- }
- int c = ident_char_to_uchar(**s);
- assert(c >= 0 && c <= 255);
- unsigned char c_low = (unsigned char)(c & 0xf);
- unsigned char c_high = (unsigned char)(c >> 4);
- if (!tree->children[c_low]) {
- tree->children[c_low] = ident_new(ids, tree, c_low);
+ size_t nslots = arr_len(ids->slots);
+ if (nslots <= ids->nidents) {
+ IdentSlot **slots = ids->slots;
+ /* reserve more space */
+ IdentSlot **new_slots = NULL;
+ size_t new_nslots = nslots * 2 + 10;
+ arr_set_len(&new_slots, new_nslots);
+ arr_zero(new_slots);
+ arr_foreach(slots, IdentSlotPtr, slotp) {
+ IdentSlot *slot = *slotp;
+ if (slot) {
+ char *ptr = slot->text;
+ U64 new_hash = ident_hash(&ptr);
+ IdentSlot **new_slot = ident_slots_insert(new_slots, slot->text, new_hash % new_nslots);
+ *new_slot = slot;
+ }
}
- tree = tree->children[c_low];
-
- if (!tree->children[c_high]) {
- tree->children[c_high] = ident_new(ids, tree, c_high);
- }
- tree = tree->children[c_high];
- ++(*s);
+ arr_clear(&slots);
+ ids->slots = new_slots;
+ nslots = new_nslots;
}
-}
-
-static inline size_t ident_len(Identifier i) {
- if (i->anonymous) return 3;
- return (size_t)(i->depth / 2);
+ char *original = *s;
+ U64 hash = ident_hash(s);
+ IdentSlot **slot = ident_slots_insert(ids->slots, original, hash % arr_len(ids->slots));
+ if (!*slot) {
+ *slot = err_calloc(1, sizeof **slot);
+ ++ids->nidents;
+ (*slot)->text = original;
+ (*slot)->len = (size_t)(*s - original);
+ }
+ return *slot;
}
static char *ident_to_str(Identifier i) {
- size_t i_len = ident_len(i);
- char *str = err_malloc(i_len + 1);
- if (i->anonymous) {
- strcpy(str, "???");
- return str;
- }
- str += i_len;
- *str = 0;
- while (i->parent) {
- --str;
- unsigned char c_high = i->index_in_parent;
- unsigned char c_low = i->parent->index_in_parent;
- char c = (char)ident_uchar_to_char((int)c_low + ((int)c_high << 4));
- *str = c;
- i = i->parent->parent; /* go to grandparent (prev char) */
- }
+ char *str = err_malloc(i->len + 1);
+ /* for some reason, GCC thinks that i->len is -1 when this is called from type_to_str_ (in release mode) */
+
+#if !defined(__clang__) && defined(__GNUC__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wstringop-overflow"
+#pragma GCC diagnostic ignored "-Wrestrict"
+#endif
+ memcpy(str, i->text, i->len);
+
+#if !defined(__clang__) && defined(__GNUC__)
+#pragma GCC diagnostic pop
+#endif
+ str[i->len] = 0;
return str;
}
static void fprint_ident(FILE *out, Identifier id) {
- char *str = ident_to_str(id);
- fprintf(out, "%s", str);
- free(str);
+ fwrite(id->text, 1, id->len, out);
}
static void fprint_ident_debug(FILE *out, Identifier id) {
@@ -142,43 +174,29 @@ static void fprint_ident_reduced_charset(FILE *out, Identifier id) {
assert(id);
if (id->anonymous) {
fprintf(out, "x___");
+ return;
}
- if (id->parent == NULL) return; /* at root */
- fprint_ident_reduced_charset(out, id->parent->parent); /* to go up one character, we need to go to the grandparent */
- int c_low = id->parent->index_in_parent;
- int c_high = id->index_in_parent;
- int c = c_low + (c_high << 4);
- if (c > 127) {
- fprintf(out, "x__%x",c);
- } else {
- char chr = (char)ident_uchar_to_char(c);
- fputc(chr, out);
+ for (char *s = id->text; is_ident(*s); ++s) {
+ int c = ident_char_to_uchar(*s);
+ if (c > 127) {
+ fprintf(out, "x__%x", c);
+ } else {
+ putc(*s, out);
+ }
}
}
-/* NULL = no such identifier */
-static Identifier ident_get(Identifiers *ids, const char *s) {
- IdentTree *tree = ids->root;
- while (*s) {
- int c = ident_char_to_uchar(*s);
- assert(c >= 0 && c <= 255);
- unsigned char c_low = (unsigned char)(c & 0xf);
- unsigned char c_high = (unsigned char)(c >> 4);
- tree = tree->children[c_low];
- if (!tree) return NULL;
- tree = tree->children[c_high];
- if (!tree) return NULL;
- ++s;
- }
- return tree;
+/* NULL = no such identifier. returns identifier "foo" for both "foo\0" and "foo+92384324..." */
+static Identifier ident_get(Identifiers *ids, char *s) {
+ char *ptr = s;
+ U64 hash = ident_hash(&ptr);
+ IdentSlot **slot = ident_slots_insert(ids->slots, s, hash % arr_len(ids->slots));
+ return *slot;
}
static Identifier ident_translate(Identifier i, Identifiers *to_idents) {
- /* OPTIM */
if (!i || i->anonymous) return NULL;
- char *s = ident_to_str(i);
- Identifier new_ident = ident_get(to_idents, s);
- free(s);
+ Identifier new_ident = ident_get(to_idents, i->text);
return new_ident;
}
@@ -195,27 +213,30 @@ static IdentDecl *ident_decl(Identifier i) {
return (IdentDecl *)arr_last(i->decls);
}
-static void ident_tree_free(IdentTree *id) {
- if (!id) return;
- arr_clear(&id->decls);
- for (int i = 0; i < TREE_NCHILDREN; ++i)
- ident_tree_free(id->children[i]);
-}
-
static void idents_free(Identifiers *ids) {
- ident_tree_free(ids->root);
- block_arr_free(&ids->trees);
+ arr_foreach(ids->slots, IdentSlotPtr, slot) {
+ free(*slot);
+ }
+ arr_clear(&ids->slots);
}
+#ifdef TOC_DEBUG
static void idents_test(void) {
Identifiers ids;
char b[] = "foo_variable bar";
char *s = b;
idents_create(&ids);
- ident_insert(&ids, &s);
+ Identifier i1 = ident_insert(&ids, &s);
assert(strcmp(s, " bar") == 0);
+ char b2[] = "foo_variable+6";
+ s = b2;
+ Identifier i2 = ident_insert(&ids, &s);
+ assert(strcmp(s, "+6") == 0);
+ assert(i1 == i2);
+
idents_free(&ids);
}
+#endif
static int ident_index_in_decl(Identifier i, Declaration *d) {
int index = 0;
@@ -227,24 +248,6 @@ static int ident_index_in_decl(Identifier i, Declaration *d) {
return -1;
}
-static bool ident_eq_str(Identifier i, const char *s) {
- const char *t = s + (strlen(s) - 1);
- while (1) {
- if (!i->parent) {
- return false;
- }
- int c_low = i->parent->index_in_parent;
- int c_high = i->index_in_parent;
- int c = ident_uchar_to_char(c_low + (c_high << 4));
- if (c != *t) return false;
- i = i->parent->parent;
- if (t > s) --t;
- else break;
- }
- if (i->parent) return false; /* not at root */
- return true;
-}
-
static Location idecl_where(IdentDecl *id) {
switch (id->kind) {
diff --git a/package.c b/package.c
index 75945b7..78c6a3f 100644
--- a/package.c
+++ b/package.c
@@ -1133,7 +1133,7 @@ static bool exptr_finish(Exporter *ex) {
Identifier i = *ident;
assert(i->export_name);
export_vlq(ex, i->export_id);
- export_len(ex, ident_len(i));
+ export_len(ex, i->len);
fprint_ident(ex->out, i);
}
@@ -1163,12 +1163,10 @@ static bool import_footer(Importer *im) {
for (i = 0; i < n_named_idents; ++i) {
U64 id = import_vlq(im);
size_t name_len = import_vlq(im);
- char *name = err_malloc(name_len+1);
+ char *name = imptr_malloc(im, name_len+1);
name[name_len] = 0;
fread(name, 1, name_len, im->in);
- char *copy = name; /* don't change the value of name */
- im->ident_map[id] = ident_insert(&im->pkg->idents, &copy);
- free(name);
+ im->ident_map[id] = ident_insert(&im->pkg->idents, &name);
}
for (i = 1; i <= im->max_ident_id; ++i) {
if (!im->ident_map[i]) {
diff --git a/scope.c b/scope.c
index a180c3b..92e2b40 100644
--- a/scope.c
+++ b/scope.c
@@ -42,7 +42,7 @@ static void remove_ident_decls(Block *b, Declaration *d) {
U64 i = 0;
bool is_tuple = d->type.kind == TYPE_TUPLE;
arr_foreach(d->idents, Identifier, ident) {
- IdentTree *id_info = *ident;
+ IdentSlot *id_info = *ident;
IdentDecl **decls = &id_info->decls;
IdentDecl *last_decl = arr_last(*decls);
if (last_decl && last_decl->scope == b) {
diff --git a/types.c b/types.c
index 6742c86..36876c6 100644
--- a/types.c
+++ b/types.c
@@ -978,13 +978,11 @@ static bool types_expr(Typer *tr, Expression *e) {
return false;
}
size_t name_str_len = (size_t)name_str.n;
- char *name_cstr = err_malloc(name_str_len + 1);
+ char *name_cstr = typer_malloc(tr, name_str_len + 1);
memcpy(name_cstr, name_str.data, name_str_len);
name_cstr[name_str.n] = '\0';
- /* advance copy so we can free the original */
- char *copy = name_cstr;
- Identifier name_ident = ident_insert(tr->idents, &copy);
- free(name_cstr);
+ Identifier name_ident = ident_insert(tr->idents, &name_cstr);
+ assert(!*name_cstr);
e->pkg.name_ident = name_ident;
if (!name_ident->pkg) {
char *filename = typer_malloc(tr, name_str_len + 5);
diff --git a/types.h b/types.h
index 2e136b4..1f6a337 100644
--- a/types.h
+++ b/types.h
@@ -105,7 +105,6 @@ typedef struct Allocator {
typedef struct ArrBlock {
void *data;
size_t n; /* number of things in this block so far */
- void *last; /* last one of them */
} ArrBlock;
typedef struct BlockArr {
@@ -170,33 +169,23 @@ typedef struct IdentDecl {
U16 flags;
} IdentDecl;
-/*
-The way you search an identifier in a tree is:
-root.children[low part of 1st char].children[high part of 1st char]
-.children[low part of 2nd char]...
-
-*/
-
-#define TREE_NCHILDREN 16
-typedef struct IdentTree {
- /* zero value is an empty trie */
- uint16_t depth;
- unsigned char index_in_parent; /* index of this in .parent.children */
+typedef struct IdentSlot {
bool export_name; /* is this identifier's name important? */
bool anonymous; /* is this identifier not part of a tree? */
+ char *text; /* actual name of the identifier */
+ size_t len; /* length of name */
U64 export_id; /* 0 if there's no exported identifier here, otherwise unique positive integer associated with this identifier */
struct Package *pkg; /* NULL if this is not associated with a package */
- struct IdentTree *parent;
- struct IdentTree *children[TREE_NCHILDREN];
IdentDecl *decls; /* array of declarations of this identifier */
-} IdentTree;
+} IdentSlot;
-typedef IdentTree *Identifier;
+typedef IdentSlot *Identifier;
+typedef IdentSlot *IdentSlotPtr;
typedef struct Identifiers {
- BlockArr trees;
- IdentTree *root;
+ IdentSlot **slots;
U64 nidents;
+ U32 rseed;
} Identifiers;
typedef enum {