summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLeo Tenenbaum <pommicket@gmail.com>2020-01-19 23:21:56 -0500
committerLeo Tenenbaum <pommicket@gmail.com>2020-01-19 23:21:56 -0500
commit4a1467810d1cf9498db9507f36f78a0bd385c1e9 (patch)
treee212837c08ac0f1dd308cf800b1a8fdf6cdd3a3f
parentd4cf195c8ad0338fef7ccb08d0767360378b24d2 (diff)
extracted hash table for future use by foreign to avoid loading libraries twice
-rw-r--r--README.html4
-rw-r--r--README.md4
-rw-r--r--data_structures.c (renamed from arr.c)167
-rw-r--r--identifiers.c90
-rw-r--r--misc.c2
-rw-r--r--tests.c1
-rw-r--r--toc.c4
-rw-r--r--types.h49
8 files changed, 243 insertions, 78 deletions
diff --git a/README.html b/README.html
index f00bf27..baa360d 100644
--- a/README.html
+++ b/README.html
@@ -46,7 +46,9 @@ it is nearly as fast in theory.</p>
<h3><code>toc</code> Compiler Source Code</h3>
-<p>Most of the source code for the <code>toc</code> compiler is licensed under the GNU General Public License, version 3. See <code>LICENSE</code> for more information.</p>
+<p>Most of the source code for the <code>toc</code> compiler is licensed under the GNU General Public License, version 3, and the rest (some small general utilities) is in the public domain. Each source file begins with a comment explaining its license.</p>
+
+<p>See <code>LICENSE</code> for the GNU General Public License.</p>
<p><code>toc</code> is written in C, for speed and portability. It has no dependencies, other than the C runtime library. If you want to be able to call external C functions at compile time, however, you will need <code>libffcall</code> and <code>libdl</code> (so this is only currently supported on Unix-y systems).</p>
diff --git a/README.md b/README.md
index acc64ba..2fa6838 100644
--- a/README.md
+++ b/README.md
@@ -43,7 +43,9 @@ On other systems, you can just compile main.c with a C compiler. `toc` uses seve
### `toc` Compiler Source Code
-Most of the source code for the `toc` compiler is licensed under the GNU General Public License, version 3. See `LICENSE` for more information.
+Most of the source code for the `toc` compiler is licensed under the GNU General Public License, version 3, and the rest (some small general utilities) is in the public domain. Each source file begins with a comment explaining its license.
+
+See `LICENSE` for the GNU General Public License.
`toc` is written in C, for speed and portability. It has no dependencies, other than the C runtime library. If you want to be able to call external C functions at compile time, however, you will need `libffcall` and `libdl` (so this is only currently supported on Unix-y systems).
diff --git a/arr.c b/data_structures.c
index b44935d..970d966 100644
--- a/arr.c
+++ b/data_structures.c
@@ -1,7 +1,26 @@
-/*
- Copyright (C) 2019, 2020 Leo Tenenbaum.
- This file is part of toc. toc is distributed under version 3 of the GNU General Public License, without any warranty whatsoever.
- You should have received a copy of the GNU General Public License along with toc. If not, see <https://www.gnu.org/licenses/>.
+/*
+Dynamic arrays and hash tables in C
+
+This is free and unencumbered software released into the public domain.
+Anyone is free to copy, modify, publish, use, compile, sell, or
+distribute this software, either in source code form or as a compiled
+binary, for any purpose, commercial or non-commercial, and by any
+means.
+In jurisdictions that recognize copyright laws, the author or authors
+of this software dedicate any and all copyright interest in the
+software to the public domain. We make this dedication for the benefit
+of the public at large and to the detriment of our heirs and
+successors. We intend this dedication to be an overt act of
+relinquishment in perpetuity of all present and future rights to this
+software under copyright law.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+For more information, please refer to <http://unlicense.org/>
*/
/* OPTIM: is it faster to store void *end? */
@@ -79,8 +98,7 @@ static void arr_set_len_(void **arr, size_t n, size_t item_sz) {
}
static void arr_set_lena_(void **arr, size_t n, size_t item_sz, Allocator *a) {
if (n == 0) {
- /* OPTIM: arr_cleara */
- *arr = NULL;
+ arr_cleara_(arr, item_sz, a);
return;
}
arr_resva_(arr, n, item_sz, a);
@@ -215,3 +233,140 @@ static void arr_test(void) {
arr_clear(&foos);
}
#endif
+
+static U64 str_hash(const char *s, size_t len) {
+ U32 x = 0xabcdef01;
+ U32 y = 0x31415926;
+ U64 hash = 0;
+ for (size_t i = 0; i < len; ++i) {
+ hash += (U64)x * (unsigned char)(*s) + y;
+ x = rand_u32(x);
+ y = rand_u32(y);
+ ++s;
+ }
+ return hash;
+}
+
+static inline void str_hash_table_create(StrHashTable *t, size_t data_size, Allocator *allocr) {
+ t->slots = NULL;
+ t->data_size = data_size;
+ t->nentries = 0;
+ t->allocr = allocr;
+ t->rand_seed = 0xabacabad;
+}
+
+static StrHashTableSlot **str_hash_table_slot_get(StrHashTableSlot **slots, const char *s, size_t s_len, size_t i) {
+ StrHashTableSlot **slot;
+ size_t slots_cap = arr_len(slots);
+ while (1) {
+ assert(i < slots_cap);
+ slot = &slots[i];
+ if (!*slot) break;
+ if (s && (*slot)->str &&
+ s_len == (*slot)->len && memcmp(s, (*slot)->str, s_len) == 0)
+ break;
+ i = (i+1) % slots_cap;
+ }
+ return slot;
+}
+
+static void str_hash_table_grow(StrHashTable *t) {
+ size_t slots_cap = arr_len(t->slots);
+ if (slots_cap <= 2 * t->nentries) {
+ StrHashTableSlot **new_slots = NULL;
+ size_t new_slots_cap = slots_cap * 2 + 10;
+ arr_set_len(&new_slots, new_slots_cap);
+ arr_zero(new_slots);
+ arr_foreach(t->slots, StrHashTableSlotPtr, slotp) {
+ StrHashTableSlot *slot = *slotp;
+ if (slot) {
+ U64 new_hash = str_hash(slot->str, slot->len);
+ StrHashTableSlot **new_slot = str_hash_table_slot_get(new_slots, slot->str, slot->len, new_hash % new_slots_cap);
+ *new_slot = slot;
+ }
+ }
+ arr_cleara(&t->slots, t->allocr);
+ t->slots = new_slots;
+ slots_cap = new_slots_cap;
+ }
+}
+
+static inline size_t str_hash_table_slot_size(StrHashTable *t) {
+ return sizeof(StrHashTableSlot) + ((t->data_size + sizeof(MaxAlign) - 1) / sizeof(MaxAlign)) * sizeof(MaxAlign);
+}
+
+static StrHashTableSlot *str_hash_table_insert_(StrHashTable *t, const char *str, size_t len) {
+ str_hash_table_grow(t);
+ size_t slots_cap = arr_len(t->slots);
+ U64 hash = str_hash(str, len);
+ StrHashTableSlot **slot = str_hash_table_slot_get(t->slots, str, len, hash % slots_cap);
+ if (!*slot) {
+ *slot = allocr_calloc(t->allocr, 1, str_hash_table_slot_size(t));
+ (*slot)->str = str;
+ (*slot)->len = len;
+ ++t->nentries;
+ }
+ return *slot;
+}
+
+/* use this if you don't need the slot */
+static inline void *str_hash_table_insert(StrHashTable *t, const char *str, size_t len) {
+ return str_hash_table_insert_(t, str, len)->data;
+}
+
+static StrHashTableSlot *str_hash_table_insert_anonymous_(StrHashTable *t) {
+ str_hash_table_grow(t);
+ size_t slots_cap = arr_len(t->slots);
+ U32 slot_idx = (U32)((t->rand_seed = rand_u32(t->rand_seed)) % slots_cap);
+ StrHashTableSlot **slot = str_hash_table_slot_get(t->slots, NULL, 0, slot_idx);
+ if (!*slot) {
+ *slot = allocr_calloc(t->allocr, 1, str_hash_table_slot_size(t));
+ ++t->nentries;
+ }
+ return *slot;
+}
+
+/* use this if you don't need the slot */
+static inline void *str_hash_table_insert_anonymous(StrHashTable *t) {
+ return str_hash_table_insert_anonymous_(t)->data;
+}
+
+
+static void str_hash_table_free(StrHashTable *t) {
+ arr_foreach(t->slots, StrHashTableSlotPtr, slotp) {
+ allocr_free(t->allocr, *slotp, str_hash_table_slot_size(t));
+ }
+ arr_cleara(&t->slots, t->allocr);
+}
+
+static StrHashTableSlot *str_hash_table_get_(StrHashTable *t, const char *str, size_t len) {
+ size_t slot_index = str_hash(str, len) % arr_len(t->slots);
+ return *str_hash_table_slot_get(t->slots, str, len, slot_index);
+}
+
+static inline void *str_hash_table_get(StrHashTable *t, const char *str, size_t len) {
+ return str_hash_table_get_(t, str, len)->data;
+}
+
+#ifdef TOC_DEBUG
+static void str_hash_table_test(void) {
+ StrHashTable t;
+ str_hash_table_create(&t, sizeof(int), NULL);
+ int *p = str_hash_table_insert(&t, "Hello", 5);
+ *p = 182;
+ int *q = str_hash_table_insert(&t, "Hello", 5);
+ assert(p == q);
+ assert(*q == 182);
+ *q = 112;
+ int *r = str_hash_table_insert(&t, "Hellop", 6);
+ assert(p != r);
+ assert(*r == 0);
+ *r = 999;
+ int *s = str_hash_table_insert_anonymous(&t);
+ assert(p != s && r != s);
+ *s = 123;
+ int *u = str_hash_table_get(&t, "Hello", 5);
+ assert(p == u);
+ str_hash_table_free(&t);
+}
+#endif
diff --git a/identifiers.c b/identifiers.c
index 52f09b0..a17c002 100644
--- a/identifiers.c
+++ b/identifiers.c
@@ -28,22 +28,22 @@ static int is_ident(int c) {
/* Initialize Identifiers. */
static void idents_create(Identifiers *ids) {
- ids->slots = NULL;
- ids->nidents = 0;
+ str_hash_table_create(&ids->table, sizeof(IdentSlot) - sizeof(StrHashTableSlot), NULL);
ids->rseed = 0x27182818;
}
-static U64 ident_hash(char **s) {
- U32 x = 0xabcdef01;
- U32 y = 0x31415926;
- U64 hash = 0;
+/* advances s until a non-identifier character is reached, then returns the number of characters advanced */
+static size_t ident_str_len(char **s) {
+ char *original = *s;
while (is_ident(**s)) {
- hash += (U64)x * (unsigned char)(**s) + y;
- x = rand_u32(x);
- y = rand_u32(y);
++*s;
}
- return hash;
+ return (size_t)(*s - original);
+}
+
+static U64 ident_hash(char **s) {
+ char *original = *s;
+ return str_hash(original, ident_str_len(s));
}
/* are these strings equal, up to the first non-ident character? */
@@ -57,7 +57,7 @@ static bool ident_str_eq_str(const char *s, const char *t) {
static inline bool ident_eq_str(Identifier i, const char *s) {
if (i->anonymous) return false;
- return ident_str_eq_str(i->text, s);
+ return ident_str_eq_str(i->str, s);
}
@@ -78,55 +78,25 @@ static IdentSlot **ident_slots_insert(IdentSlot **slots, char *s, size_t i) {
static Identifier ident_new_anonymous(Identifiers *ids) {
U32 idx = rand_u32(ids->rseed);
ids->rseed = idx;
- IdentSlot **slot = ident_slots_insert(ids->slots, NULL, idx % arr_len(ids->slots));
- *slot = err_calloc(1, sizeof **slot);
- ++ids->nidents;
- (*slot)->anonymous = true;
- (*slot)->len = 3;
- (*slot)->text = "???";
- return *slot;
+ IdentSlot *slot = (IdentSlot *)str_hash_table_insert_anonymous_(&ids->table);
+ slot->anonymous = true;
+ return slot;
}
/* moves s to the char after the identifier */
/* inserts if does not exist. reads until non-ident char is found. */
/* advances past identifier */
static Identifier ident_insert(Identifiers *ids, char **s) {
- size_t nslots = arr_len(ids->slots);
- if (nslots <= 2*ids->nidents) {
- IdentSlot **slots = ids->slots;
- /* reserve more space */
- IdentSlot **new_slots = NULL;
- size_t new_nslots = nslots * 2 + 10;
- arr_set_len(&new_slots, new_nslots);
- arr_zero(new_slots);
- arr_foreach(slots, IdentSlotPtr, slotp) {
- IdentSlot *slot = *slotp;
- if (slot) {
- char *ptr = slot->text;
- U64 new_hash = ident_hash(&ptr);
- IdentSlot **new_slot = ident_slots_insert(new_slots, slot->text, new_hash % new_nslots);
- *new_slot = slot;
- }
- }
- arr_clear(&slots);
- ids->slots = new_slots;
- nslots = new_nslots;
- }
char *original = *s;
- U64 hash = ident_hash(s);
- IdentSlot **slot = ident_slots_insert(ids->slots, original, hash % arr_len(ids->slots));
- if (!*slot) {
- *slot = err_calloc(1, sizeof **slot);
- ++ids->nidents;
- (*slot)->text = original;
- (*slot)->len = (size_t)(*s - original);
- }
- return *slot;
+ size_t len = ident_str_len(s);
+ return (Identifier)str_hash_table_insert_(&ids->table, original, len);
}
static char *ident_to_str(Identifier i) {
char *str = err_malloc(i->len + 1);
- /* for some reason, GCC thinks that i->len is -1 when this is called from type_to_str_ (in release mode) */
+ /* for some reason, GCC thinks that i->len is -1 when this is called from type_to_str_ (in release mode)
+ TODO: test this now (some stuff was changed)
+ */
#if !defined(__clang__) && defined(__GNUC__)
#pragma GCC diagnostic push
@@ -134,7 +104,7 @@ static char *ident_to_str(Identifier i) {
#pragma GCC diagnostic ignored "-Wrestrict"
#pragma GCC diagnostic ignored "-Warray-bounds"
#endif
- memcpy(str, i->text, i->len);
+ memcpy(str, i->str, i->len);
#if !defined(__clang__) && defined(__GNUC__)
@@ -146,7 +116,7 @@ static char *ident_to_str(Identifier i) {
static void fprint_ident(FILE *out, Identifier id) {
- fwrite(id->text, 1, id->len, out);
+ fwrite(id->str, 1, id->len, out);
}
static void fprint_ident_debug(FILE *out, Identifier id) {
@@ -174,7 +144,7 @@ static void fprint_ident_reduced_charset(FILE *out, Identifier id) {
fprintf(out, "a%p__",(void *)id);
return;
}
- for (char *s = id->text; is_ident(*s); ++s) {
+ for (const char *s = id->str; is_ident(*s); ++s) {
int c = (unsigned char)(*s);
if (c > 127) {
fprintf(out, "x__%x", c);
@@ -187,14 +157,13 @@ static void fprint_ident_reduced_charset(FILE *out, Identifier id) {
/* NULL = no such identifier. returns identifier "foo" for both "foo\0" and "foo+92384324..." */
static Identifier ident_get(Identifiers *ids, char *s) {
char *ptr = s;
- U64 hash = ident_hash(&ptr);
- IdentSlot **slot = ident_slots_insert(ids->slots, s, hash % arr_len(ids->slots));
- return *slot;
+ size_t len = ident_str_len(&ptr);
+ return (Identifier)str_hash_table_get_(&ids->table, s, len);
}
static Identifier ident_translate(Identifier i, Identifiers *to_idents) {
if (!i || i->anonymous) return NULL;
- Identifier new_ident = ident_get(to_idents, i->text);
+ Identifier new_ident = ident_get(to_idents, i->str);
return new_ident;
}
@@ -213,16 +182,15 @@ static IdentDecl *ident_decl(Identifier i) {
/* returns true if i and j are equal, even if they're not in the same table */
static bool ident_eq(Identifier i, Identifier j) {
- return ident_str_eq_str(i->text, j->text);
+ return i->len == j->len && memcmp(i->str, j->str, i->len) == 0;
}
static void idents_free(Identifiers *ids) {
- arr_foreach(ids->slots, IdentSlotPtr, slotp) {
- IdentSlot *slot = *slotp;
+ arr_foreach(ids->table.slots, StrHashTableSlotPtr, slotp) {
+ IdentSlot *slot = *(IdentSlot **)slotp;
if (slot) arr_clear(&slot->decls);
- free(slot);
}
- arr_clear(&ids->slots);
+ str_hash_table_free(&ids->table);
}
#ifdef TOC_DEBUG
diff --git a/misc.c b/misc.c
index 0c817c1..3bd06c6 100644
--- a/misc.c
+++ b/misc.c
@@ -53,3 +53,5 @@ static U32 rand_u32(U32 seed) {
return (U32)((seed64 * 0x832f0fda4e1a8642 + 0x41d49cd5459a2ab4) >> 32);
}
+
+
diff --git a/tests.c b/tests.c
index 9d82897..54ba6ea 100644
--- a/tests.c
+++ b/tests.c
@@ -31,6 +31,7 @@ static void test_all(void) {
allocr_test();
arr_test();
block_arr_test();
+ str_hash_table_test();
idents_test();
binfile_test();
}
diff --git a/toc.c b/toc.c
index 0627a41..447abac 100644
--- a/toc.c
+++ b/toc.c
@@ -69,10 +69,10 @@ static inline bool type_is_slicechar(Type *t) {
/* utilities */
#include "allocator.c"
-#include "arr.c"
+#include "misc.c"
+#include "data_structures.c"
#include "location.c"
#include "err.c"
-#include "misc.c"
#include "blockarr.c"
#include "instance_table.c"
#include "copy.c"
diff --git a/types.h b/types.h
index 7ba2f18..36f21ec 100644
--- a/types.h
+++ b/types.h
@@ -1,7 +1,27 @@
/*
- Copyright (C) 2019, 2020 Leo Tenenbaum.
- This file is part of toc. toc is distributed under version 3 of the GNU General Public License, without any warranty whatsoever.
- You should have received a copy of the GNU General Public License along with toc. If not, see <https://www.gnu.org/licenses/>.
+toc's types. Note that although these types are in the public domain,
+the code which uses them (i.e. most of the rest of toc) is not.
+
+This is free and unencumbered software released into the public domain.
+Anyone is free to copy, modify, publish, use, compile, sell, or
+distribute this software, either in source code form or as a compiled
+binary, for any purpose, commercial or non-commercial, and by any
+means.
+In jurisdictions that recognize copyright laws, the author or authors
+of this software dedicate any and all copyright interest in the
+software to the public domain. We make this dedication for the benefit
+of the public at large and to the detriment of our heirs and
+successors. We intend this dedication to be an overt act of
+relinquishment in perpetuity of all present and future rights to this
+software under copyright law.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+For more information, please refer to <http://unlicense.org/>
*/
/* NOTE: make sure you edit copy.c and package.c and cgen_recurse_subexprs/types when you make a change to expression-related types or type-related types in this file! */
@@ -170,23 +190,38 @@ typedef struct IdentDecl {
} IdentDecl;
typedef struct IdentSlot {
+ char *str;
+ size_t len;
bool export_name; /* is this identifier's name important? */
bool anonymous; /* is this identifier not part of a tree? */
bool imported; /* was this identifier imported from a package? */
- char *text; /* actual name of the identifier */
- size_t len; /* length of name */
U64 export_id; /* 0 if there's no exported identifier here, otherwise unique positive integer associated with this identifier */
struct Package *from_pkg;
struct Package *pkg; /* NULL if this is not associated with a package */
IdentDecl *decls; /* array of declarations of this identifier */
} IdentSlot;
+typedef struct {
+ const char *str;
+ size_t len;
+ MaxAlign data[];
+} StrHashTableSlot;
+
+typedef StrHashTableSlot *StrHashTableSlotPtr;
+
+typedef struct {
+ StrHashTableSlot **slots;
+ Allocator *allocr;
+ U32 rand_seed;
+ size_t data_size;
+ size_t nentries; /* # of filled slots */
+} StrHashTable;
+
typedef IdentSlot *Identifier;
typedef IdentSlot *IdentSlotPtr;
typedef struct Identifiers {
- IdentSlot **slots;
- U64 nidents;
+ StrHashTable table;
U32 rseed;
} Identifiers;