From 4a1467810d1cf9498db9507f36f78a0bd385c1e9 Mon Sep 17 00:00:00 2001 From: Leo Tenenbaum Date: Sun, 19 Jan 2020 23:21:56 -0500 Subject: extracted hash table for future use by foreign to avoid loading libraries twice --- README.html | 4 +- README.md | 4 +- arr.c | 217 ------------------------------- data_structures.c | 372 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ identifiers.c | 90 +++++-------- misc.c | 2 + tests.c | 1 + toc.c | 4 +- types.h | 49 ++++++- 9 files changed, 454 insertions(+), 289 deletions(-) delete mode 100644 arr.c create mode 100644 data_structures.c diff --git a/README.html b/README.html index f00bf27..baa360d 100644 --- a/README.html +++ b/README.html @@ -46,7 +46,9 @@ it is nearly as fast in theory.

toc Compiler Source Code

-

Most of the source code for the toc compiler is licensed under the GNU General Public License, version 3. See LICENSE for more information.

+

Most of the source code for the toc compiler is licensed under the GNU General Public License, version 3, and the rest (some small general utilities) is in the public domain. Each source file begins with a comment explaining its license.

+ +

See LICENSE for the GNU General Public License.

toc is written in C, for speed and portability. It has no dependencies, other than the C runtime library. If you want to be able to call external C functions at compile time, however, you will need libffcall and libdl (so this is only currently supported on Unix-y systems).

diff --git a/README.md b/README.md index acc64ba..2fa6838 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,9 @@ On other systems, you can just compile main.c with a C compiler. `toc` uses seve ### `toc` Compiler Source Code -Most of the source code for the `toc` compiler is licensed under the GNU General Public License, version 3. See `LICENSE` for more information. +Most of the source code for the `toc` compiler is licensed under the GNU General Public License, version 3, and the rest (some small general utilities) is in the public domain. Each source file begins with a comment explaining its license. + +See `LICENSE` for the GNU General Public License. `toc` is written in C, for speed and portability. It has no dependencies, other than the C runtime library. If you want to be able to call external C functions at compile time, however, you will need `libffcall` and `libdl` (so this is only currently supported on Unix-y systems). diff --git a/arr.c b/arr.c deleted file mode 100644 index b44935d..0000000 --- a/arr.c +++ /dev/null @@ -1,217 +0,0 @@ -/* - Copyright (C) 2019, 2020 Leo Tenenbaum. - This file is part of toc. toc is distributed under version 3 of the GNU General Public License, without any warranty whatsoever. - You should have received a copy of the GNU General Public License along with toc. If not, see . -*/ - -/* OPTIM: is it faster to store void *end? */ -typedef struct { - size_t len; - size_t cap; - MaxAlign data[]; -} ArrHeader; - -static inline ArrHeader *arr_hdr(void *arr) { - ArrHeader *hdr = (ArrHeader *)((char *)arr - offsetof(ArrHeader, data)); - return hdr; -} - -static inline size_t arr_len(void *arr) { - if (arr == NULL) return 0; - return arr_hdr(arr)->len; -} - -static inline void arr_zero_(void *arr, size_t item_sz) { - memset(arr, 0, item_sz * arr_len(arr)); -} - -static void arr_resv_(void **arr, size_t n, size_t item_sz) { - if (*arr == NULL) { - ArrHeader *hdr = err_malloc(item_sz * n + sizeof(ArrHeader) + 1); /* +1 => prevent ptr overflow */ - hdr->len = 0; - hdr->cap = n; - *arr = hdr->data; - } else { - ArrHeader *hdr = arr_hdr(*arr); - hdr->cap = n; - hdr = err_realloc(hdr, item_sz * n + sizeof(ArrHeader) + 1); - if (hdr->len > hdr->cap) hdr->len = hdr->cap; - *arr = hdr->data; - } -} -static void arr_resva_(void **arr, size_t n, size_t item_sz, Allocator *a) { - if (*arr == NULL) { - ArrHeader *hdr = allocr_malloc(a, item_sz * n + sizeof(ArrHeader)); - hdr->len = 0; - hdr->cap = n; - *arr = hdr->data; - } else { - ArrHeader *hdr = arr_hdr(*arr); - hdr = allocr_realloc(a, hdr, item_sz * hdr->cap + sizeof(ArrHeader), item_sz * n + sizeof(ArrHeader)); - hdr->cap = n; - if (hdr->len > hdr->cap) hdr->len = hdr->cap; - *arr = hdr->data; - } -} - -static void arr_clear_(void **arr) { - if (*arr) { - free(arr_hdr(*arr)); - *arr = NULL; - } -} - -static void arr_cleara_(void **arr, size_t size, Allocator *allocr) { - if (*arr) { - ArrHeader *header = arr_hdr(*arr); - allocr_free(allocr, header, header->cap * size); - *arr = NULL; - } -} - -static void arr_set_len_(void **arr, size_t n, size_t item_sz) { - if (n == 0) { - arr_clear_(arr); - return; - } - arr_resv_(arr, n, item_sz); - arr_hdr(*arr)->len = n; -} -static void arr_set_lena_(void **arr, size_t n, size_t item_sz, Allocator *a) { - if (n == 0) { - /* OPTIM: arr_cleara */ - *arr = NULL; - return; - } - arr_resva_(arr, n, item_sz, a); - arr_hdr(*arr)->len = n; -} - -static void *arr_add_(void **arr, size_t item_sz) { - ArrHeader *hdr; - if (*arr == NULL) { - arr_resv_(arr, 10, item_sz); - hdr = arr_hdr(*arr); - } else { - hdr = arr_hdr(*arr); - if (hdr->len >= hdr->cap) { - arr_resv_(arr, hdr->len * 2 + 1, item_sz); - hdr = arr_hdr(*arr); - } - } - return &(((char *)hdr->data)[(hdr->len++) * item_sz]); -} -static void *arr_adda_(void **arr, size_t item_sz, Allocator *a) { - ArrHeader *hdr; - if (*arr == NULL) { - arr_resva_(arr, 10, item_sz, a); - hdr = arr_hdr(*arr); - } else { - hdr = arr_hdr(*arr); - if (hdr->len >= hdr->cap) { - arr_resva_(arr, hdr->len * 2 + 1, item_sz, a); - hdr = arr_hdr(*arr); - } - } - return &(((char *)hdr->data)[(hdr->len++) * item_sz]); -} - - -static void *arr_last_(void *arr, size_t item_sz) { - if (arr) { - ArrHeader *hdr = arr_hdr(arr); - return hdr->len == 0 ? NULL : (char *)hdr->data + (hdr->len-1) * item_sz; - } else { - return NULL; - } -} - -static void *arr_end_(void *arr, size_t item_sz) { - if (arr) { - ArrHeader *hdr = arr_hdr(arr); - return hdr->len == 0 ? NULL : (char *)hdr->data + hdr->len * item_sz; - } else { - return NULL; - } -} - -/* OPTIM: shrink array */ -static void arr_remove_last_(void **arr) { - assert(arr_hdr(*arr)->len); - if (--arr_hdr(*arr)->len == 0) { - arr_clear_(arr); - } -} - -static void arr_remove_lasta_(void **arr, size_t item_sz, Allocator *a) { - assert(arr_hdr(*arr)->len); - if (--arr_hdr(*arr)->len == 0) { - arr_cleara_(arr, item_sz, a); - } -} - - - -static void arr_copya_(void **out, void *in, size_t item_sz, Allocator *a) { - size_t len = arr_len(in); - arr_resva_(out, len, item_sz, a); - memcpy(*out, in, len * item_sz); -} - -#ifdef __GNUC__ -#define typeof __typeof__ -#endif - -#if defined(__GNUC__) || defined(__TINYC__) -#define HAS_TYPEOF 1 -#endif - -#if HAS_TYPEOF -/* -this is to cast the return value of arr_add so that gcc produces a warning if you -do something like: -float *arr = NULL; -// ... -int *x = arr_add(&arr); -You shouldn't rely on this, though, e.g. by doing -*arr_add(&arr) = 17; - */ -#define arr_ptr_type(arr) __typeof__(*(arr)) -#else -#define arr_ptr_type(arr) void * -#endif - -#define arr_zero(arr) arr_zero_(arr, sizeof *(arr)) -#define arr_add(arr) (arr_ptr_type(arr))arr_add_((void **)(arr), sizeof **(arr)) -#define arr_adda(arr, allocr) (arr_ptr_type(arr))arr_adda_((void **)(arr), sizeof **(arr), (allocr)) -#define arr_resv(arr, n) arr_resv_((void **)(arr), n, sizeof **(arr)) -#define arr_resva(arr, n, allocr) arr_resva_((void **)(arr), n, sizeof **(arr), (allocr)) -#define arr_set_len(arr, n) arr_set_len_((void **)(arr), n, sizeof **(arr)) -#define arr_set_lena(arr, n, a) arr_set_lena_((void **)(arr), n, sizeof **(arr), (a)) -#define arr_clear(arr) arr_clear_((void **)(arr)), (void)sizeof **arr /* second part makes sure most of the time that you don't accidentally call it without taking the address */ -#define arr_cleara(arr, allocr) arr_cleara_((void **)(arr), sizeof **(arr), (allocr)) -#define arr_last(arr) arr_last_((void *)(arr), sizeof *(arr)) -/* one past last, or NULL if empty */ -#define arr_end(arr) arr_end_((void *)(arr), sizeof *(arr)) -#define arr_foreach(arr, type, var) for (type *var = arr, *var##_foreach_end = arr_end(arr); var < var##_foreach_end; ++var) /* NOTE: < is useful here because currently it's possible for var_foreach_end to be NULL but var could start out not null */ -#define arr_remove_last(arr) arr_remove_last_((void **)(arr)), (void)sizeof **(arr) -#define arr_remove_lasta(arr, a) arr_remove_lasta_((void **)(arr), sizeof **(arr), (a)) -#define arr_copya(out, in, a) do { assert(sizeof *(in) == sizeof **(out)); arr_copya_((void **)(out), (in), sizeof **(out), (a)); } while(0) - -#ifdef TOC_DEBUG -static void arr_test(void) { - int *foos = NULL; - for (int i = 0; i < 10; ++i) { - *(int *)arr_add(&foos) = i; - } - for (int i = 0; i < (int)arr_len(foos); ++i) { - assert(foos[i] == i); - } - int lastx = -1; - arr_foreach(foos, int, x) { - assert(*x == lastx + 1); - lastx = *x; - } - arr_clear(&foos); -} -#endif diff --git a/data_structures.c b/data_structures.c new file mode 100644 index 0000000..970d966 --- /dev/null +++ b/data_structures.c @@ -0,0 +1,372 @@ +/* +Dynamic arrays and hash tables in C + +This is free and unencumbered software released into the public domain. +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. +For more information, please refer to +*/ + +/* OPTIM: is it faster to store void *end? */ +typedef struct { + size_t len; + size_t cap; + MaxAlign data[]; +} ArrHeader; + +static inline ArrHeader *arr_hdr(void *arr) { + ArrHeader *hdr = (ArrHeader *)((char *)arr - offsetof(ArrHeader, data)); + return hdr; +} + +static inline size_t arr_len(void *arr) { + if (arr == NULL) return 0; + return arr_hdr(arr)->len; +} + +static inline void arr_zero_(void *arr, size_t item_sz) { + memset(arr, 0, item_sz * arr_len(arr)); +} + +static void arr_resv_(void **arr, size_t n, size_t item_sz) { + if (*arr == NULL) { + ArrHeader *hdr = err_malloc(item_sz * n + sizeof(ArrHeader) + 1); /* +1 => prevent ptr overflow */ + hdr->len = 0; + hdr->cap = n; + *arr = hdr->data; + } else { + ArrHeader *hdr = arr_hdr(*arr); + hdr->cap = n; + hdr = err_realloc(hdr, item_sz * n + sizeof(ArrHeader) + 1); + if (hdr->len > hdr->cap) hdr->len = hdr->cap; + *arr = hdr->data; + } +} +static void arr_resva_(void **arr, size_t n, size_t item_sz, Allocator *a) { + if (*arr == NULL) { + ArrHeader *hdr = allocr_malloc(a, item_sz * n + sizeof(ArrHeader)); + hdr->len = 0; + hdr->cap = n; + *arr = hdr->data; + } else { + ArrHeader *hdr = arr_hdr(*arr); + hdr = allocr_realloc(a, hdr, item_sz * hdr->cap + sizeof(ArrHeader), item_sz * n + sizeof(ArrHeader)); + hdr->cap = n; + if (hdr->len > hdr->cap) hdr->len = hdr->cap; + *arr = hdr->data; + } +} + +static void arr_clear_(void **arr) { + if (*arr) { + free(arr_hdr(*arr)); + *arr = NULL; + } +} + +static void arr_cleara_(void **arr, size_t size, Allocator *allocr) { + if (*arr) { + ArrHeader *header = arr_hdr(*arr); + allocr_free(allocr, header, header->cap * size); + *arr = NULL; + } +} + +static void arr_set_len_(void **arr, size_t n, size_t item_sz) { + if (n == 0) { + arr_clear_(arr); + return; + } + arr_resv_(arr, n, item_sz); + arr_hdr(*arr)->len = n; +} +static void arr_set_lena_(void **arr, size_t n, size_t item_sz, Allocator *a) { + if (n == 0) { + arr_cleara_(arr, item_sz, a); + return; + } + arr_resva_(arr, n, item_sz, a); + arr_hdr(*arr)->len = n; +} + +static void *arr_add_(void **arr, size_t item_sz) { + ArrHeader *hdr; + if (*arr == NULL) { + arr_resv_(arr, 10, item_sz); + hdr = arr_hdr(*arr); + } else { + hdr = arr_hdr(*arr); + if (hdr->len >= hdr->cap) { + arr_resv_(arr, hdr->len * 2 + 1, item_sz); + hdr = arr_hdr(*arr); + } + } + return &(((char *)hdr->data)[(hdr->len++) * item_sz]); +} +static void *arr_adda_(void **arr, size_t item_sz, Allocator *a) { + ArrHeader *hdr; + if (*arr == NULL) { + arr_resva_(arr, 10, item_sz, a); + hdr = arr_hdr(*arr); + } else { + hdr = arr_hdr(*arr); + if (hdr->len >= hdr->cap) { + arr_resva_(arr, hdr->len * 2 + 1, item_sz, a); + hdr = arr_hdr(*arr); + } + } + return &(((char *)hdr->data)[(hdr->len++) * item_sz]); +} + + +static void *arr_last_(void *arr, size_t item_sz) { + if (arr) { + ArrHeader *hdr = arr_hdr(arr); + return hdr->len == 0 ? NULL : (char *)hdr->data + (hdr->len-1) * item_sz; + } else { + return NULL; + } +} + +static void *arr_end_(void *arr, size_t item_sz) { + if (arr) { + ArrHeader *hdr = arr_hdr(arr); + return hdr->len == 0 ? NULL : (char *)hdr->data + hdr->len * item_sz; + } else { + return NULL; + } +} + +/* OPTIM: shrink array */ +static void arr_remove_last_(void **arr) { + assert(arr_hdr(*arr)->len); + if (--arr_hdr(*arr)->len == 0) { + arr_clear_(arr); + } +} + +static void arr_remove_lasta_(void **arr, size_t item_sz, Allocator *a) { + assert(arr_hdr(*arr)->len); + if (--arr_hdr(*arr)->len == 0) { + arr_cleara_(arr, item_sz, a); + } +} + + + +static void arr_copya_(void **out, void *in, size_t item_sz, Allocator *a) { + size_t len = arr_len(in); + arr_resva_(out, len, item_sz, a); + memcpy(*out, in, len * item_sz); +} + +#ifdef __GNUC__ +#define typeof __typeof__ +#endif + +#if defined(__GNUC__) || defined(__TINYC__) +#define HAS_TYPEOF 1 +#endif + +#if HAS_TYPEOF +/* +this is to cast the return value of arr_add so that gcc produces a warning if you +do something like: +float *arr = NULL; +// ... +int *x = arr_add(&arr); +You shouldn't rely on this, though, e.g. by doing +*arr_add(&arr) = 17; + */ +#define arr_ptr_type(arr) __typeof__(*(arr)) +#else +#define arr_ptr_type(arr) void * +#endif + +#define arr_zero(arr) arr_zero_(arr, sizeof *(arr)) +#define arr_add(arr) (arr_ptr_type(arr))arr_add_((void **)(arr), sizeof **(arr)) +#define arr_adda(arr, allocr) (arr_ptr_type(arr))arr_adda_((void **)(arr), sizeof **(arr), (allocr)) +#define arr_resv(arr, n) arr_resv_((void **)(arr), n, sizeof **(arr)) +#define arr_resva(arr, n, allocr) arr_resva_((void **)(arr), n, sizeof **(arr), (allocr)) +#define arr_set_len(arr, n) arr_set_len_((void **)(arr), n, sizeof **(arr)) +#define arr_set_lena(arr, n, a) arr_set_lena_((void **)(arr), n, sizeof **(arr), (a)) +#define arr_clear(arr) arr_clear_((void **)(arr)), (void)sizeof **arr /* second part makes sure most of the time that you don't accidentally call it without taking the address */ +#define arr_cleara(arr, allocr) arr_cleara_((void **)(arr), sizeof **(arr), (allocr)) +#define arr_last(arr) arr_last_((void *)(arr), sizeof *(arr)) +/* one past last, or NULL if empty */ +#define arr_end(arr) arr_end_((void *)(arr), sizeof *(arr)) +#define arr_foreach(arr, type, var) for (type *var = arr, *var##_foreach_end = arr_end(arr); var < var##_foreach_end; ++var) /* NOTE: < is useful here because currently it's possible for var_foreach_end to be NULL but var could start out not null */ +#define arr_remove_last(arr) arr_remove_last_((void **)(arr)), (void)sizeof **(arr) +#define arr_remove_lasta(arr, a) arr_remove_lasta_((void **)(arr), sizeof **(arr), (a)) +#define arr_copya(out, in, a) do { assert(sizeof *(in) == sizeof **(out)); arr_copya_((void **)(out), (in), sizeof **(out), (a)); } while(0) + +#ifdef TOC_DEBUG +static void arr_test(void) { + int *foos = NULL; + for (int i = 0; i < 10; ++i) { + *(int *)arr_add(&foos) = i; + } + for (int i = 0; i < (int)arr_len(foos); ++i) { + assert(foos[i] == i); + } + int lastx = -1; + arr_foreach(foos, int, x) { + assert(*x == lastx + 1); + lastx = *x; + } + arr_clear(&foos); +} +#endif + +static U64 str_hash(const char *s, size_t len) { + U32 x = 0xabcdef01; + U32 y = 0x31415926; + U64 hash = 0; + for (size_t i = 0; i < len; ++i) { + hash += (U64)x * (unsigned char)(*s) + y; + x = rand_u32(x); + y = rand_u32(y); + ++s; + } + return hash; +} + +static inline void str_hash_table_create(StrHashTable *t, size_t data_size, Allocator *allocr) { + t->slots = NULL; + t->data_size = data_size; + t->nentries = 0; + t->allocr = allocr; + t->rand_seed = 0xabacabad; +} + +static StrHashTableSlot **str_hash_table_slot_get(StrHashTableSlot **slots, const char *s, size_t s_len, size_t i) { + StrHashTableSlot **slot; + size_t slots_cap = arr_len(slots); + while (1) { + assert(i < slots_cap); + slot = &slots[i]; + if (!*slot) break; + if (s && (*slot)->str && + s_len == (*slot)->len && memcmp(s, (*slot)->str, s_len) == 0) + break; + i = (i+1) % slots_cap; + } + return slot; +} + +static void str_hash_table_grow(StrHashTable *t) { + size_t slots_cap = arr_len(t->slots); + if (slots_cap <= 2 * t->nentries) { + StrHashTableSlot **new_slots = NULL; + size_t new_slots_cap = slots_cap * 2 + 10; + arr_set_len(&new_slots, new_slots_cap); + arr_zero(new_slots); + arr_foreach(t->slots, StrHashTableSlotPtr, slotp) { + StrHashTableSlot *slot = *slotp; + if (slot) { + U64 new_hash = str_hash(slot->str, slot->len); + StrHashTableSlot **new_slot = str_hash_table_slot_get(new_slots, slot->str, slot->len, new_hash % new_slots_cap); + *new_slot = slot; + } + } + arr_cleara(&t->slots, t->allocr); + t->slots = new_slots; + slots_cap = new_slots_cap; + } +} + +static inline size_t str_hash_table_slot_size(StrHashTable *t) { + return sizeof(StrHashTableSlot) + ((t->data_size + sizeof(MaxAlign) - 1) / sizeof(MaxAlign)) * sizeof(MaxAlign); +} + +static StrHashTableSlot *str_hash_table_insert_(StrHashTable *t, const char *str, size_t len) { + str_hash_table_grow(t); + size_t slots_cap = arr_len(t->slots); + U64 hash = str_hash(str, len); + StrHashTableSlot **slot = str_hash_table_slot_get(t->slots, str, len, hash % slots_cap); + if (!*slot) { + *slot = allocr_calloc(t->allocr, 1, str_hash_table_slot_size(t)); + (*slot)->str = str; + (*slot)->len = len; + ++t->nentries; + } + return *slot; +} + +/* use this if you don't need the slot */ +static inline void *str_hash_table_insert(StrHashTable *t, const char *str, size_t len) { + return str_hash_table_insert_(t, str, len)->data; +} + +static StrHashTableSlot *str_hash_table_insert_anonymous_(StrHashTable *t) { + str_hash_table_grow(t); + size_t slots_cap = arr_len(t->slots); + U32 slot_idx = (U32)((t->rand_seed = rand_u32(t->rand_seed)) % slots_cap); + StrHashTableSlot **slot = str_hash_table_slot_get(t->slots, NULL, 0, slot_idx); + if (!*slot) { + *slot = allocr_calloc(t->allocr, 1, str_hash_table_slot_size(t)); + ++t->nentries; + } + return *slot; +} + +/* use this if you don't need the slot */ +static inline void *str_hash_table_insert_anonymous(StrHashTable *t) { + return str_hash_table_insert_anonymous_(t)->data; +} + + +static void str_hash_table_free(StrHashTable *t) { + arr_foreach(t->slots, StrHashTableSlotPtr, slotp) { + allocr_free(t->allocr, *slotp, str_hash_table_slot_size(t)); + } + arr_cleara(&t->slots, t->allocr); +} + +static StrHashTableSlot *str_hash_table_get_(StrHashTable *t, const char *str, size_t len) { + size_t slot_index = str_hash(str, len) % arr_len(t->slots); + return *str_hash_table_slot_get(t->slots, str, len, slot_index); +} + +static inline void *str_hash_table_get(StrHashTable *t, const char *str, size_t len) { + return str_hash_table_get_(t, str, len)->data; +} + +#ifdef TOC_DEBUG +static void str_hash_table_test(void) { + StrHashTable t; + str_hash_table_create(&t, sizeof(int), NULL); + int *p = str_hash_table_insert(&t, "Hello", 5); + *p = 182; + int *q = str_hash_table_insert(&t, "Hello", 5); + assert(p == q); + assert(*q == 182); + *q = 112; + int *r = str_hash_table_insert(&t, "Hellop", 6); + assert(p != r); + assert(*r == 0); + *r = 999; + int *s = str_hash_table_insert_anonymous(&t); + assert(p != s && r != s); + *s = 123; + int *u = str_hash_table_get(&t, "Hello", 5); + assert(p == u); + str_hash_table_free(&t); +} +#endif diff --git a/identifiers.c b/identifiers.c index 52f09b0..a17c002 100644 --- a/identifiers.c +++ b/identifiers.c @@ -28,22 +28,22 @@ static int is_ident(int c) { /* Initialize Identifiers. */ static void idents_create(Identifiers *ids) { - ids->slots = NULL; - ids->nidents = 0; + str_hash_table_create(&ids->table, sizeof(IdentSlot) - sizeof(StrHashTableSlot), NULL); ids->rseed = 0x27182818; } -static U64 ident_hash(char **s) { - U32 x = 0xabcdef01; - U32 y = 0x31415926; - U64 hash = 0; +/* advances s until a non-identifier character is reached, then returns the number of characters advanced */ +static size_t ident_str_len(char **s) { + char *original = *s; while (is_ident(**s)) { - hash += (U64)x * (unsigned char)(**s) + y; - x = rand_u32(x); - y = rand_u32(y); ++*s; } - return hash; + return (size_t)(*s - original); +} + +static U64 ident_hash(char **s) { + char *original = *s; + return str_hash(original, ident_str_len(s)); } /* are these strings equal, up to the first non-ident character? */ @@ -57,7 +57,7 @@ static bool ident_str_eq_str(const char *s, const char *t) { static inline bool ident_eq_str(Identifier i, const char *s) { if (i->anonymous) return false; - return ident_str_eq_str(i->text, s); + return ident_str_eq_str(i->str, s); } @@ -78,55 +78,25 @@ static IdentSlot **ident_slots_insert(IdentSlot **slots, char *s, size_t i) { static Identifier ident_new_anonymous(Identifiers *ids) { U32 idx = rand_u32(ids->rseed); ids->rseed = idx; - IdentSlot **slot = ident_slots_insert(ids->slots, NULL, idx % arr_len(ids->slots)); - *slot = err_calloc(1, sizeof **slot); - ++ids->nidents; - (*slot)->anonymous = true; - (*slot)->len = 3; - (*slot)->text = "???"; - return *slot; + IdentSlot *slot = (IdentSlot *)str_hash_table_insert_anonymous_(&ids->table); + slot->anonymous = true; + return slot; } /* moves s to the char after the identifier */ /* inserts if does not exist. reads until non-ident char is found. */ /* advances past identifier */ static Identifier ident_insert(Identifiers *ids, char **s) { - size_t nslots = arr_len(ids->slots); - if (nslots <= 2*ids->nidents) { - IdentSlot **slots = ids->slots; - /* reserve more space */ - IdentSlot **new_slots = NULL; - size_t new_nslots = nslots * 2 + 10; - arr_set_len(&new_slots, new_nslots); - arr_zero(new_slots); - arr_foreach(slots, IdentSlotPtr, slotp) { - IdentSlot *slot = *slotp; - if (slot) { - char *ptr = slot->text; - U64 new_hash = ident_hash(&ptr); - IdentSlot **new_slot = ident_slots_insert(new_slots, slot->text, new_hash % new_nslots); - *new_slot = slot; - } - } - arr_clear(&slots); - ids->slots = new_slots; - nslots = new_nslots; - } char *original = *s; - U64 hash = ident_hash(s); - IdentSlot **slot = ident_slots_insert(ids->slots, original, hash % arr_len(ids->slots)); - if (!*slot) { - *slot = err_calloc(1, sizeof **slot); - ++ids->nidents; - (*slot)->text = original; - (*slot)->len = (size_t)(*s - original); - } - return *slot; + size_t len = ident_str_len(s); + return (Identifier)str_hash_table_insert_(&ids->table, original, len); } static char *ident_to_str(Identifier i) { char *str = err_malloc(i->len + 1); - /* for some reason, GCC thinks that i->len is -1 when this is called from type_to_str_ (in release mode) */ + /* for some reason, GCC thinks that i->len is -1 when this is called from type_to_str_ (in release mode) + TODO: test this now (some stuff was changed) + */ #if !defined(__clang__) && defined(__GNUC__) #pragma GCC diagnostic push @@ -134,7 +104,7 @@ static char *ident_to_str(Identifier i) { #pragma GCC diagnostic ignored "-Wrestrict" #pragma GCC diagnostic ignored "-Warray-bounds" #endif - memcpy(str, i->text, i->len); + memcpy(str, i->str, i->len); #if !defined(__clang__) && defined(__GNUC__) @@ -146,7 +116,7 @@ static char *ident_to_str(Identifier i) { static void fprint_ident(FILE *out, Identifier id) { - fwrite(id->text, 1, id->len, out); + fwrite(id->str, 1, id->len, out); } static void fprint_ident_debug(FILE *out, Identifier id) { @@ -174,7 +144,7 @@ static void fprint_ident_reduced_charset(FILE *out, Identifier id) { fprintf(out, "a%p__",(void *)id); return; } - for (char *s = id->text; is_ident(*s); ++s) { + for (const char *s = id->str; is_ident(*s); ++s) { int c = (unsigned char)(*s); if (c > 127) { fprintf(out, "x__%x", c); @@ -187,14 +157,13 @@ static void fprint_ident_reduced_charset(FILE *out, Identifier id) { /* NULL = no such identifier. returns identifier "foo" for both "foo\0" and "foo+92384324..." */ static Identifier ident_get(Identifiers *ids, char *s) { char *ptr = s; - U64 hash = ident_hash(&ptr); - IdentSlot **slot = ident_slots_insert(ids->slots, s, hash % arr_len(ids->slots)); - return *slot; + size_t len = ident_str_len(&ptr); + return (Identifier)str_hash_table_get_(&ids->table, s, len); } static Identifier ident_translate(Identifier i, Identifiers *to_idents) { if (!i || i->anonymous) return NULL; - Identifier new_ident = ident_get(to_idents, i->text); + Identifier new_ident = ident_get(to_idents, i->str); return new_ident; } @@ -213,16 +182,15 @@ static IdentDecl *ident_decl(Identifier i) { /* returns true if i and j are equal, even if they're not in the same table */ static bool ident_eq(Identifier i, Identifier j) { - return ident_str_eq_str(i->text, j->text); + return i->len == j->len && memcmp(i->str, j->str, i->len) == 0; } static void idents_free(Identifiers *ids) { - arr_foreach(ids->slots, IdentSlotPtr, slotp) { - IdentSlot *slot = *slotp; + arr_foreach(ids->table.slots, StrHashTableSlotPtr, slotp) { + IdentSlot *slot = *(IdentSlot **)slotp; if (slot) arr_clear(&slot->decls); - free(slot); } - arr_clear(&ids->slots); + str_hash_table_free(&ids->table); } #ifdef TOC_DEBUG diff --git a/misc.c b/misc.c index 0c817c1..3bd06c6 100644 --- a/misc.c +++ b/misc.c @@ -53,3 +53,5 @@ static U32 rand_u32(U32 seed) { return (U32)((seed64 * 0x832f0fda4e1a8642 + 0x41d49cd5459a2ab4) >> 32); } + + diff --git a/tests.c b/tests.c index 9d82897..54ba6ea 100644 --- a/tests.c +++ b/tests.c @@ -31,6 +31,7 @@ static void test_all(void) { allocr_test(); arr_test(); block_arr_test(); + str_hash_table_test(); idents_test(); binfile_test(); } diff --git a/toc.c b/toc.c index 0627a41..447abac 100644 --- a/toc.c +++ b/toc.c @@ -69,10 +69,10 @@ static inline bool type_is_slicechar(Type *t) { /* utilities */ #include "allocator.c" -#include "arr.c" +#include "misc.c" +#include "data_structures.c" #include "location.c" #include "err.c" -#include "misc.c" #include "blockarr.c" #include "instance_table.c" #include "copy.c" diff --git a/types.h b/types.h index 7ba2f18..36f21ec 100644 --- a/types.h +++ b/types.h @@ -1,7 +1,27 @@ /* - Copyright (C) 2019, 2020 Leo Tenenbaum. - This file is part of toc. toc is distributed under version 3 of the GNU General Public License, without any warranty whatsoever. - You should have received a copy of the GNU General Public License along with toc. If not, see . +toc's types. Note that although these types are in the public domain, +the code which uses them (i.e. most of the rest of toc) is not. + +This is free and unencumbered software released into the public domain. +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. +For more information, please refer to */ /* NOTE: make sure you edit copy.c and package.c and cgen_recurse_subexprs/types when you make a change to expression-related types or type-related types in this file! */ @@ -170,23 +190,38 @@ typedef struct IdentDecl { } IdentDecl; typedef struct IdentSlot { + char *str; + size_t len; bool export_name; /* is this identifier's name important? */ bool anonymous; /* is this identifier not part of a tree? */ bool imported; /* was this identifier imported from a package? */ - char *text; /* actual name of the identifier */ - size_t len; /* length of name */ U64 export_id; /* 0 if there's no exported identifier here, otherwise unique positive integer associated with this identifier */ struct Package *from_pkg; struct Package *pkg; /* NULL if this is not associated with a package */ IdentDecl *decls; /* array of declarations of this identifier */ } IdentSlot; +typedef struct { + const char *str; + size_t len; + MaxAlign data[]; +} StrHashTableSlot; + +typedef StrHashTableSlot *StrHashTableSlotPtr; + +typedef struct { + StrHashTableSlot **slots; + Allocator *allocr; + U32 rand_seed; + size_t data_size; + size_t nentries; /* # of filled slots */ +} StrHashTable; + typedef IdentSlot *Identifier; typedef IdentSlot *IdentSlotPtr; typedef struct Identifiers { - IdentSlot **slots; - U64 nidents; + StrHashTable table; U32 rseed; } Identifiers; -- cgit v1.2.3