From 99185ed75bba6c14a996ccf5b6b1b98b823eac0f Mon Sep 17 00:00:00 2001 From: Leo Tenenbaum Date: Sun, 10 Nov 2019 11:38:32 -0500 Subject: wrote value hash table for compile time arguments --- abbrevs.txt | 3 +- build.sh | 4 +- cgen.c | 39 ++++++------ decls_cgen.c | 16 ++--- eval.c | 31 ++++++++-- hash_tables.c | 181 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ main.c | 2 + rand.c | 4 ++ test.toc | 10 +++- toc.c | 27 +++++---- typedefs_cgen.c | 2 +- types.h | 13 ++++ 12 files changed, 287 insertions(+), 45 deletions(-) create mode 100644 hash_tables.c create mode 100644 rand.c diff --git a/abbrevs.txt b/abbrevs.txt index 5db40aa..96d41ae 100644 --- a/abbrevs.txt +++ b/abbrevs.txt @@ -1,6 +1,7 @@ allocr - allocator anon - anonymous arg - argument +cap - capacity ctx - context decl - declaration del - delete @@ -13,6 +14,7 @@ expr - expression fn - function ident - identifier kw - keyword +len - length mut - mutable num - number op - operator @@ -21,6 +23,5 @@ ptr - pointer ret - return stmt - statement str - string -tdecl - type declaration tokr - tokenizer val - value diff --git a/build.sh b/build.sh index 865313a..6754870 100755 --- a/build.sh +++ b/build.sh @@ -22,8 +22,8 @@ else WARNINGS='-Wall -Wextra -Wpedantic -Wshadow -Wconversion -Wno-pointer-to-int-cast -Wno-unused-parameter' fi -DEBUG_FLAGS="-O0 -g3 $WARNINGS -std=c11 -DTOC_DEBUG" -RELEASE_FLAGS="-O3 -s -DNDEBUG $WARNINGS -std=c11" +DEBUG_FLAGS="-O0 -g3 $WARNINGS -std=c11 -DTOC_DEBUG -lm" +RELEASE_FLAGS="-O3 -s -DNDEBUG $WARNINGS -std=c11 -lm" if [ "$1" = "release" ]; then FLAGS="$RELEASE_FLAGS $ADDITIONAL_FLAGS" diff --git a/cgen.c b/cgen.c index 60346fd..575bdc7 100644 --- a/cgen.c +++ b/cgen.c @@ -1,3 +1,15 @@ + +static void cgen_create(CGenerator *g, FILE *out, Identifiers *ids, Evaluator *ev) { + g->outc = out; + g->ident_counter = 1; /* some places use 0 to mean no id */ + g->main_ident = ident_get(ids, "main"); + g->evalr = ev; + g->will_indent = true; + g->indent_lvl = 0; + g->anon_fns = NULL; + g->idents = ids; +} + static bool cgen_stmt(CGenerator *g, Statement *s); #define CGEN_BLOCK_NOENTER 0x01 /* should cgen_block actually enter and exit the block? */ #define CGEN_BLOCK_NOBRACES 0x02 /* should it use braces? */ @@ -109,16 +121,12 @@ static bool cgen_defs_block(CGenerator *g, Block *b); break; \ } - -static void cgen_create(CGenerator *g, FILE *out, Identifiers *ids, Evaluator *ev) { - g->outc = out; - g->ident_counter = 1; /* some places use 0 to mean no id */ - g->main_ident = ident_get(ids, "main"); - g->evalr = ev; - g->will_indent = true; - g->indent_lvl = 0; - g->anon_fns = NULL; - g->idents = ids; +static inline bool fn_has_any_const_params(FnExpr *f) { + arr_foreach(f->params, Declaration, param) { + if (param->flags & DECL_IS_CONST) + return true; + } + return false; } static bool cgen_block_enter(CGenerator *g, Block *b) { @@ -1571,11 +1579,7 @@ static bool cgen_val_pre(CGenerator *g, Value *v, Type *t, Location where) { /* generates a value fit for use as an initializer */ static bool cgen_val(CGenerator *g, Value *v, Type *t, Location where) { - /* - Because Value is a union, a pointer to v works as a pointer to any member. - As a result, this function is only needed for type checking. - */ - return cgen_val_ptr(g, v, t, where); + return cgen_val_ptr(g, val_get_ptr(v, t), t, where); } @@ -1738,8 +1742,9 @@ static bool cgen_defs_expr(CGenerator *g, Expression *e) { static bool cgen_defs_decl(CGenerator *g, Declaration *d) { if (cgen_fn_is_direct(g, d)) { - if (!cgen_fn(g, &d->expr.fn, d->where)) - return false; + if (!fn_has_any_const_params(&d->expr.fn)) + if (!cgen_fn(g, &d->expr.fn, d->where)) + return false; if (!cgen_defs_block(g, &d->expr.fn.body)) return false; } else { diff --git a/decls_cgen.c b/decls_cgen.c index 3a9cdf0..190c004 100644 --- a/decls_cgen.c +++ b/decls_cgen.c @@ -43,16 +43,18 @@ static bool cgen_decls_block(CGenerator *g, Block *b) { static bool cgen_decls_decl(CGenerator *g, Declaration *d) { if (cgen_fn_is_direct(g, d)) { - d->expr.fn.c.name = d->idents[0]; - fn_enter(&d->expr.fn, 0); - if (!cgen_fn_header(g, &d->expr.fn, d->where)) - return false; - cgen_write(g, ";"); - cgen_nl(g); + if (!fn_has_any_const_params(&d->expr.fn)) { + d->expr.fn.c.name = d->idents[0]; + fn_enter(&d->expr.fn, 0); + if (!cgen_fn_header(g, &d->expr.fn, d->where)) + return false; + cgen_write(g, ";"); + cgen_nl(g); + } if (!cgen_decls_block(g, &d->expr.fn.body)) return false; fn_exit(&d->expr.fn); - } else if (d->flags & DECL_HAS_EXPR) { + } else if ((d->flags & DECL_HAS_EXPR) && !(d->flags & DECL_IS_CONST)) { if (!cgen_decls_expr(g, &d->expr)) return false; } diff --git a/eval.c b/eval.c index e03fe7c..61a56de 100644 --- a/eval.c +++ b/eval.c @@ -233,6 +233,29 @@ static void u64_to_val(Value *v, BuiltinType v_type, U64 x) { i64_to_val(v, v_type, (I64)x); } +/* rerturns a pointer to the underlying data of v, e.g. an I64 * if t is the builtin BUILTIN_I64 */ +static void *val_get_ptr(Value *v, Type *t) { + switch (t->kind) { + case TYPE_PTR: + case TYPE_BUILTIN: + case TYPE_TUPLE: + case TYPE_VOID: + case TYPE_UNKNOWN: + case TYPE_FN: + case TYPE_SLICE: + case TYPE_TYPE: + return v; + case TYPE_USER: + return val_get_ptr(v, type_user_underlying(t)); + case TYPE_ARR: + return v->arr; + case TYPE_STRUCT: + return v->struc; + } + assert(0); + return NULL; +} + static void fprint_val_ptr(FILE *f, void *p, Type *t) { switch (t->kind) { case TYPE_VOID: @@ -269,7 +292,7 @@ static void fprint_val_ptr(FILE *f, void *p, Type *t) { if (n > 5) n = 5; for (size_t i = 0; i < n; i++) { if (i) fprintf(f, ", "); - fprint_val_ptr(f, *(char **)p + i * compiler_sizeof(t->arr.of), t->arr.of); + fprint_val_ptr(f, (char *)p + i * compiler_sizeof(t->arr.of), t->arr.of); } if (t->arr.n > n) { fprintf(f, ", ..."); @@ -306,7 +329,7 @@ static void fprint_val_ptr(FILE *f, void *p, Type *t) { fprintf(f, ", "); fprint_ident(f, fi->name); fprintf(f, ": "); - fprint_val_ptr(f, *(char **)p + fi->offset, fi->type); + fprint_val_ptr(f, (char *)p + fi->offset, fi->type); } fprintf(f, "]"); break; @@ -321,7 +344,7 @@ static void fprint_val(FILE *f, Value v, Type *t) { } fprintf(f, ")"); } else { - fprint_val_ptr(f, &v, t); + fprint_val_ptr(f, val_get_ptr(&v, t), t); } } @@ -708,7 +731,7 @@ static void eval_deref_set(void *set, Value *to, Type *type) { static bool eval_val_ptr_at_index(Evaluator *ev, Location where, Value *arr, U64 i, Type *arr_type, Type *idx_type, void **ptr, Type **type) { switch (arr_type->kind) { case TYPE_ARR: { - U64 arr_sz = arr_type->arr.n; + U64 arr_sz = (U64)arr_type->arr.n; if (i >= arr_sz) { err_print(where, "Array out of bounds (%lu, array size = %lu)\n", (unsigned long)i, (unsigned long)arr_sz); return false; diff --git a/hash_tables.c b/hash_tables.c new file mode 100644 index 0000000..15121b0 --- /dev/null +++ b/hash_tables.c @@ -0,0 +1,181 @@ +/* +hash tables are initialized by setting them to {0}, e.g. +HashTable x = {0}; +*/ +static void *val_get_ptr(Value *v, Type *t); +static U64 val_hash(Value *v, Type *t); +static size_t compiler_sizeof(Type *t); + +static U64 f32_hash(F32 f) { + /* OPTIM */ + U64 hash = 0; + if (f < 0) { + hash = 0x9a6db29edcba8af4; + f = -f; + } + F32 last = f; + int exponent = 0; + while (f > 1) { + exponent++; + f /= 10; + if (f == last) { + /* +/- infinity probably */ + hash ^= 0x78bf61a81e80b9f2; + return hash; + } + last = f; + } + for (int i = 0; i < F32_MANT_DIG; i++) { + f *= 10; + } + hash ^= (U64)exponent + (U64)F32_DIG * (U64)f; + return hash; +} + +static U64 f64_hash(F64 f) { + /* OPTIM */ + U64 hash = 0; + if (f < 0) { + hash = 0x9a6db29edcba8af4; + f = -f; + } + F64 last = f; + int exponent = 0; + while (f > 1) { + exponent++; + f /= 10; + if (f == last) { + /* +/- infinity probably */ + hash ^= 0x78bf61a81e80b9f2; + return hash; + } + last = f; + } + for (int i = 0; i < F64_MANT_DIG; i++) { + f *= 10; + } + hash ^= (U64)exponent + (U64)F64_DIG * (U64)f; + return hash; +} + +/* Note that for these value hashing functions, values of different types may collide */ +static U64 val_ptr_hash(void *v, Type *t) { + switch (t->kind) { + case TYPE_VOID: return 0; + case TYPE_UNKNOWN: return 0; + case TYPE_BUILTIN: + switch (t->builtin) { + case BUILTIN_I8: return (U64)*(I8 *)v; + case BUILTIN_U8: return (U64)*(U8 *)v; + case BUILTIN_I16: return (U64)*(I16 *)v; + case BUILTIN_U16: return (U64)*(U16 *)v; + case BUILTIN_I32: return (U64)*(I32 *)v; + case BUILTIN_U32: return (U64)*(U32 *)v; + case BUILTIN_I64: return (U64)*(I64 *)v; + case BUILTIN_U64: return (U64)*(U64 *)v; + case BUILTIN_F32: return f32_hash(*(F32 *)v); + case BUILTIN_F64: return f64_hash(*(F64 *)v); + case BUILTIN_CHAR: return (U64)*(char *)v; + case BUILTIN_BOOL: return (U64)*(bool *)v; + } + assert(0); + return 0; + case TYPE_FN: return (U64)*(FnExpr **)v; + case TYPE_TUPLE: { + U64 hash = 0; + Value *elems = *(Value **)v; + U32 x = 1; + for (U64 i = 0; i < (U64)arr_len(t->tuple); i++) { + hash += (U64)x * val_hash(elems + i, &t->tuple[i]); + x = rand_u32(x); + } + return hash; + } + case TYPE_PTR: return (U64)*(void **)v; + case TYPE_TYPE: return (U64)*(Type **)v; + case TYPE_USER: return val_ptr_hash(v, type_inner(t)); + case TYPE_ARR: { + U32 x = 1; + U64 hash = 0; + U64 size = (U64)compiler_sizeof(t->arr.of); + for (U64 i = 0; i < (U64)t->arr.n; i++) { + hash += (U64)x * val_ptr_hash((char *)v + i * size, t->arr.of); + x = rand_u32(x); + } + return hash; + } + case TYPE_SLICE: { + U32 x = 1; + U64 hash = 0; + Slice *s = v; + U64 size = (U64)compiler_sizeof(t->slice); + for (U64 i = 0; i < (U64)s->n; i++) { + hash += (U64)x * val_ptr_hash((char *)s->data + i * size, t->slice); + x = rand_u32(x); + } + return hash; + } + case TYPE_STRUCT: { + U32 x = 1; + U64 hash = 0; + arr_foreach(t->struc.fields, Field, f) { + hash += (U64)x * val_ptr_hash((char *)v + f->offset, f->type); + x = rand_u32(x); + } + return hash; + } + } + assert(0); + return 0; +} + +static U64 val_hash(Value *v, Type *t) { + return val_ptr_hash(val_get_ptr(v, t), t); +} + +static bool val_eq(Value *u, Value *v, Type *t) { + /* TODO */ + return false; +} + +/* +for a value hash table, you must either ALWAYS or NEVER use an allocator +all values in the hash table must have the same type. +returns true iff the value was already present +*/ +static bool val_hash_table_adda(Allocator *a, HashTable *h, Value *v, Type *t) { + if (h->n * 2 >= h->cap) { + U64 new_cap = h->cap * 2 + 2; + Value *new_data = a ? allocr_malloc(a, new_cap * (U64)sizeof(Value)) + : malloc(new_cap * sizeof(Value)); + h->occupied = a ? allocr_realloc(a, h->occupied, h->cap * (U64)sizeof(bool), + new_cap * (U64)sizeof(bool)) + : realloc(h->occupied, new_cap * sizeof(bool)); + Value *old_data = h->data; + for (size_t i = 0; i < h->n; i++) { + /* re-hash */ + U64 index = val_hash(&old_data[i], t) % new_cap; + while (h->occupied[index]) index++; + new_data[index] = old_data[i]; + h->occupied[index] = true; + } + h->data = new_data; + h->cap = new_cap; + } + Value *vals = h->data; + U64 index = val_hash(v, t) % h->cap; + while (1) { + if (h->occupied[index]) { + if (val_eq(v, &vals[index], t)) + return true; + } else break; + index++; + } + vals[index] = *v; + h->occupied[index] = true; + return false; +} + +static void val_hash_table_add(HashTable *h, Value *v, Type *t) { + val_hash_table_adda(NULL, h, v, t); +} diff --git a/main.c b/main.c index bc803a3..b02b030 100644 --- a/main.c +++ b/main.c @@ -1,9 +1,11 @@ /* TODO: compile-time arguments +double check that val_get_ptr is being used everywhere it should be evaluate default arguments compile-time arguments for out parameter functions compile-time arguments for functions returning tuples +deal with x, y @= fn(x: int, y @ int){} don't allow pointers to functions with compile-time arguments don't allow while {3; 5} (once break is added) any odd number of "s for a string diff --git a/rand.c b/rand.c new file mode 100644 index 0000000..407f860 --- /dev/null +++ b/rand.c @@ -0,0 +1,4 @@ +static U32 rand_u32(U32 seed) { + U64 seed64 = (U64)seed; + return (U32)((seed64 * 0x832f0fda4e1a8642 + 0x41d49cd5459a2ab4) >> 32); +} diff --git a/test.toc b/test.toc index ddd4c04..02811e2 100644 --- a/test.toc +++ b/test.toc @@ -9,8 +9,14 @@ puti @= fn(x: int) { // "); // }; +add @= fn(x: int, y @int) int { + x + y +}; + main @= fn() { - b @= fn(x: int) int { 2*x }; - puti(b(10)); + puti(add(3, 10)); + x @= add(3, 7); + puti(add(4, x)); + puti(add(3, 5)); }; diff --git a/toc.c b/toc.c index bbfce6a..9e4bc94 100644 --- a/toc.c +++ b/toc.c @@ -9,20 +9,10 @@ #include #include #include +#include #include "types.h" -#include "location.c" -#include "err.c" -#include "allocator.c" -#include "arr.c" -#include "blockarr.c" -#include "str.c" -#include "identifiers.c" -#include "tokenizer.c" -#include "parse.c" -#include "scope.c" - static Type *type_user_underlying(Type *t) { assert(t->kind == TYPE_USER); Declaration *d = t->user.decl; @@ -38,6 +28,21 @@ static Type *type_inner(Type *t) { return t; } +#include "rand.c" +#include "location.c" +#include "err.c" +#include "allocator.c" +#include "arr.c" +#include "blockarr.c" +#include "str.c" +#include "hash_tables.c" + +#include "identifiers.c" +#include "tokenizer.c" +#include "parse.c" +#include "scope.c" + + #include "eval.c" #include "types.c" static bool cgen_decls_file(CGenerator *g, ParsedFile *f); diff --git a/typedefs_cgen.c b/typedefs_cgen.c index a068c74..49cd8ec 100644 --- a/typedefs_cgen.c +++ b/typedefs_cgen.c @@ -46,7 +46,7 @@ static bool typedefs_decl(CGenerator *g, Declaration *d) { cgen_nl(g); } } - if (d->flags & DECL_HAS_EXPR) + if ((d->flags & DECL_HAS_EXPR) && !(d->flags & DECL_IS_CONST)) if (!typedefs_expr(g, &d->expr)) return false; return true; diff --git a/types.h b/types.h index fad82e5..ed88db2 100644 --- a/types.h +++ b/types.h @@ -22,8 +22,13 @@ typedef int16_t I16; typedef int32_t I32; typedef int64_t I64; +/* NOTE: if you change these, make sure you change hash_tables.c */ typedef float F32; typedef double F64; +#define F32_MANT_DIG FLT_MANT_DIG +#define F32_DIG FLT_DIG +#define F64_MANT_DIG DBL_MANT_DIG +#define F64_DIG DBL_DIG #define F32_FMT "%.16f" #define F64_FMT "%.16f" @@ -454,6 +459,13 @@ typedef struct EachExpr { }; } EachExpr; +typedef struct { + void *data; + bool *occupied; /* OPTIM: use bits instead of bytes for bools */ + U64 n; + U64 cap; +} HashTable; + typedef struct FnExpr { struct Declaration *params; /* declarations of the parameters to this function */ struct Declaration *ret_decls; /* array of decls, if this has named return values. otherwise, NULL */ @@ -463,6 +475,7 @@ typedef struct FnExpr { /* if name = NULL, this is an anonymous function, and id will be the ID of the fn. */ Identifier name; IdentID id; + HashTable *instances; /* for fns with co */ } c; } FnExpr; /* an expression such as fn(x: int) int { 2 * x } */ -- cgit v1.2.3