From 7e1a78361df3c82c8583796fa94905215b103646 Mon Sep 17 00:00:00 2001 From: Leo Tenenbaum Date: Thu, 22 Aug 2019 21:57:24 -0400 Subject: generating function declarations & definitions --- blocks.c | 49 ------------- build.sh | 2 +- cgen.c | 220 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---- identifiers.c | 40 ++++++----- main.c | 2 +- out.c | 16 ++++- parse.c | 2 + test.toc | 13 +++- toc.c | 3 +- 9 files changed, 257 insertions(+), 90 deletions(-) delete mode 100644 blocks.c diff --git a/blocks.c b/blocks.c deleted file mode 100644 index 5c06a6e..0000000 --- a/blocks.c +++ /dev/null @@ -1,49 +0,0 @@ -/* identifies identifiers in this block */ -static bool block_enter(Block *b) { - bool ret = true; - arr_foreach(&b->stmts, Statement, stmt) { - if (stmt->kind == STMT_DECL) { - Declaration *decl = &stmt->decl; - arr_foreach(&decl->idents, Identifier, ident) { - Array *decls = &(*ident)->decls; - if (decls->item_sz) { - /* check that it hasn't been declared in this block */ - IdentDecl *prev = decls->last; - if (prev->scope == b) { - err_print(decl->where, "Re-declaration of identifier in the same block."); - info_print(prev->decl->where, "Previous declaration was here."); - ret = false; - continue; - } - } else { - /* array not initialized yet */ - arr_create(&(*ident)->decls, sizeof(IdentDecl)); - } - IdentDecl *ident_decl = arr_add(decls); - ident_decl->decl = decl; - ident_decl->scope = b; - } - } - } - return ret; -} - -/* de-identifies identifiers in this block */ -static bool block_exit(Block *b) { - /* OPTIM: figure out some way of not re-iterating over everything */ - bool ret = true; - arr_foreach(&b->stmts, Statement, stmt) { - if (stmt->kind == STMT_DECL) { - Declaration *decl = &stmt->decl; - arr_foreach(&decl->idents, Identifier, ident) { - Array *decls = &(*ident)->decls; - assert(decls->item_sz); - IdentDecl *last_decl = decls->last; - if (last_decl->scope == b) - arr_remove_last(decls); /* remove that declaration */ - - } - } - } - return ret; -} diff --git a/build.sh b/build.sh index 9bc24f5..d42acc5 100755 --- a/build.sh +++ b/build.sh @@ -1,2 +1,2 @@ #!/bin/bash -gcc -o toc main.c -O0 -g -o toc -Wall -Wextra -Wpedantic -Wconversion -Wshadow -std=c11 || exit 1 +gcc -o toc main.c -O0 -g -o toc -Wall -Wextra -Wpedantic -Wconversion -Wshadow -Wno-unused-function -Wno-unused-parameter -std=c11 || exit 1 diff --git a/cgen.c b/cgen.c index 831a22e..03113b5 100644 --- a/cgen.c +++ b/cgen.c @@ -1,7 +1,8 @@ /* the generation of C code */ - +/* TODO: check ferror */ typedef struct { FILE *out; + unsigned long anon_fn_count; } CGenerator; @@ -43,15 +44,50 @@ static void cgen_write_line_comment(CGenerator *g, const char *fmt, ...) { cgen_write(g, " */\n"); } -static void cgen_write_ident(CGenerator *g, Identifier i) { +static void cgen_create(CGenerator *g, FILE *out) { + g->out = out; + g->anon_fn_count = 0; +} + +static void cgen_ident(CGenerator *g, Identifier i) { ident_fprint(g->out, i); } -static void cgen_create(CGenerator *g, FILE *out) { - g->out = out; +static const char *builtin_type_to_str(BuiltinType b) { + /* TODO: make this return int/long/etc. if stdint.h is not available */ + switch (b) { + case BUILTIN_INT: return "int64_t"; + case BUILTIN_I8: return "int8_t"; + case BUILTIN_I16: return "int16_t"; + case BUILTIN_I32: return "int32_t"; + case BUILTIN_I64: return "int64_t"; + case BUILTIN_U8: return "uint8_t"; + case BUILTIN_U16: return "uint16_t"; + case BUILTIN_U32: return "uint32_t"; + case BUILTIN_U64: return "uint64_t"; + case BUILTIN_FLOAT: return "float"; + case BUILTIN_F32: return "float"; + case BUILTIN_F64: return "double"; + case BUILTIN_TYPE_COUNT: break; + } + assert(0); + return NULL; } -static bool expr_cgen(Expression *e, CGenerator *g) { +/* NOTE: this will eventually be split into two functions when functions/arrays are added */ +static bool cgen_type(CGenerator *g, Type *t) { + switch (t->kind) { + case TYPE_VOID: + cgen_write(g, "void"); + break; + case TYPE_BUILTIN: + cgen_write(g, "%s", builtin_type_to_str(t->builtin)); + break; + } + return true; +} + +static bool cgen_expr(CGenerator *g, Expression *e) { switch (e->kind) { case EXPR_INT_LITERAL: cgen_write(g, "%lld", e->intl); @@ -71,11 +107,11 @@ static bool expr_cgen(Expression *e, CGenerator *g) { break; case EXPR_IDENT: /* TODO: check if declared */ - cgen_write_ident(g, e->ident); + cgen_ident(g, e->ident); break; case EXPR_BINARY_OP: cgen_write(g, "("); - expr_cgen(e->binary.lhs, g); + cgen_expr(g, e->binary.lhs); switch (e->binary.op) { case BINARY_PLUS: cgen_write(g, "+"); @@ -84,7 +120,7 @@ static bool expr_cgen(Expression *e, CGenerator *g) { cgen_write(g, "-"); break; } - expr_cgen(e->binary.rhs, g); + cgen_expr(g, e->binary.rhs); cgen_write(g, ")"); break; case EXPR_UNARY_OP: @@ -94,33 +130,189 @@ static bool expr_cgen(Expression *e, CGenerator *g) { cgen_write(g, "-"); break; } - expr_cgen(e->unary.of, g); + cgen_expr(g, e->unary.of); cgen_write(g, ")"); break; + case EXPR_FN: + err_print(e->where, "Function expression not part of declaration or call."); + return false; } return true; } -static bool stmt_cgen(Statement *s, CGenerator *g) { +/* b = NULL => file */ +static bool cgen_block_enter(Array stmts, Block *b) { + bool ret = true; + + arr_foreach(&stmts, Statement, stmt) { + if (stmt->kind == STMT_DECL) { + Declaration *decl = &stmt->decl; + arr_foreach(&decl->idents, Identifier, ident) { + Array *decls = &(*ident)->decls; + if (decls->item_sz) { + /* check that it hasn't been declared in this block */ + IdentDecl *prev = decls->last; + if (prev->scope == b) { + err_print(decl->where, "Re-declaration of identifier in the same block."); + info_print(prev->decl->where, "Previous declaration was here."); + ret = false; + continue; + } + } else { + /* array not initialized yet */ + arr_create(&(*ident)->decls, sizeof(IdentDecl)); + } + if (infer_decl(decl)) { + IdentDecl *ident_decl = arr_add(decls); + ident_decl->decl = decl; + ident_decl->scope = b; + } else { + ret = false; + } + } + if (decl->expr.kind == EXPR_FN) { + /* TODO */ + } + } + } + return ret; +} + +static bool cgen_block_exit(Array stmts, Block *b) { + /* OPTIM: figure out some way of not re-iterating over everything */ + bool ret = true; + arr_foreach(&stmts, Statement, stmt) { + if (stmt->kind == STMT_DECL) { + Declaration *decl = &stmt->decl; + arr_foreach(&decl->idents, Identifier, ident) { + Array *decls = &(*ident)->decls; + assert(decls->item_sz); + IdentDecl *last_decl = decls->last; + if (last_decl->scope == b) + arr_remove_last(decls); /* remove that declaration */ + + } + } + } + return ret; +} + +static bool cgen_decl(CGenerator *g, Declaration *d) { + /* TODO */ + return true; +} + +static bool cgen_stmt(CGenerator *g, Statement *s) { switch (s->kind) { case STMT_EXPR: - if (!expr_cgen(&s->expr, g)) + if (!cgen_expr(g, &s->expr)) return false; - cgen_write(g, ";\n"); + cgen_writeln(g, ";"); + break; + case STMT_DECL: + return cgen_decl(g, &s->decl); + } + return true; +} + +/* + +because functions can have circular dependencies, we need two passes: +one declares the functions, and one defines them. + +*/ + +static bool cgen_fns_stmt(CGenerator *g, Statement *s, bool def); + +static bool cgen_fns_expr(CGenerator *g, Expression *e, Identifier fn_name, bool def) { + switch (e->kind) { + case EXPR_FN: { + bool ret = true; + FnExpr *f = &e->fn; + cgen_type(g, &f->ret_type); + if (!def) { + /* get id for function */ + if (fn_name) { + f->id = fn_name->c_fn_reps++; + } else { + f->id = g->anon_fn_count++; + } + } + cgen_write(g, " "); + if (fn_name) { + cgen_ident(g, fn_name); + } else { + cgen_write(g, "a__"); + } + if (f->id != 0) + cgen_write(g, "%lu", f->id); + cgen_write(g, "("); + arr_foreach(&f->params, Param, p) { + if (p != f->params.data) + cgen_write(g, ", "); + cgen_type(g, &p->type); + cgen_write(g, " "); + cgen_ident(g, p->name); + } + cgen_write(g, ")"); + if (def) { + cgen_writeln(g, " {"); + arr_foreach(&f->body.stmts, Statement, s) { + if (!cgen_stmt(g, s)) ret = false; + } + cgen_writeln(g, "}"); + } else { + cgen_writeln(g, ";"); + } + + arr_foreach(&f->body.stmts, Statement, s) { + if (!cgen_fns_stmt(g, s, def)) + ret = false; + } + return ret; + } + case EXPR_CALL: + cgen_fns_expr(g, e->call.fn, NULL, def); break; + default: break; + } + return true; +} + +static bool cgen_fns_stmt(CGenerator *g, Statement *s, bool def) { + switch (s->kind) { + case STMT_EXPR: + if (!cgen_fns_expr(g, &s->expr, NULL, def)) return false; + break; + case STMT_DECL: + if (s->decl.flags & DECL_FLAG_HAS_EXPR) { + if (!cgen_fns_expr(g, &s->decl.expr, *(Identifier*)s->decl.idents.data, def)) + return false; + } + break; + + } + return true; +} + +static bool cgen_fns(ParsedFile *f, CGenerator *g, bool def) { + arr_foreach(&f->stmts, Statement, s) { + cgen_fns_stmt(g, s, def); } return true; } -static bool file_cgen(ParsedFile *f, CGenerator *g) { +static bool cgen_file(CGenerator *g, ParsedFile *f) { cgen_write_line_comment(g, "toc"); bool ret = true; + if (!cgen_fns(f, g, false)) return false; + if (!cgen_fns(f, g, true)) return false; arr_foreach(&f->stmts, Statement, stmt) { if (stmt->kind == STMT_EXPR) { /* TODO: eventually make this an error / compile-time statement */ warn_print(stmt->where, "Expression statement at top level."); } - if (!stmt_cgen(stmt, g)) + if (!cgen_stmt(g, stmt)) ret = false; } return ret; diff --git a/identifiers.c b/identifiers.c index 30fd755..0ef5fe0 100644 --- a/identifiers.c +++ b/identifiers.c @@ -1,4 +1,25 @@ +typedef struct { + struct Block *scope; /* NULL for file scope */ + struct Declaration *decl; +} IdentDecl; + +/* OPTIM: This is not ideal. There should be one dynamic array of tree nodes. */ +typedef struct IdentTree { + /* zero value is an empty trie */ + long id; + int len; /* length of identifier = depth in tree */ + struct IdentTree *children; + struct IdentTree *parent; + Array decls; /* array of declarations of this identifier */ + unsigned long c_fn_reps; /* number of repetitions of this identifier in the C output--only used for functions */ +} IdentTree; + +typedef IdentTree *Identifier; + +static IdentTree ident_base_tree; +static long ident_curr_id; /* NOTE: you should eventually add something to reset this */ static char identifier_chars[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_."; + #define NIDENTIFIER_CHARS ((int)((sizeof identifier_chars) - 1)) /* -1 for null char */ /* returns -1 if c is not a valid identifier character, its index in identifier_chars otherwise */ @@ -25,25 +46,6 @@ static int isidentstart(int c) { return isident(c); } -typedef struct { - struct Block *scope; /* NULL for file scope */ - struct Declaration *decl; -} IdentDecl; - -typedef struct IdentTree { - /* zero value is an empty trie */ - long id; - int len; /* length of identifier = depth in tree */ - struct IdentTree *children; - struct IdentTree *parent; - Array decls; /* array of declarations of this identifier */ -} IdentTree; - -typedef IdentTree *Identifier; - -static IdentTree ident_base_tree; -static long ident_curr_id; /* NOTE: you should eventually add something to reset this */ - /* moves s to the char after the identifier */ static Identifier ident_tree_insert(IdentTree *t, char **s) { while (1) { diff --git a/main.c b/main.c index 3f903d4..f2e160f 100644 --- a/main.c +++ b/main.c @@ -60,7 +60,7 @@ int main(int argc, char **argv) { FILE *out = fopen(out_filename, "w"); CGenerator cgen; cgen_create(&cgen, out); - if (!file_cgen(&f, &cgen)) { + if (!cgen_file(&cgen, &f)) { err_fprint(TEXT_IMPORTANT("Errors occured while generating C code.\n")); return EXIT_FAILURE; } diff --git a/out.c b/out.c index 3260a0d..ee35fb9 100644 --- a/out.c +++ b/out.c @@ -1,3 +1,15 @@ /* toc */ -"\x48\x65\x6c\x6c\x6f\x20\x74\x68\x65\x72\x65"; -((5+(((3-34)-134)-(-(-(-(-(-(-(-164423)))))))))+(235443-(-543))); +void xasfd(); +void a__(); +void a__1(int64_t y); +double foo(int64_t x, int64_t y, uint64_t z); +void xasfd() { +} +void a__() { +} +void a__1(int64_t y) { +} +double foo(int64_t x, int64_t y, uint64_t z) { +(3+5); +(5+6); +} diff --git a/parse.c b/parse.c index e8e557a..3aad657 100644 --- a/parse.c +++ b/parse.c @@ -1,3 +1,4 @@ +/* TODO: stmt_parse -> parse_stmt, etc. */ typedef enum { TYPE_VOID, TYPE_BUILTIN @@ -41,6 +42,7 @@ typedef struct { Array params; Type ret_type; Block body; + unsigned long id; /* this is used to keep track of local vs global/other local functions (there might be multiple functions called "foo") */ } FnExpr; /* an expression such as fn(x: int) int {return 2 * x;} */ typedef enum { diff --git a/test.toc b/test.toc index 71d3b0a..cec1a43 100644 --- a/test.toc +++ b/test.toc @@ -1,2 +1,11 @@ -"Hello there"; -5+3-34-134--------164423+235443--543; \ No newline at end of file +xasfd @ int = fn () { + x @= (fn( ) { + x @= (fn(y: int) { + })(3); + })(); +}; + +foo @= fn (x: int, y: int, z: u64) f64 { + 3+5; + 5+6; +}; diff --git a/toc.c b/toc.c index e89d824..a767e4a 100644 --- a/toc.c +++ b/toc.c @@ -1,5 +1,4 @@ /* Includes all of toc's files */ - #include #include #include @@ -15,5 +14,5 @@ #include "identifiers.c" #include "tokenizer.c" #include "parse.c" -#include "blocks.c" +#include "infer.c" #include "cgen.c" -- cgit v1.2.3