diff options
-rw-r--r-- | base_cgen.c | 19 | ||||
-rwxr-xr-x | build.sh | 2 | ||||
-rw-r--r-- | cgen.c | 35 | ||||
-rw-r--r-- | eval.c | 3 | ||||
-rw-r--r-- | main.c | 4 | ||||
-rw-r--r-- | out.c | 20 | ||||
-rw-r--r-- | out.h | 1 | ||||
-rw-r--r-- | parse.c | 162 | ||||
-rwxr-xr-x | rung | 2 | ||||
-rwxr-xr-x | runv | 2 | ||||
-rw-r--r-- | test.toc | 20 | ||||
-rw-r--r-- | types.c | 118 |
12 files changed, 285 insertions, 103 deletions
diff --git a/base_cgen.c b/base_cgen.c index 1ddf83a..8ac73e5 100644 --- a/base_cgen.c +++ b/base_cgen.c @@ -60,14 +60,14 @@ static void cgen_writeln(CGenerator *g, const char *fmt, ...) { g->indent_next = true; } -static void cgen_write_comment(CGenerator *g, const char *fmt, ...) { - cgen_write(g, "/* "); - va_list args; - va_start(args, fmt); - cgen_vwrite(g, fmt, args); - va_end(args); - cgen_write(g, " */"); -} +/* static void cgen_write_comment(CGenerator *g, const char *fmt, ...) { */ +/* cgen_write(g, "/\* "); */ +/* va_list args; */ +/* va_start(args, fmt); */ +/* cgen_vwrite(g, fmt, args); */ +/* va_end(args); */ +/* cgen_write(g, " *\/"); */ +/* } */ static void cgen_write_line_comment(CGenerator *g, const char *fmt, ...) { /* could switch to // for c99 */ @@ -88,7 +88,8 @@ static void cgen_create(CGenerator *g, FILE *c_out, FILE *h_out, const char *h_f g->indent_next = true; g->writing_to = CGEN_WRITING_TO_H; - cgen_write(g, "#include <stdint.h>\n"); + cgen_write(g, "#include <stddef.h>\n" + "#include <stdint.h>\n"); g->writing_to = CGEN_WRITING_TO_C; cgen_write(g, "#include \"%s\"\n", h_filename); @@ -1,3 +1,3 @@ #!/bin/bash CC=gcc -$CC -o toc main.c -O0 -g -Wall -Wextra -Wpedantic -Wconversion -Wshadow -Wno-unused-function -std=c11 || exit 1 +$CC -o toc main.c -O0 -g3 -Wall -Wextra -Wpedantic -Wconversion -Wshadow -std=c11 || exit 1 @@ -32,8 +32,14 @@ static bool cgen_expr(CGenerator *g, Expression *e) { case BINARY_SET: cgen_write(g, "="); break; + case BINARY_AT_INDEX: + cgen_write(g, "["); + break; } if (!cgen_expr(g, e->binary.rhs)) return false; + if (e->binary.op == BINARY_AT_INDEX) { + cgen_write(g, "]"); + } cgen_write(g, ")"); break; case EXPR_UNARY_OP: @@ -67,6 +73,28 @@ static bool cgen_expr(CGenerator *g, Expression *e) { static bool cgen_stmt(CGenerator *g, Statement *s); +static void cgen_zero_value(CGenerator *g, Type *t) { + switch (t->kind) { + case TYPE_VOID: /* we should never need this */ + assert(0); + break; + case TYPE_FN: + cgen_write(g, "NULL"); + break; + case TYPE_ARR: + cgen_write(g, "{"); + cgen_zero_value(g, t->arr.of); + cgen_write(g, "}"); + break; + case TYPE_BUILTIN: + if (type_builtin_is_numerical(t->builtin)) { + cgen_write(g, "0"); + } else { + assert(0); + } + break; + } +} static bool cgen_decl(CGenerator *g, Declaration *d) { arr_foreach(&d->idents, Identifier, ident) { @@ -78,13 +106,16 @@ static bool cgen_decl(CGenerator *g, Declaration *d) { } cgen_ident(g, *ident, NULL); cgen_type_post(g, &d->type); + cgen_write_space(g); + cgen_write(g, "="); if (d->flags & DECL_FLAG_HAS_EXPR) { cgen_write_space(g); - cgen_write(g, "="); - cgen_write_space(g); if (!cgen_expr(g, &d->expr)) { return false; } + } else { + cgen_write_space(g); + cgen_zero_value(g, &d->type); } cgen_write(g, "; "); } @@ -58,6 +58,9 @@ static bool eval_expr_as_int(Expression *e, Integer *i) { case BINARY_SET: err_print(e->where, "Expected operator which returns an integer, but got %s", binary_op_to_str(e->binary.op)); return false; + case BINARY_AT_INDEX: + err_print(e->where, "Cannot get index of array at compile time yet."); + return false; } } break; case EXPR_IDENT: { @@ -1,3 +1,7 @@ +/* TODO: don't allow setting things to void */ +/* TODO: don't eval consts in C */ +/* TODO: Functions returning fixed-length arrays */ +/* TODO: improve error for declaring a keyword, e.g. i8: int = 8123; */ #include "toc.c" int main(int argc, char **argv) { @@ -2,13 +2,19 @@ /* toc */ void main__(void) { - int64_t const N = 5; - int64_t const FOO = 1928; - int64_t const BAR = 5; - int64_t foo[1935]; - int64_t bar[77]; - int64_t x; - (x=13); + void (*bar[3])(void) = {NULL}; + int64_t i = 0; + void (*x)(void) = (bar[i]); + x(); + (i=(i+1)); + (x=(bar[i])); + x(); + (i=(i+1)); + (x=(bar[i])); + x(); + (i=(i+1)); + (x=(bar[i])); + x(); } int main(void) { @@ -1,2 +1,3 @@ +#include <stddef.h> #include <stdint.h> void main__(void); @@ -77,7 +77,8 @@ typedef enum { typedef enum { BINARY_SET, /* e.g. x = y */ BINARY_PLUS, - BINARY_MINUS + BINARY_MINUS, + BINARY_AT_INDEX /* e.g. x[i] */ } BinaryOp; #define EXPR_FLAG_FLEXIBLE 0x01 /* e.g. 4 => float/i32/etc. */ @@ -108,7 +109,7 @@ typedef struct Expression { }; } Expression; -#define DECL_FLAG_INFER_TYPE 0x01 +#define DECL_FLAG_ANNOTATES_TYPE 0x01 #define DECL_FLAG_CONST 0x02 #define DECL_FLAG_HAS_EXPR 0x04 #define DECL_FLAG_FOUND_TYPE 0x08 @@ -151,6 +152,7 @@ static const char *binary_op_to_str(BinaryOp b) { case BINARY_PLUS: return "+"; case BINARY_MINUS: return "-"; case BINARY_SET: return "="; + case BINARY_AT_INDEX: return "[]"; } assert(0); return ""; @@ -268,7 +270,6 @@ static size_t type_to_str(Type *t, char *buffer, size_t bufsize) { /* allocate a new expression. - IMPORTANT: This invalidates all other parser-allocated Expression pointers. */ static Expression *parser_new_expr(Parser *p) { return block_arr_add(&p->exprs); @@ -600,6 +601,8 @@ static bool parse_expr(Parser *p, Expression *e, Token *end) { t->token = end; return true; } + + Token *start = t->token; if (token_is_kw(t->token, KW_FN)) { /* this is a function */ @@ -615,9 +618,10 @@ static bool parse_expr(Parser *p, Expression *e, Token *end) { return true; } - /* Find the lowest-precedence operator not in parentheses/braces */ + /* Find the lowest-precedence operator not in parentheses/braces/square brackets */ int paren_level = 0; int brace_level = 0; + int square_level = 0; int lowest_precedence = NOT_AN_OP; /* e.g. (5+3) */ bool entirely_within_parentheses = token_is_kw(t->token, KW_LPAREN); @@ -634,7 +638,7 @@ static bool parse_expr(Parser *p, Expression *e, Token *end) { entirely_within_parentheses = false; if (paren_level < 0) { t->token = token; - tokr_err(t, "Excessive closing parenthesis."); + tokr_err(t, "Excessive closing )."); t->token = end + 1; return false; } @@ -646,12 +650,22 @@ static bool parse_expr(Parser *p, Expression *e, Token *end) { brace_level--; if (brace_level < 0) { t->token = token; - tokr_err(t, "Excessive closing brace."); + tokr_err(t, "Excessive closing }."); + return false; + } + break; + case KW_LSQUARE: + square_level++; + break; + case KW_RSQUARE: + square_level--; + if (square_level < 0) { + tokr_err(t, "Excessive closing ]."); return false; } break; default: { /* OPTIM: use individual cases for each op */ - if (paren_level == 0 && brace_level == 0) { + if (paren_level == 0 && brace_level == 0 && square_level == 0) { int precedence = op_precedence(token->kw); if (precedence == NOT_AN_OP) break; /* nvm it's not an operator */ if (lowest_precedence == NOT_AN_OP || precedence <= lowest_precedence) { @@ -664,15 +678,19 @@ static bool parse_expr(Parser *p, Expression *e, Token *end) { } } - /* TODO: These errors are bad for functions, since they can be very long, - and this will only point to the end. - */ if (paren_level > 0) { - tokr_err(t, "Too many opening parentheses."); + t->token = start; + tokr_err(t, "Too many opening parentheses (."); return false; } if (brace_level > 0) { - tokr_err(t, "Too many opening braces."); + t->token = start; + tokr_err(t, "Too many opening braces {."); + return false; + } + if (square_level > 0) { + t->token = start; + tokr_err(t, "Too many opening square brackets [."); return false; } @@ -688,8 +706,9 @@ static bool parse_expr(Parser *p, Expression *e, Token *end) { if (lowest_precedence == NOT_AN_OP) { /* function calls, array accesses, etc. */ - /* try a function call */ + /* try a function call or array access */ Token *token = t->token; + /* currently unnecessary: paren_level = square_level = 0; */ /* can't call at start, e.g. in (fn() {})(), it is not the empty function "" being called with fn() {} as an argument @@ -698,47 +717,93 @@ static bool parse_expr(Parser *p, Expression *e, Token *end) { paren_level++; token++; } + /* which opening bracket starts the call/array access */ + Token *opening_bracket = NULL; for (; token < end; token++) { if (token->kind == TOKEN_KW) { - if (token->kw == KW_LPAREN) { - if (paren_level == 0) - break; /* this left parenthesis opens the function call */ + switch (token->kw) { + case KW_LPAREN: + if (square_level == 0 && paren_level == 0) + opening_bracket = token; /* maybe this left parenthesis opens the function call */ paren_level++; - } - if (token->kw == KW_RPAREN) { + break; + case KW_LSQUARE: + if (square_level == 0 && paren_level == 0) + opening_bracket = token; /* ^^ (array access) */ + square_level++; + break; + case KW_RPAREN: paren_level--; + break; + case KW_RSQUARE: + square_level--; + break; + default: break; } + + } else if (token->kind == TOKEN_EOF) { + if (paren_level > 0) { + tokr_err(t, "Unmatched ( parenthesis."); + return false; + } + if (square_level > 0) { + tokr_err(t, "Unmatched [ square bracket."); + return false; + } + break; } } - if (token != t->token && token != end) { - /* it's a function call! */ - e->kind = EXPR_CALL; - e->call.fn = parser_new_expr(p); - if (!parse_expr(p, e->call.fn, token)) { /* parse up to ( as function */ - return false; - } - arr_create(&e->call.args, sizeof(Expression)); - t->token = token + 1; /* move past ( */ - if (!token_is_kw(t->token, KW_RPAREN)) { - /* non-empty arg list */ - while (1) { - if (t->token->kind == TOKEN_EOF) { - tokr_err(t, "Expected argument list to continue."); - return false; - } - Expression *arg = arr_add(&e->call.args); - if (!parse_expr(p, arg, expr_find_end(p, EXPR_END_RPAREN_OR_COMMA))) { - return false; + if (opening_bracket) { + switch (opening_bracket->kw) { + case KW_LPAREN: { + /* it's a function call! */ + e->kind = EXPR_CALL; + e->call.fn = parser_new_expr(p); + if (!parse_expr(p, e->call.fn, opening_bracket)) { /* parse up to ( as function */ + return false; + } + arr_create(&e->call.args, sizeof(Expression)); + t->token = opening_bracket + 1; /* move past ( */ + if (!token_is_kw(t->token, KW_RPAREN)) { + /* non-empty arg list */ + while (1) { + if (t->token->kind == TOKEN_EOF) { + tokr_err(t, "Expected argument list to continue."); + return false; + } + Expression *arg = arr_add(&e->call.args); + if (!parse_expr(p, arg, expr_find_end(p, EXPR_END_RPAREN_OR_COMMA))) { + return false; + } + if (token_is_kw(t->token, KW_RPAREN)) + break; + t->token++; /* move past , */ } - if (token_is_kw(t->token, KW_RPAREN)) - break; - t->token++; /* move past , */ } + t->token++; /* move past ) */ + return true; + } + case KW_LSQUARE: { + /* it's an array access */ + e->kind = EXPR_BINARY_OP; + e->binary.op = BINARY_AT_INDEX; + e->binary.lhs = parser_new_expr(p); + e->binary.rhs = parser_new_expr(p); + /* parse array */ + if (!parse_expr(p, e->binary.lhs, opening_bracket)) return false; + /* parse index */ + t->token = opening_bracket + 1; + Token *index_end = expr_find_end(p, EXPR_END_RSQUARE); + if (!parse_expr(p, e->binary.rhs, index_end)) + return false; + t->token++; /* move past ] */ + return true; + } + default: + assert(0); + return false; } - t->token++; /* move past ) */ - return true; } - /* array accesses, etc. */ tokr_err(t, "Not implemented yet."); return false; } @@ -871,10 +936,8 @@ static bool decl_parse(Declaration *d, Parser *p) { return false; } - if (token_is_kw(t->token, KW_EQ)) { - /* := / @= */ - d->flags |= DECL_FLAG_INFER_TYPE; - } else { + if (!token_is_kw(t->token, KW_EQ)) { + d->flags |= DECL_FLAG_ANNOTATES_TYPE; if (!parse_type(p, &d->type)) { return false; } @@ -1059,6 +1122,9 @@ static void fprint_expr(FILE *out, Expression *e) { case BINARY_SET: fprintf(out, "set"); break; + case BINARY_AT_INDEX: + fprintf(out, "at"); + break; } fprintf(out, "("); fprint_expr(out, e->binary.lhs); @@ -1102,7 +1168,7 @@ static void fprint_decl(FILE *out, Declaration *d) { fprintf(out, "[const]"); } fprintf(out, ":"); - if (!(d->flags & DECL_FLAG_INFER_TYPE)) { + if (d->flags & DECL_FLAG_ANNOTATES_TYPE) { fprint_type(out, &d->type); } if (d->flags & DECL_FLAG_HAS_EXPR) { @@ -0,0 +1,2 @@ +#!/bin/bash +gdb toc
\ No newline at end of file @@ -0,0 +1,2 @@ +#!/bin/bash +valgrind -q --track-origins=yes ./toc test.toc @@ -1,9 +1,15 @@ main @= fn() { - N @= 5; - FOO @= 1928; - BAR @= 5; - foo : [N+FOO-3--BAR]int; - bar : [77]int; - x : int; - x = 13; + bar : [3]fn(); + i := 0; + x := bar[i]; + x(); + i = i + 1; + x = bar[i]; + x(); + i = i + 1; + x = bar[i]; + x(); + i = i + 1; + x = bar[i]; + x(); }; @@ -99,6 +99,39 @@ static bool type_must_eq(Location where, Type *expected, Type *got) { return true; } +/* Prints an error and returns false if the given expression is not an l-value */ +static bool expr_must_lval(Expression *e) { + switch (e->kind) { + case EXPR_IDENT: { + IdentDecl *id_decl = ident_decl(e->ident); + if (!id_decl) + err_print(e->where, "Undeclared identifier."); + Declaration *d = id_decl->decl; + if (d->flags & DECL_FLAG_CONST) { + char *istr = ident_to_str(e->ident); + err_print(e->where, "Use of constant %s as a non-constant expression.", istr); + info_print(d->where, "%s was declared here.", istr); + return false; + } + + return true; + } + default: + err_print(e->where, "Cannot assign to non-lvalue."); + return false; + } +} + +static bool type_of_expr(Expression *e, Type *t); +static bool type_of_decl(Declaration *d, Type *t) { + if (d->flags & DECL_FLAG_ANNOTATES_TYPE) { + *t = d->type; + return true; + } else { + return type_of_expr(&d->expr, t); + } +} + /* NOTE: this does descend into un/binary ops, etc. but NOT into functions */ static bool type_of_expr(Expression *e, Type *t) { t->flags = 0; @@ -131,31 +164,36 @@ static bool type_of_expr(Expression *e, Type *t) { char *s = ident_to_str(e->ident); err_print(e->where, "Undeclared identifier: %s", s); free(s); + return false; } Declaration *d = decl->decl; - /* TODO: Check self-referential declarations */ + /* TODO: Check self-referential declarations but allow f @= fn() { foo := f; foo(); } */ if (d->where.code > e->where.code) { char *s = ident_to_str(e->ident); err_print(e->where, "Use of identifier %s before its declaration.", s); info_print(d->where, "%s will be declared here.", s); free(s); + return false; } - *t = d->type; + if (!type_of_decl(d, t)) return false; + } break; case EXPR_CALL: { Expression *f = e->call.fn; Type fn_type; if (f->kind == EXPR_IDENT) { /* allow calling a function before declaring it */ - IdentDecl *decl = ident_decl(f->ident); - if (!decl) { + IdentDecl *id_decl = ident_decl(f->ident); + if (!id_decl) { char *s = ident_to_str(e->ident); err_print(e->where, "Undeclared identifier: %s", s); free(s); } - if (!type_of_expr(&decl->decl->expr, &fn_type)) return false; + Declaration *d = id_decl->decl; + if (!type_of_decl(d, &fn_type)) return false; + } else { - if (!type_of_expr(e->call.fn, &fn_type)) return false; + if (!type_of_expr(f, &fn_type)) return false; } if (fn_type.kind != TYPE_FN) { char type[128]; @@ -183,27 +221,33 @@ static bool type_of_expr(Expression *e, Type *t) { } } break; case EXPR_BINARY_OP: { + Type *lhs_type = &e->binary.lhs->type; + Type *rhs_type = &e->binary.rhs->type; + if (!type_of_expr(e->binary.lhs, lhs_type) + || !type_of_expr(e->binary.rhs, rhs_type)) + return false; switch (e->binary.op) { + case BINARY_SET: + if (!expr_must_lval(e->binary.lhs)) return false; + /* fallthrough */ case BINARY_PLUS: - case BINARY_MINUS: - case BINARY_SET: { - Type *lhs_type = &e->binary.lhs->type; - Type *rhs_type = &e->binary.rhs->type; - if (!type_of_expr(e->binary.lhs, lhs_type) - || !type_of_expr(e->binary.rhs, rhs_type)) - return false; + case BINARY_MINUS: { bool match = true; - if (lhs_type->kind != rhs_type->kind) { - match = false; - } else if (lhs_type->kind != TYPE_BUILTIN) { - match = false; - } else if (!type_builtin_is_numerical(lhs_type->builtin) || !type_builtin_is_numerical(rhs_type->builtin)) { - match = false; - } else { + if (e->binary.op != BINARY_SET) { + /* numerical binary ops */ + if (lhs_type->kind != rhs_type->kind) { + match = false; + } else if (lhs_type->kind != TYPE_BUILTIN) { + match = false; + } else if (!type_builtin_is_numerical(lhs_type->builtin) || !type_builtin_is_numerical(rhs_type->builtin)) { + match = false; + } + } + if (match) { if (e->binary.op == BINARY_SET) { /* type of x = y is always void */ t->kind = TYPE_VOID; - return true; + break; } int lhs_is_flexible = lhs_type->flags & TYPE_FLAG_FLEXIBLE; int rhs_is_flexible = rhs_type->flags & TYPE_FLAG_FLEXIBLE; @@ -218,6 +262,8 @@ static bool type_of_expr(Expression *e, Type *t) { *t = *lhs_type; else *t = *rhs_type; + } else { + match = false; } } if (!match) { @@ -228,8 +274,20 @@ static bool type_of_expr(Expression *e, Type *t) { err_print(e->where, "Mismatched types to operator %s: %s and %s", op, s1, s2); return false; } - return true; + break; } + case BINARY_AT_INDEX: + /* TODO(eventually): support non-builtin numerical (or even perhaps non-numerical) indices */ + if (rhs_type->kind != TYPE_BUILTIN || !type_builtin_is_numerical(rhs_type->builtin)) { + err_print(e->where, "The index of an array must be a builtin numerical type."); + return false; + } + if (lhs_type->kind != TYPE_ARR) { + err_print(e->where, "Trying to take index of non-array."); + return false; + } + *t = *lhs_type->arr.of; + break; } } break; } @@ -265,11 +323,11 @@ static bool types_stmt(Statement *s); static bool types_block(Block *b) { bool ret = true; - block_enter(b); + if (!block_enter(b)) return false; arr_foreach(&b->stmts, Statement, s) { if (!types_stmt(s)) ret = false; } - block_exit(b); + if (!block_exit(b)) return false; return ret; } @@ -294,18 +352,20 @@ static bool types_expr(Expression *e) { static bool types_decl(Declaration *d) { if (d->flags & DECL_FLAG_FOUND_TYPE) return true; - if (!(d->flags & DECL_FLAG_INFER_TYPE)) { + if (d->flags & DECL_FLAG_ANNOTATES_TYPE) { /* type supplied */ if (!type_resolve(&d->type)) return false; } if (d->flags & DECL_FLAG_HAS_EXPR) { - if (!types_expr(&d->expr)) return false; - if (d->flags & DECL_FLAG_INFER_TYPE) { - d->type = d->expr.type; - } else { + if (!types_expr(&d->expr)) { + return false; + } + if (d->flags & DECL_FLAG_ANNOTATES_TYPE) { if (!type_must_eq(d->expr.where, &d->type, &d->expr.type)) return false; + } else { + d->type = d->expr.type; } } d->flags |= DECL_FLAG_FOUND_TYPE; |