From 7c790561cca382660081dfae3a6fe50fc31ad2a5 Mon Sep 17 00:00:00 2001 From: Leo Tenenbaum Date: Thu, 29 Aug 2019 15:50:19 -0400 Subject: Added fixed-size arrays --- base_cgen.c | 7 ++ cgen.c | 12 ++- eval.c | 25 +++++ out.c | 12 +-- parse.c | 328 +++++++++++++++++++++++++++++++++++------------------------- test.toc | 12 +-- toc.c | 1 + tokenizer.c | 7 +- types.c | 75 +++++++++++--- 9 files changed, 298 insertions(+), 181 deletions(-) create mode 100644 eval.c diff --git a/base_cgen.c b/base_cgen.c index 0e1f7f9..537f1c5 100644 --- a/base_cgen.c +++ b/base_cgen.c @@ -149,6 +149,9 @@ static bool cgen_type_pre(CGenerator *g, Type *t) { if (!cgen_type_pre(g, ret_type)) return false; cgen_write(g, "(*"); } break; + case TYPE_ARR: + cgen_type_pre(g, t->arr.of); + break; } return true; } @@ -181,6 +184,10 @@ static bool cgen_type_post(CGenerator *g, Type *t) { cgen_write_space(g); if (!cgen_type_post(g, ret_type)) return false; } break; + case TYPE_ARR: + cgen_write(g, "[%lu]", t->arr.n); + cgen_type_post(g, t->arr.of); + break; } return true; } diff --git a/cgen.c b/cgen.c index 9337ad7..2509611 100644 --- a/cgen.c +++ b/cgen.c @@ -70,11 +70,13 @@ static bool cgen_decl(CGenerator *g, Declaration *d) { cgen_type_pre(g, &d->type); cgen_ident(g, *ident, NULL); cgen_type_post(g, &d->type); - cgen_write_space(g); - cgen_write(g, "="); - cgen_write_space(g); - if (!cgen_expr(g, &d->expr)) { - return false; + if (d->flags & DECL_FLAG_HAS_EXPR) { + cgen_write_space(g); + cgen_write(g, "="); + cgen_write_space(g); + if (!cgen_expr(g, &d->expr)) { + return false; + } } cgen_write(g, "; "); } diff --git a/eval.c b/eval.c new file mode 100644 index 0000000..8cb3303 --- /dev/null +++ b/eval.c @@ -0,0 +1,25 @@ +static bool eval_expr_as_float(Expression *e, FloatLiteral *f) { + switch (e->kind) { + case EXPR_FLOAT_LITERAL: + *f = e->floatl; + return true; + case EXPR_INT_LITERAL: + *f = (FloatLiteral)e->intl; + return true; + } + err_print(e->where, "Not implemented yet"); + return false; +} + +static bool eval_expr_as_int(Expression *e, IntLiteral *i) { + switch (e->kind) { + case EXPR_FLOAT_LITERAL: + err_print(e->where, "Expected integer, but found floating-point literal."); + return false; + case EXPR_INT_LITERAL: + *i = e->intl; + return true; + } + err_print(e->where, "Not implemented yet"); + return false; +} diff --git a/out.c b/out.c index 99a3082..9b84632 100644 --- a/out.c +++ b/out.c @@ -1,18 +1,8 @@ #include "out.h" /* toc */ -static void a___(void); -static void a___1(void); void main__(void) { - void (*foo)(void) = a___; - void (*bar)(void) = a___1; -} -static void a___(void) { - a___1(); - float x = ((-3)-(-3.200000)); -} -static void a___1(void) { - a___(); + void (*(*foo[3])(void) )(void (*[3])(void) ) ; } int main(void) { diff --git a/parse.c b/parse.c index 058c9aa..694c9b2 100644 --- a/parse.c +++ b/parse.c @@ -2,7 +2,8 @@ typedef enum { TYPE_VOID, TYPE_BUILTIN, - TYPE_FN + TYPE_FN, + TYPE_ARR /* e.g. [5]int */ } TypeKind; typedef enum { @@ -20,7 +21,7 @@ typedef enum { } BuiltinType; #define TYPE_FLAG_FLEXIBLE 0x01 - +#define TYPE_FLAG_RESOLVED 0x02 typedef struct Type { Location where; @@ -31,6 +32,13 @@ typedef struct Type { struct { Array types; /* [0] = ret_type, [1..] = param_types */ } fn; + struct { + struct Type *of; + union { + IntLiteral n; /* this is NOT set by parse_type; it will be handled by types.c */ + struct Expression *n_expr; + }; + } arr; }; } Type; @@ -181,6 +189,119 @@ static Keyword builtin_type_to_kw(BuiltinType t) { return KW_COUNT; } + + +#define NOT_AN_OP -1 +static int op_precedence(Keyword op) { + switch (op) { + case KW_PLUS: + return 10; + case KW_MINUS: + return 20; + default: + return NOT_AN_OP; + } +} + + +/* + ends_with = which keyword does this expression end with? + if it's KW_RPAREN, this will match parentheses properly. +*/ +typedef enum { + EXPR_END_RPAREN_OR_COMMA, + EXPR_END_RSQUARE, + EXPR_END_SEMICOLON +} ExprEndKind; +static Token *expr_find_end(Parser *p, ExprEndKind ends_with) { + Tokenizer *t = p->tokr; + int bracket_level = 0; /* if ends_with = EXPR_END_RSQUARE, used for square brackets, + if ends_with = EXPR_END_RPAREN_OR_COMMA, used for parens */ + int brace_level = 0; + Token *token = t->token; + while (1) { + switch (ends_with) { + case EXPR_END_RPAREN_OR_COMMA: + if (token->kind == TOKEN_KW) { + switch (token->kw) { + case KW_COMMA: + if (bracket_level == 0) + return token; + break; + case KW_LPAREN: + bracket_level++; + break; + case KW_RPAREN: + bracket_level--; + if (bracket_level < 0) + return token; + break; + default: break; + } + } + break; + case EXPR_END_RSQUARE: + if (token->kind == TOKEN_KW) { + switch (token->kw) { + case KW_LSQUARE: + bracket_level++; + break; + case KW_RSQUARE: + bracket_level--; + if (bracket_level < 0) + return token; + break; + default: break; + } + } + break; + case EXPR_END_SEMICOLON: + if (token->kind == TOKEN_KW) { + switch (token->kw) { + case KW_SEMICOLON: + /* ignore semicolons inside braces {} */ + if (brace_level == 0) + return token; + break; + case KW_LBRACE: + brace_level++; + break; + case KW_RBRACE: + brace_level--; + if (brace_level < 0) { + t->token = token; + tokr_err(t, "Closing '}' without matching opening '{'."); + return NULL; + } + break; + default: break; + } + } + break; + } + if (token->kind == TOKEN_EOF) { + switch (ends_with) { + case EXPR_END_SEMICOLON: + if (brace_level > 0) { + tokr_err(t, "Opening brace was never closed."); /* FEATURE: Find out where this is */ + return NULL; + } else { + tokr_err(t, "Could not find ';' at end of expression."); + return NULL; + } + case EXPR_END_RPAREN_OR_COMMA: + tokr_err(t, "Opening ( was never closed."); + return NULL; + case EXPR_END_RSQUARE: + tokr_err(t, "Opening [ was never closed."); + return NULL; + } + } + token++; + } +} + +static bool parse_expr(Parser *p, Expression *e, Token *end); static bool parse_type(Parser *p, Type *type) { Tokenizer *t = p->tokr; type->where = t->token->where; @@ -189,51 +310,67 @@ static bool parse_type(Parser *p, Type *type) { case TOKEN_KW: type->kind = TYPE_BUILTIN; type->builtin = kw_to_builtin_type(t->token->kw); - if (type->builtin == BUILTIN_TYPE_COUNT) { - /* Not a builtin */ - if (t->token->kw == KW_FN) { - /* function type */ - type->kind = TYPE_FN; - arr_create(&type->fn.types, sizeof(Type)); - t->token++; - if (!token_is_kw(t->token, KW_LPAREN)) { - tokr_err(t, "Expected ( for function type."); - return false; - } - arr_add(&type->fn.types); /* add return type */ - t->token++; - if (!token_is_kw(t->token, KW_RPAREN)) { - while (1) { - Type *param_type = arr_add(&type->fn.types); - if (!parse_type(p, param_type)) return false; - if (token_is_kw(t->token, KW_RPAREN)) - break; - if (!token_is_kw(t->token, KW_COMMA)) { - tokr_err(t, "Expected , to continue function type parameter list."); - return false; - } - t->token++; /* move past , */ - } - } - t->token++; /* move past ) */ - Type *ret_type = type->fn.types.data; - /* if there's a symbol, that can't be the start of a type */ - if (t->token->kind == TOKEN_KW - && t->token->kw <= KW_LAST_SYMBOL) { - ret_type->kind = TYPE_VOID; - ret_type->flags = 0; - } else { - if (!parse_type(p, ret_type)) + if (type->builtin != BUILTIN_TYPE_COUNT) { + t->token++; + return true; + } + /* Not a builtin */ + switch (t->token->kw) { + case KW_FN: { + /* function type */ + type->kind = TYPE_FN; + arr_create(&type->fn.types, sizeof(Type)); + t->token++; + if (!token_is_kw(t->token, KW_LPAREN)) { + tokr_err(t, "Expected ( for function type."); + return false; + } + arr_add(&type->fn.types); /* add return type */ + t->token++; + if (!token_is_kw(t->token, KW_RPAREN)) { + while (1) { + Type *param_type = arr_add(&type->fn.types); + if (!parse_type(p, param_type)) return false; + if (token_is_kw(t->token, KW_RPAREN)) + break; + if (!token_is_kw(t->token, KW_COMMA)) { + tokr_err(t, "Expected , to continue function type parameter list."); return false; + } + t->token++; /* move past , */ } - return true; } - - break; - } else { - t->token++; + t->token++; /* move past ) */ + Type *ret_type = type->fn.types.data; + /* if there's a symbol that isn't [, that can't be the start of a type */ + if (t->token->kind == TOKEN_KW + && t->token->kw <= KW_LAST_SYMBOL + && t->token->kw != KW_LSQUARE) { + ret_type->kind = TYPE_VOID; + ret_type->flags = 0; + } else { + if (!parse_type(p, ret_type)) + return false; + } return true; } + case KW_LSQUARE: { + /* array type */ + Token *start = t->token; + type->kind = TYPE_ARR; + t->token++; /* move past [ */ + Token *end = expr_find_end(p, EXPR_END_RSQUARE); + type->arr.n_expr = parser_new_expr(p); + if (!parse_expr(p, type->arr.n_expr, end)) return false; + t->token = end + 1; /* go past ] */ + type->arr.of = err_malloc(sizeof *type->arr.of); /* OPTIM */ + if (!parse_type(p, type->arr.of)) return false; + type->flags = 0; + type->where = start->where; + return true; + } + default: break; + } break; default: break; } @@ -335,101 +472,6 @@ static bool parse_fn_expr(Parser *p, FnExpr *f) { return parse_block(p, &f->body); } -#define NOT_AN_OP -1 -static int op_precedence(Keyword op) { - switch (op) { - case KW_PLUS: - return 10; - case KW_MINUS: - return 20; - default: - return NOT_AN_OP; - } -} - - -/* - ends_with = which keyword does this expression end with? - if it's KW_RPAREN, this will match parentheses properly. -*/ -typedef enum { - EXPR_END_RPAREN_OR_COMMA, - EXPR_END_SEMICOLON -} ExprEndKind; -static Token *expr_find_end(Parser *p, ExprEndKind ends_with) { - Tokenizer *t = p->tokr; - int bracket_level = 0; - int brace_level = 0; - Token *token = t->token; - while (1) { - switch (ends_with) { - case EXPR_END_RPAREN_OR_COMMA: - if (token->kind == TOKEN_KW) { - switch (token->kw) { - case KW_COMMA: - if (bracket_level == 0) - return token; - break; - case KW_LPAREN: - bracket_level++; - break; - case KW_RPAREN: - bracket_level--; - if (bracket_level < 0) - return token; - break; - default: break; - } - } - break; - case EXPR_END_SEMICOLON: - if (token->kind == TOKEN_KW) { - switch (token->kw) { - case KW_SEMICOLON: - /* ignore semicolons inside braces {} */ - if (brace_level == 0) - return token; - break; - case KW_LBRACE: - brace_level++; - break; - case KW_RBRACE: - brace_level--; - if (brace_level < 0) { - t->token = token; - tokr_err(t, "Closing '}' without matching opening '{'."); - return NULL; - } - break; - default: break; - } - } - break; - } - if (token->kind == TOKEN_EOF) { - switch (ends_with) { - case EXPR_END_SEMICOLON: - if (brace_level > 0) { - tokr_err(t, "Opening brace was never closed."); /* FEATURE: Find out where this is */ - return NULL; - } else { - tokr_err(t, "Could not find ';' at end of expression."); - return NULL; - } - case EXPR_END_RPAREN_OR_COMMA: - if (bracket_level > 0) { - tokr_err(t, "Opening parenthesis was never closed."); /* FEATURE: Find out where this is */ - return NULL; - } else { - tokr_err(t, "Could not find ')' or ',' at end of expression."); - return NULL; - } - } - } - token++; - } -} - static bool parse_expr(Parser *p, Expression *e, Token *end) { Tokenizer *t = p->tokr; if (end == NULL) return false; @@ -840,7 +882,7 @@ static bool parse_file(Parser *p, ParsedFile *f) { #define PARSE_PRINT_LOCATION(l) //fprintf(out, "[l%lu]", (unsigned long)(l).line); - +static void fprint_expr(FILE *out, Expression *e); static void fprint_type(FILE *out, Type *t) { PARSE_PRINT_LOCATION(t->where); switch (t->kind) { @@ -860,6 +902,16 @@ static void fprint_type(FILE *out, Type *t) { fprintf(out, ") "); fprint_type(out, &types[0]); } break; + case TYPE_ARR: + fprintf(out, "["); + if (t->flags & TYPE_FLAG_RESOLVED) { + fprintf(out, INT_LITERAL_FMT, t->arr.n); + } else { + fprint_expr(out, t->arr.n_expr); + } + fprintf(out, "]"); + fprint_type(out, t->arr.of); + break; } } diff --git a/test.toc b/test.toc index dcb5a5f..5db00b7 100644 --- a/test.toc +++ b/test.toc @@ -1,11 +1,5 @@ main @= fn() { - - foo := fn() { - bar(); - x := -3--3.2; - }; - - bar := fn() { - foo(); - }; + foo : [3]fn()fn([42]fn()); + bar : [3]fn([37]fn(int)int); + foo + bar; }; diff --git a/toc.c b/toc.c index 8ef8287..3309563 100644 --- a/toc.c +++ b/toc.c @@ -15,6 +15,7 @@ #include "identifiers.c" #include "tokenizer.c" #include "parse.c" +#include "eval.c" #include "types.c" #include "base_cgen.c" #include "decls_cgen.c" diff --git a/tokenizer.c b/tokenizer.c index 7333db8..369f234 100644 --- a/tokenizer.c +++ b/tokenizer.c @@ -17,6 +17,8 @@ typedef enum { KW_RPAREN, KW_LBRACE, KW_RBRACE, + KW_LSQUARE, + KW_RSQUARE, KW_EQEQ, KW_LT, KW_LE, @@ -39,7 +41,7 @@ typedef enum { } Keyword; static const char *keywords[KW_COUNT] = - {";", "=", ":", "@", ",", "(", ")", "{", "}", "==", "<", "<=", "-", "+", "fn", + {";", "=", ":", "@", ",", "(", ")", "{", "}", "[", "]", "==", "<", "<=", "-", "+", "fn", "int", "i8", "i16", "i32", "i64", "u8", "u16", "u32", "u64", "float", "double"}; /* Returns KW_COUNT if it's not a keyword */ @@ -65,6 +67,7 @@ static Keyword tokenize_kw(char **s) { typedef unsigned long long IntLiteral; typedef long double FloatLiteral; /* OPTIM: Switch to double */ +#define INT_LITERAL_FMT "%llu" typedef enum { NUM_LITERAL_INT, @@ -124,7 +127,7 @@ static void token_fprint(FILE *out, Token *t) { fprintf(out, "number: "); switch (t->num.kind) { case NUM_LITERAL_INT: - fprintf(out, "%llu", t->num.intval); + fprintf(out, INT_LITERAL_FMT, t->num.intval); break; case NUM_LITERAL_FLOAT: fprintf(out, "%g", (double)t->num.floatval); diff --git a/types.c b/types.c index 833ddc5..2bfbb6f 100644 --- a/types.c +++ b/types.c @@ -50,20 +50,20 @@ static bool block_exit(Block *b) { /* returns the number of characters written, not including the null character */ -static size_t type_to_str(Type *a, char *buffer, size_t bufsize) { - switch (a->kind) { +static size_t type_to_str(Type *t, char *buffer, size_t bufsize) { + switch (t->kind) { case TYPE_VOID: return str_copy(buffer, bufsize, "void"); case TYPE_BUILTIN: { - const char *s = keywords[builtin_type_to_kw(a->builtin)]; + const char *s = keywords[builtin_type_to_kw(t->builtin)]; return str_copy(buffer, bufsize, s); } case TYPE_FN: { /* number of chars written */ size_t written = str_copy(buffer, bufsize, "fn ("); - Type *ret_type = a->fn.types.data; + Type *ret_type = t->fn.types.data; Type *param_types = ret_type + 1; - size_t nparams = a->fn.types.len - 1; + size_t nparams = t->fn.types.len - 1; for (size_t i = 0; i < nparams; i++) { if (i > 0) written += str_copy(buffer + written, bufsize - written, ", "); @@ -76,6 +76,18 @@ static size_t type_to_str(Type *a, char *buffer, size_t bufsize) { } return written; } break; + case TYPE_ARR: { + size_t written = str_copy(buffer, bufsize, "["); + if (t->flags & TYPE_FLAG_RESOLVED) { + snprintf(buffer + written, bufsize - written, INT_LITERAL_FMT, t->arr.n); + written += strlen(buffer + written); + } else { + written += str_copy(buffer + written, bufsize - written, "N"); + } + written += str_copy(buffer + written, bufsize - written, "]"); + written += type_to_str(t->arr.of, buffer + written, bufsize - written); + return written; + } break; } assert(0); @@ -138,6 +150,9 @@ static bool type_eq(Type *a, Type *b) { } return true; } + case TYPE_ARR: + if (a->arr.n != b->arr.n) return false; + return type_eq(a->arr.of, b->arr.of); } assert(0); return false; @@ -274,13 +289,14 @@ static bool type_of_expr(Expression *e, Type *t) { if (!match) { char s1[128], s2[128]; type_to_str(lhs_type, s1, sizeof s1); - type_to_str(lhs_type, s2, sizeof s2); + type_to_str(rhs_type, s2, sizeof s2); const char *op; switch (e->binary.op) { case BINARY_PLUS: op = "+"; break; case BINARY_MINUS: op = "-"; break; } err_print(e->where, "Mismatched types to operator %s: %s and %s", op, s1, s2); + return false; } return true; } @@ -290,6 +306,27 @@ static bool type_of_expr(Expression *e, Type *t) { return true; } +/* fixes the type (replaces [5+3]int with [8]int, etc.) */ +static bool type_resolve(Type *t) { + if (t->flags & TYPE_FLAG_RESOLVED) return true; + switch (t->kind) { + case TYPE_ARR: + /* it's an array */ + if (!type_resolve(t->arr.of)) return false; /* resolve inner type */ + if (!eval_expr_as_int(t->arr.n_expr, &t->arr.n)) return false; /* resolve N */ + break; + case TYPE_FN: + arr_foreach(&t->fn.types, Type, child_type) { + if (!type_resolve(child_type)) + return false; + } + break; + default: break; + } + t->flags |= TYPE_FLAG_RESOLVED; + return true; +} + static bool types_stmt(Statement *s); static bool types_block(Block *b) { @@ -304,11 +341,10 @@ static bool types_block(Block *b) { static bool types_expr(Expression *e) { Type *t = &e->type; - type_of_expr(e, t); + if (!type_of_expr(e, t)) return false; switch (e->kind) { - case EXPR_FN: { - types_block(&e->fn.body); - } break; + case EXPR_FN: + return types_block(&e->fn.body); case EXPR_CALL: { bool ret = true; arr_foreach(&e->call.args, Expression, arg) { @@ -323,13 +359,19 @@ static bool types_expr(Expression *e) { static bool types_decl(Declaration *d) { - if (!types_expr(&d->expr)) return false; if (d->flags & DECL_FLAG_FOUND_TYPE) return true; - if (d->flags & DECL_FLAG_INFER_TYPE) { - d->type = d->expr.type; - } else { - if (!type_must_eq(d->expr.where, &d->type, &d->expr.type)) { + if (!(d->flags & DECL_FLAG_INFER_TYPE)) { + /* type supplied */ + if (!type_resolve(&d->type)) return false; + } + if (d->flags & DECL_FLAG_HAS_EXPR) { + if (!types_expr(&d->expr)) return false; + if (d->flags & DECL_FLAG_INFER_TYPE) { + d->type = d->expr.type; + } else { + if (!type_must_eq(d->expr.where, &d->type, &d->expr.type)) + return false; } } d->flags |= DECL_FLAG_FOUND_TYPE; @@ -339,8 +381,9 @@ static bool types_decl(Declaration *d) { static bool types_stmt(Statement *s) { switch (s->kind) { case STMT_EXPR: - if (!types_expr(&s->expr)) + if (!types_expr(&s->expr)) { return false; + } break; case STMT_DECL: if (!types_decl(&s->decl)) -- cgit v1.2.3