diff options
author | Leo Tenenbaum <pommicket@gmail.com> | 2019-08-20 22:41:31 -0400 |
---|---|---|
committer | Leo Tenenbaum <pommicket@gmail.com> | 2019-08-20 22:41:31 -0400 |
commit | f9719e6ae07d1b857db2c54545c38c20a9d1c14c (patch) | |
tree | dd92fc98c87d4f273488f24c3eae0136d5cc8b9b | |
parent | b05ff83a9ddaafc951dc8d4926de179c949a101d (diff) |
started function calls
still some bugs
-rwxr-xr-x | build.sh | 2 | ||||
-rw-r--r-- | parse.c | 295 | ||||
-rw-r--r-- | test.toc | 13 | ||||
-rw-r--r-- | tokenizer.c | 6 |
4 files changed, 211 insertions, 105 deletions
@@ -1,2 +1,2 @@ #!/bin/bash -gcc -o toc main.c -g -o toc -Wall -Wextra -Wpedantic -Wconversion -std=c11 || exit 1 +gcc -o toc main.c -O0 -g -o toc -Wall -Wextra -Wpedantic -Wconversion -Wshadow -std=c11 || exit 1 @@ -46,10 +46,12 @@ typedef struct { typedef enum { EXPR_INT_LITERAL, EXPR_FLOAT_LITERAL, + EXPR_STR_LITERAL, EXPR_IDENT, /* variable or constant */ EXPR_BINARY_OP, EXPR_UNARY_OP, - EXPR_FN + EXPR_FN, + EXPR_CALL } ExprKind; typedef enum { @@ -71,6 +73,7 @@ typedef struct Expression { union { FloatLiteral floatl; IntLiteral intl; + StrLiteral strl; struct { UnaryOp op; struct Expression *of; @@ -80,6 +83,10 @@ typedef struct Expression { struct Expression *lhs; struct Expression *rhs; } binary; + struct { + struct Expression *fn; + Array args; /* of expression */ + } call; Identifier ident; FnExpr fn; }; @@ -99,7 +106,8 @@ typedef struct { /* OPTIM: Instead of using dynamic arrays, do two passes. */ typedef enum { - STMT_DECL + STMT_DECL, + STMT_EXPR } StatementKind; typedef struct { @@ -107,6 +115,7 @@ typedef struct { StatementKind kind; union { Declaration decl; + Expression expr; }; } Statement; @@ -287,13 +296,97 @@ static int op_precedence(Keyword op) { } } -static bool expr_parse(Expression *e, Parser *p, Token *end) { + +/* + ends_with = which keyword does this expression end with? + if it's KW_RPAREN, this will match parentheses properly. +*/ +typedef enum { + EXPR_END_RPAREN_OR_COMMA, + EXPR_END_SEMICOLON +} ExprEndKind; +static Token *expr_find_end(Parser *p, ExprEndKind ends_with) { + Tokenizer *t = p->tokr; + int bracket_level = 0; + int brace_level = 0; + Token *token = t->token; + while (1) { + switch (ends_with) { + case EXPR_END_RPAREN_OR_COMMA: + if (token->kind == TOKEN_KW) { + switch (token->kw) { + case KW_COMMA: + if (bracket_level == 0) + return token; + break; + case KW_LPAREN: + bracket_level++; + break; + case KW_RPAREN: + bracket_level--; + if (bracket_level < 0) + return token; + break; + default: break; + } + } + break; + case EXPR_END_SEMICOLON: + if (token->kind == TOKEN_KW) { + switch (token->kw) { + case KW_SEMICOLON: + /* ignore semicolons inside braces {} */ + if (brace_level == 0) + return token; + break; + case KW_LBRACE: + brace_level++; + break; + case KW_RBRACE: + brace_level--; + if (brace_level < 0) { + t->token = token; + tokr_err(t, "Closing '}' without matching opening '{'."); + return NULL; + } + break; + default: break; + } + } + break; + } + if (token->kind == TOKEN_EOF) { + switch (ends_with) { + case EXPR_END_SEMICOLON: + if (brace_level > 0) { + tokr_err(t, "Opening brace was never closed."); /* FEATURE: Find out where this is */ + return NULL; + } else { + tokr_err(t, "Could not find ';' at end of expression."); + return NULL; + } + case EXPR_END_RPAREN_OR_COMMA: + if (bracket_level > 0) { + tokr_err(t, "Opening parenthesis was never closed."); /* FEATURE: Find out where this is */ + return NULL; + } else { + tokr_err(t, "Could not find ')' or ',' at end of expression."); + return NULL; + } + } + } + token++; + } +} + +static bool expr_parse(Expression *e, Parser *p, Token *end) { Tokenizer *t = p->tokr; if (end == NULL) return false; e->flags = 0; e->where = t->token->where; if (end <= t->token) { tokr_err(t, "Empty expression."); + t->token = end + 1; return false; } if (end - t->token == 1) { @@ -321,8 +414,13 @@ static bool expr_parse(Expression *e, Parser *p, Token *end) { e->kind = EXPR_IDENT; e->ident = t->token->ident; break; + case TOKEN_STR_LITERAL: + e->kind = EXPR_STR_LITERAL; + e->strl = t->token->str; + break; default: tokr_err(t, "Unrecognized expression."); + t->token = end + 1; return false; } t->token = end; @@ -336,8 +434,9 @@ static bool expr_parse(Expression *e, Parser *p, Token *end) { return false; } if (t->token != end) { - tokr_err(t, "Direct function calling in an expression is not supported yet."); + tokr_err(t, "Direct function calling in an expression is not supported yet.\nYou can wrap the function in parentheses."); /* TODO */ + t->token = end + 1; return false; } return true; @@ -378,26 +477,88 @@ static bool expr_parse(Expression *e, Parser *p, Token *end) { } } } + + /* TODO: These errors are bad for functions, since they can be very long, + and this will only point to the end. + */ if (paren_level > 0) { tokr_err(t, "Too many opening parentheses."); + t->token = end + 1; return false; } if (paren_level < 0) { tokr_err(t, "Too many closing parentheses."); + t->token = end + 1; return false; } if (entirely_within_parentheses) { t->token++; /* move past opening ( */ Token *new_end = end - 1; /* parse to ending ) */ - if (!expr_parse(e, p, new_end)) + if (!expr_parse(e, p, new_end)) { + t->token = end + 1; return false; + } t->token++; /* move past closing ) */ return true; } if (lowest_precedence == NOT_AN_OP) { /* function calls, array accesses, etc. */ + /* try a function call */ + Token *token = t->token; + /* + can't call at start, e.g. in (fn() {})(), it is not the empty function "" + being called with fn() {} as an argument + */ + if (token_is_kw(t->token, KW_LPAREN)) { + paren_level++; + token++; + } + for (; token < end; token++) { + if (token->kind == TOKEN_KW) { + if (token->kw == KW_LPAREN) { + if (paren_level == 0) + break; /* this left parenthesis opens the function call */ + paren_level++; + } + if (token->kw == KW_RPAREN) { + paren_level--; + } + } + } + if (token != t->token && token != end) { + /* it's a function call! */ + e->kind = EXPR_CALL; + e->call.fn = parser_new_expr(p); + if (!expr_parse(e->call.fn, p, token)) { /* parse up to ( as function */ + t->token = end + 1; + return false; + } + arr_create(&e->call.args, sizeof(Expression)); + t->token = token + 1; /* move past ( */ + if (!token_is_kw(t->token, KW_RPAREN)) { + /* non-empty arg list */ + while (1) { + if (t->token->kind == TOKEN_EOF) { + tokr_err(t, "Expected argument list to continue."); + t->token = end + 1; + return false; + } + Expression *arg = arr_add(&e->call.args); + if (!expr_parse(arg, p, expr_find_end(p, EXPR_END_RPAREN_OR_COMMA))) { + t->token = end + 1; + return false; + } + if (token_is_kw(t->token, KW_RPAREN)) + break; + } + } + t->token++; /* move past ) */ + return true; + } + /* array accesses, etc. */ tokr_err(t, "Not implemented yet."); + t->token = end + 1; return false; } @@ -428,6 +589,7 @@ static bool expr_parse(Expression *e, Parser *p, Token *end) { } if (!is_unary) { tokr_err(t, "%s is not a unary operator.", keywords[lowest_precedence_op->kw]); + t->token = end + 1; return false; } e->unary.op = op; @@ -454,100 +616,22 @@ static bool expr_parse(Expression *e, Parser *p, Token *end) { Expression *lhs = parser_new_expr(p); e->binary.lhs = lhs; - if (!expr_parse(lhs, p, lowest_precedence_op)) + if (!expr_parse(lhs, p, lowest_precedence_op)) { + t->token = end + 1; return false; + } Expression *rhs = parser_new_expr(p); t->token = lowest_precedence_op + 1; e->binary.rhs = rhs; - if (!expr_parse(rhs, p, end)) + if (!expr_parse(rhs, p, end)) { + t->token = end + 1; return false; + } return true; } -/* - ends_with = which keyword does this expression end with? - if it's KW_RPAREN, this will match parentheses properly. -*/ -typedef enum { - EXPR_END_RPAREN_OR_COMMA, - EXPR_END_SEMICOLON -} ExprEndKind; -static Token *expr_find_end(Parser *p, ExprEndKind ends_with) { - Tokenizer *t = p->tokr; - int bracket_level = 0; - int brace_level = 0; - Token *token = t->token; - while (1) { - switch (ends_with) { - case EXPR_END_RPAREN_OR_COMMA: - if (token->kind == TOKEN_KW) { - switch (token->kw) { - case KW_COMMA: - if (bracket_level == 0) - return token; - break; - case KW_LPAREN: - bracket_level++; - break; - case KW_RPAREN: - bracket_level--; - if (bracket_level == 0) - return token; - break; - default: break; - } - } - break; - case EXPR_END_SEMICOLON: - if (token->kind == TOKEN_KW) { - switch (token->kw) { - case KW_SEMICOLON: - /* ignore semicolons inside braces {} */ - if (brace_level == 0) - return token; - break; - case KW_LBRACE: - brace_level++; - break; - case KW_RBRACE: - brace_level--; - if (brace_level < 0) { - t->token = token; - tokr_err(t, "Closing '}' without matching opening '{'."); - return NULL; - } - break; - default: break; - } - } - break; - } - if (token->kind == TOKEN_EOF) { - switch (ends_with) { - case EXPR_END_SEMICOLON: - if (brace_level > 0) { - tokr_err(t, "Opening brace was never closed."); /* FEATURE: Find out where this is */ - return NULL; - } else { - tokr_err(t, "Could not find ';' at end of expression."); - return NULL; - } - case EXPR_END_RPAREN_OR_COMMA: - if (bracket_level > 0) { - tokr_err(t, "Opening parenthesis was never closed."); /* FEATURE: Find out where this is */ - return NULL; - } else { - tokr_err(t, "Could not find ')' or ',' at end of expression."); - return NULL; - } - } - } - token++; - } -} - static bool decl_parse(Declaration *d, Parser *p) { Tokenizer *t = p->tokr; /* OPTIM: Maybe don't use a dynamic array or use parser allocator. */ @@ -608,12 +692,14 @@ static bool decl_parse(Declaration *d, Parser *p) { t->token++; return true; } - tokr_err(t, "Expected ';'"); /* should never happen in theory right now */ + tokr_err(t, "Expected ';' at end of expression"); /* should never happen in theory right now */ return false; } static bool stmt_parse(Statement *s, Parser *p) { Tokenizer *t = p->tokr; + if (t->token->kind == TOKEN_EOF) + tokr_err(t, "Expected statement."); s->where = t->token->where; /* NOTE: This may cause problems in the future! Other statements might have comma @@ -623,8 +709,15 @@ static bool stmt_parse(Statement *s, Parser *p) { s->kind = STMT_DECL; return decl_parse(&s->decl, p); } else { - tokr_err(t, "Unreocgnized statement."); - return false; + s->kind = STMT_EXPR; + if (!expr_parse(&s->expr, p, expr_find_end(p, EXPR_END_SEMICOLON))) + return false; + if (!token_is_kw(t->token, KW_SEMICOLON)) { + tokr_err(t, "Expected ';' at end of statement."); + return false; + } + t->token++; /* move past ; */ + return true; } } @@ -699,6 +792,9 @@ static void expr_fprint(FILE *out, Expression *e) { case EXPR_FLOAT_LITERAL: fprintf(out, "%f", (double)e->floatl); break; + case EXPR_STR_LITERAL: + fprintf(out, "\"%s\"", e->strl.str); + break; case EXPR_IDENT: ident_fprint(out, e->ident); break; @@ -730,6 +826,15 @@ static void expr_fprint(FILE *out, Expression *e) { case EXPR_FN: fn_expr_fprint(out, &e->fn); break; + case EXPR_CALL: + expr_fprint(out, e->call.fn); + fprintf(out, "("); + arr_foreach(&e->call.args, Expression, arg) { + if (arg != e->call.args.data) fprintf(out, ", "); + expr_fprint(out, arg); + } + fprintf(out, ")"); + break; } } @@ -760,6 +865,10 @@ static void stmt_fprint(FILE *out, Statement *s) { decl_fprint(out, &s->decl); fprintf(out, ";\n"); break; + case STMT_EXPR: + expr_fprint(out, &s->expr); + fprintf(out, ";\n"); + break; } } @@ -769,4 +878,4 @@ static void parsed_file_fprint(FILE *out, ParsedFile *f) { } } -/* TODO: Freeing parser */ +/* TODO: Freeing parser (remember to free args) */ @@ -1,8 +1,5 @@ -main :- fn () { - bar := fn (x: int, y: int, z: float) float { - - foo :- fn () { - x:=7+y; - }; - }; -};
\ No newline at end of file +main :- fn(){ + (fn(x: int, y: float) int { + printf("Hello!\n"); + })(3, 7.3); +}; diff --git a/tokenizer.c b/tokenizer.c index d905bc8..2f022e0 100644 --- a/tokenizer.c +++ b/tokenizer.c @@ -455,8 +455,8 @@ static bool tokenize_string(Tokenizer *tokr, char *str) { len++; tokr_nextchar(&t); } - char *str = malloc(len + 1); - char *strptr = str; + char *strlit = malloc(len + 1); + char *strptr = strlit; tokr_get_location(&t, token); tokr_nextchar(&t); /* past opening " */ while (*t.s != '"') { @@ -477,7 +477,7 @@ static bool tokenize_string(Tokenizer *tokr, char *str) { *strptr = 0; token->kind = TOKEN_STR_LITERAL; token->str.len = len; - token->str.str = str; + token->str.str = strlit; tokr_nextchar(&t); /* move past closing " */ continue; } |