summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--#parse.c#1308
1 files changed, 0 insertions, 1308 deletions
diff --git a/#parse.c# b/#parse.c#
deleted file mode 100644
index 079d8e4..0000000
--- a/#parse.c#
+++ /dev/null
@@ -1,1308 +0,0 @@
-/* TODO: stmt_parse -> parse_stmt, etc. */
-typedef enum {
- TYPE_VOID,
- TYPE_BUILTIN,
- TYPE_FN,
- TYPE_TUPLE,
- TYPE_ARR /* e.g. [5]int */
-} TypeKind;
-
-typedef enum {
- BUILTIN_I8,
- BUILTIN_I16,
- BUILTIN_I32,
- BUILTIN_I64,
- BUILTIN_U8,
- BUILTIN_U16,
- BUILTIN_U32,
- BUILTIN_U64,
- BUILTIN_FLOAT,
- BUILTIN_DOUBLE,
- BUILTIN_TYPE_COUNT
-} BuiltinType;
-
-#define TYPE_FLAG_FLEXIBLE 0x01
-#define TYPE_FLAG_RESOLVED 0x02
-
-typedef struct Type {
- Location where;
- TypeKind kind;
- unsigned short flags;
- union {
- BuiltinType builtin;
- struct {
- Array types; /* [0] = ret_type, [1..] = param_types */
- } fn;
- Array tuple;
- struct {
- struct Type *of;
- union {
- UInteger n; /* this is NOT set by parse_type; it will be handled by types.c */
- struct Expression *n_expr;
- };
- } arr;
- };
-} Type;
-
-typedef struct {
- Identifier name;
- Type type;
-} Param;
-
-typedef struct Block {
- Array stmts;
-} Block;
-
-typedef struct {
- Array params;
- Type ret_type;
- Block body;
- Identifier name; /* NULL if the function is anonymous (set to NULL by parse.c, set to actual value by types_cgen.c) */
- unsigned long id; /* this is used to keep track of local vs global/other local functions (there might be multiple functions called "foo") */
-} FnExpr; /* an expression such as fn(x: int) int {return 2 * x;} */
-
-typedef enum {
- EXPR_INT_LITERAL,
- EXPR_FLOAT_LITERAL,
- EXPR_STR_LITERAL,
- EXPR_IDENT, /* variable or constant */
- EXPR_BINARY_OP,
- EXPR_UNARY_OP,
- EXPR_FN,
- EXPR_CALL
-} ExprKind;
-
-typedef enum {
- UNARY_MINUS
-} UnaryOp;
-
-typedef enum {
- BINARY_SET, /* e.g. x = y */
- BINARY_PLUS,
- BINARY_MINUS,
- BINARY_COMMA,
- BINARY_AT_INDEX /* e.g. x[i] */
-} BinaryOp;
-
-#define EXPR_FLAG_FLEXIBLE 0x01 /* e.g. 4 => float/i32/etc. */
-
-typedef struct Expression {
- Location where;
- ExprKind kind;
- Type type;
- union {
- Floating floatl;
- UInteger intl;
- StrLiteral strl;
- struct {
- UnaryOp op;
- struct Expression *of;
- } unary;
- struct {
- BinaryOp op;
- struct Expression *lhs;
- struct Expression *rhs;
- } binary;
- struct {
- struct Expression *fn;
- Array args; /* of expression */
- } call;
- Identifier ident;
- FnExpr fn;
- };
-} Expression;
-
-#define DECL_FLAG_ANNOTATES_TYPE 0x01
-#define DECL_FLAG_CONST 0x02
-#define DECL_FLAG_HAS_EXPR 0x04
-#define DECL_FLAG_FOUND_TYPE 0x08
-
-/* OPTIM: Instead of using dynamic arrays, do two passes. */
-typedef struct Declaration {
- Location where;
- Array idents;
- Type type;
- unsigned short flags;
- Expression expr;
-} Declaration;
-
-typedef enum {
- STMT_DECL,
- STMT_EXPR
-} StatementKind;
-
-typedef struct {
- Location where;
- StatementKind kind;
- union {
- Declaration decl;
- Expression expr;
- };
-} Statement;
-
-typedef struct {
- Array stmts;
-} ParsedFile;
-
-typedef struct {
- Tokenizer *tokr;
- BlockArr exprs; /* a dynamic array of expressions, so that we don't need to call malloc every time we make an expression */
- Block *block; /* which block are we in? NULL = file scope */
-} Parser;
-
-static const char *binary_op_to_str(BinaryOp b) {
- switch (b) {
- case BINARY_PLUS: return "+";
- case BINARY_MINUS: return "-";
- case BINARY_SET: return "=";
- case BINARY_COMMA: return ",";
- case BINARY_AT_INDEX: return "[]";
- }
- assert(0);
- return "";
-}
-
-static bool type_builtin_is_integer(BuiltinType b) {
- switch (b) {
- case BUILTIN_I8:
- case BUILTIN_I16:
- case BUILTIN_I32:
- case BUILTIN_I64:
- case BUILTIN_U8:
- case BUILTIN_U16:
- case BUILTIN_U32:
- case BUILTIN_U64:
- return true;
- default: return false;
- }
-}
-
-static bool type_builtin_is_floating(BuiltinType b) {
- switch (b) {
- case BUILTIN_FLOAT:
- case BUILTIN_DOUBLE:
- return true;
- default: return false;
- }
-}
-
-static bool type_builtin_is_numerical(BuiltinType b) {
- return type_builtin_is_integer(b) || type_builtin_is_floating(b);
-}
-
-
-/* returns BUILTIN_TYPE_COUNT on failure */
-static BuiltinType kw_to_builtin_type(Keyword kw) {
- switch (kw) {
- case KW_I8: return BUILTIN_I8;
- case KW_I16: return BUILTIN_I16;
- case KW_I32: return BUILTIN_I32;
- case KW_I64: return BUILTIN_I64;
- case KW_INT: return BUILTIN_I64;
- case KW_U8: return BUILTIN_U8;
- case KW_U16: return BUILTIN_U16;
- case KW_U32: return BUILTIN_U32;
- case KW_U64: return BUILTIN_U64;
- case KW_FLOAT: return BUILTIN_FLOAT;
- case KW_DOUBLE: return BUILTIN_DOUBLE;
- default: return BUILTIN_TYPE_COUNT;
- }
-}
-
-static Keyword builtin_type_to_kw(BuiltinType t) {
- switch (t) {
- case BUILTIN_I8: return KW_I8;
- case BUILTIN_I16: return KW_I16;
- case BUILTIN_I32: return KW_I32;
- case BUILTIN_I64: return KW_I64;
- case BUILTIN_U8: return KW_U8;
- case BUILTIN_U16: return KW_U16;
- case BUILTIN_U32: return KW_U32;
- case BUILTIN_U64: return KW_U64;
- case BUILTIN_FLOAT: return KW_FLOAT;
- case BUILTIN_DOUBLE: return KW_DOUBLE;
- case BUILTIN_TYPE_COUNT: break;
- }
- assert(0);
- return KW_COUNT;
-}
-
-/* returns the number of characters written, not including the null character */
-static size_t type_to_str(Type *t, char *buffer, size_t bufsize) {
- switch (t->kind) {
- case TYPE_VOID:
- return str_copy(buffer, bufsize, "void");
- case TYPE_BUILTIN: {
- const char *s = keywords[builtin_type_to_kw(t->builtin)];
- return str_copy(buffer, bufsize, s);
- }
- case TYPE_FN: {
- /* number of chars written */
- size_t written = str_copy(buffer, bufsize, "fn (");
- Type *ret_type = t->fn.types.data;
- Type *param_types = ret_type + 1;
- size_t nparams = t->fn.types.len - 1;
- for (size_t i = 0; i < nparams; i++) {
- if (i > 0)
- written += str_copy(buffer + written, bufsize - written, ", ");
- written += type_to_str(&param_types[i], buffer + written, bufsize - written);
- }
- written += str_copy(buffer + written, bufsize - written, ")");
- if (ret_type->kind != TYPE_VOID) {
- written += str_copy(buffer + written, bufsize - written, " ");
- written += type_to_str(ret_type, buffer + written, bufsize - written);
- }
- return written;
- } break;
- case TYPE_ARR: {
- size_t written = str_copy(buffer, bufsize, "[");
- if (t->flags & TYPE_FLAG_RESOLVED) {
- snprintf(buffer + written, bufsize - written, UINTEGER_FMT, t->arr.n);
- written += strlen(buffer + written);
- } else {
- written += str_copy(buffer + written, bufsize - written, "N");
- }
- written += str_copy(buffer + written, bufsize - written, "]");
- written += type_to_str(t->arr.of, buffer + written, bufsize - written);
- return written;
- } break;
- case TYPE_TUPLE: {
- size_t written = str_copy(buffer, bufsize, "(");
- arr_foreach(&t->tuple, Type, child) {
- if (child != t->tuple.data)
- written += str_copy(buffer + written, bufsize - written, ", ");
- written += type_to_str(child, buffer + written, bufsize - written);
- }
- written += str_copy(buffer + written, bufsize - written, ")");
- return written;
- }
- }
-
- assert(0);
- return 0;
-}
-
-/*
- allocate a new expression.
-*/
-static Expression *parser_new_expr(Parser *p) {
- return block_arr_add(&p->exprs);
-}
-
-#define NOT_AN_OP -1
-static int op_precedence(Keyword op) {
- switch (op) {
- case KW_EQ:
- return 0;
- case KW_COMMA:
- return 5;
- case KW_PLUS:
- return 10;
- case KW_MINUS:
- return 20;
- default:
- return NOT_AN_OP;
- }
-}
-
-
-/*
- ends_with = which keyword does this expression end with?
- if it's KW_RPAREN, this will match parentheses properly.
-*/
-typedef enum {
- EXPR_END_RPAREN_OR_COMMA,
- EXPR_END_RSQUARE,
- EXPR_END_SEMICOLON
-} ExprEndKind;
-static Token *expr_find_end(Parser *p, ExprEndKind ends_with) {
- Tokenizer *t = p->tokr;
- int bracket_level = 0; /* if ends_with = EXPR_END_RSQUARE, used for square brackets,
- if ends_with = EXPR_END_RPAREN_OR_COMMA, used for parens */
- int brace_level = 0;
- Token *token = t->token;
- while (1) {
- switch (ends_with) {
- case EXPR_END_RPAREN_OR_COMMA:
- if (token->kind == TOKEN_KW) {
- switch (token->kw) {
- case KW_COMMA:
- if (bracket_level == 0)
- return token;
- break;
- case KW_LPAREN:
- bracket_level++;
- break;
- case KW_RPAREN:
- bracket_level--;
- if (bracket_level < 0)
- return token;
- break;
- default: break;
- }
- }
- break;
- case EXPR_END_RSQUARE:
- if (token->kind == TOKEN_KW) {
- switch (token->kw) {
- case KW_LSQUARE:
- bracket_level++;
- break;
- case KW_RSQUARE:
- bracket_level--;
- if (bracket_level < 0)
- return token;
- break;
- default: break;
- }
- }
- break;
- case EXPR_END_SEMICOLON:
- if (token->kind == TOKEN_KW) {
- switch (token->kw) {
- case KW_SEMICOLON:
- /* ignore semicolons inside braces {} */
- if (brace_level == 0)
- return token;
- break;
- case KW_LBRACE:
- brace_level++;
- break;
- case KW_RBRACE:
- brace_level--;
- if (brace_level < 0) {
- t->token = token;
- tokr_err(t, "Closing '}' without matching opening '{'.");
- return NULL;
- }
- break;
- default: break;
- }
- }
- break;
- }
- if (token->kind == TOKEN_EOF) {
- switch (ends_with) {
- case EXPR_END_SEMICOLON:
- if (brace_level > 0) {
- tokr_err(t, "Opening brace was never closed."); /* FEATURE: Find out where this is */
- return NULL;
- } else {
- tokr_err(t, "Could not find ';' at end of expression.");
- return NULL;
- }
- case EXPR_END_RPAREN_OR_COMMA:
- tokr_err(t, "Opening ( was never closed.");
- return NULL;
- case EXPR_END_RSQUARE:
- tokr_err(t, "Opening [ was never closed.");
- return NULL;
- }
- }
- token++;
- }
-}
-
-static bool parse_expr(Parser *p, Expression *e, Token *end);
-static bool parse_type(Parser *p, Type *type) {
- Tokenizer *t = p->tokr;
- type->where = t->token->where;
- type->flags = 0;
- switch (t->token->kind) {
- case TOKEN_KW:
- type->kind = TYPE_BUILTIN;
- type->builtin = kw_to_builtin_type(t->token->kw);
- if (type->builtin != BUILTIN_TYPE_COUNT) {
- t->token++;
- break;
- }
- /* Not a builtin */
- switch (t->token->kw) {
- case KW_FN: {
- /* function type */
- type->kind = TYPE_FN;
- arr_create(&type->fn.types, sizeof(Type));
- t->token++;
- if (!token_is_kw(t->token, KW_LPAREN)) {
- tokr_err(t, "Expected ( for function type.");
- return false;
- }
- arr_add(&type->fn.types); /* add return type */
- t->token++;
- if (!token_is_kw(t->token, KW_RPAREN)) {
- while (1) {
- Type *param_type = arr_add(&type->fn.types);
- if (!parse_type(p, param_type)) return false;
- if (token_is_kw(t->token, KW_RPAREN))
- break;
- if (!token_is_kw(t->token, KW_COMMA)) {
- tokr_err(t, "Expected , to continue function type parameter list.");
- return false;
- }
- t->token++; /* move past , */
- }
- }
- t->token++; /* move past ) */
- Type *ret_type = type->fn.types.data;
- /* if there's a symbol that isn't [ or (, that can't be the start of a type */
- if (t->token->kind == TOKEN_KW
- && t->token->kw <= KW_LAST_SYMBOL
- && t->token->kw != KW_LSQUARE
- && t->token->kw != KW_LPAREN) {
- ret_type->kind = TYPE_VOID;
- ret_type->flags = 0;
- } else {
- if (!parse_type(p, ret_type))
- return false;
- }
- break;
- }
- case KW_LSQUARE: {
- /* array type */
- Token *start = t->token;
- type->kind = TYPE_ARR;
- t->token++; /* move past [ */
- Token *end = expr_find_end(p, EXPR_END_RSQUARE);
- type->arr.n_expr = parser_new_expr(p);
- if (!parse_expr(p, type->arr.n_expr, end)) return false;
- t->token = end + 1; /* go past ] */
- type->arr.of = err_malloc(sizeof *type->arr.of); /* OPTIM */
- if (!parse_type(p, type->arr.of)) return false;
- type->flags = 0;
- type->where = start->where;
- break;
- }
- case KW_LPAREN:
- /* tuple! */
- type->kind = TYPE_TUPLE;
- arr_create(&type->tuple, sizeof(Type));
- t->token++; /* move past ( */
- while (1) {
- Type *child = arr_add(&type->tuple);
- parse_type(p, child);
- if (token_is_kw(t->token, KW_RPAREN)) { /* we're done with the tuple */
- t->token++; /* move past ) */
- break;
- }
- if (token_is_kw(t->token, KW_COMMA)) {
- t->token++; /* move past , */
- continue;
- } else {
- tokr_err(t, "Expected , to list next tuple type or ) to end tuple type.");
- return false;
- }
- }
- break;
- default:
- tokr_err(t, "Unrecognized type.");
- return false;
- }
- break;
- default:
- tokr_err(t, "Unrecognized type.");
- return false;
- }
- return true;
-
-}
-
-static bool parse_param(Parser *parser, Param *p) {
- Tokenizer *t = parser->tokr;
- if (t->token->kind != TOKEN_IDENT) {
- tokr_err(t, "Expected parameter name.");
- return false;
- }
- p->name = t->token->ident;
- t->token++;
- if (!token_is_kw(t->token, KW_COLON)) {
- tokr_err(t, "Expected ':' between parameter name and type.");
- return false;
- }
- t->token++;
- if (!parse_type(parser, &p->type))
- return false;
- return true;
-}
-
-static bool parse_stmt(Parser *p, Statement *s);
-
-static bool parse_block(Parser *p, Block *b) {
- Tokenizer *t = p->tokr;
- Block *prev_block = p->block;
- p->block = b;
- if (!token_is_kw(t->token, KW_LBRACE)) {
- tokr_err(t, "Expected '{' to open block.");
- return false;
- }
- t->token++; /* move past { */
- arr_create(&b->stmts, sizeof(Statement));
- bool ret = true;
- if (!token_is_kw(t->token, KW_RBRACE)) {
- /* non-empty function body */
- while (1) {
- Statement *stmt = arr_add(&b->stmts);
- if (!parse_stmt(p, stmt)) {
- ret = false;
- }
- if (token_is_kw(t->token, KW_RBRACE)) break;
- if (t->token->kind == TOKEN_EOF) {
- tokr_err(t, "Expected '}' to close function body.");
- return false;
- }
- }
- }
-
- t->token++; /* move past } */
- p->block = prev_block;
- return ret;
-}
-
-static bool parse_fn_expr(Parser *p, FnExpr *f) {
- Tokenizer *t = p->tokr;
- /* only called when token is fn */
- assert(token_is_kw(t->token, KW_FN));
- f->name = NULL;
- t->token++;
- if (!token_is_kw(t->token, KW_LPAREN)) {
- tokr_err(t, "Expected '(' after 'fn'.");
- return false;
- }
- arr_create(&f->params, sizeof(Param));
-
- t->token++;
-
- if (!token_is_kw(t->token, KW_RPAREN)) {
- /* non-empty parameter list */
- while (1) {
- Param *param = arr_add(&f->params);
- if (!parse_param(p, param))
- return false;
- if (token_is_kw(t->token, KW_RPAREN)) break;
- if (token_is_kw(t->token, KW_COMMA)) {
- t->token++;
- continue;
- }
- tokr_err(t, "Expected ',' or ')' to continue or end parameter list.");
- return false;
- }
- }
-
- t->token++; /* move past ) */
- if (token_is_kw(t->token, KW_LBRACE)) {
- /* void function */
- f->ret_type.kind = TYPE_VOID;
- f->ret_type.flags = 0;
- } else {
- if (!parse_type(p, &f->ret_type)) {
- return false;
- }
- }
- return parse_block(p, &f->body);
-}
-
-static bool parse_expr(Parser *p, Expression *e, Token *end) {
- Tokenizer *t = p->tokr;
- if (end == NULL) return false;
- e->where = t->token->where;
- if (end <= t->token) {
- tokr_err(t, "Empty expression.");
- return false;
- }
- if (end - t->token == 1) {
- /* 1-token expression */
- switch (t->token->kind) {
- case TOKEN_NUM_LITERAL: {
- NumLiteral *num = &t->token->num;
- switch (num->kind) {
- case NUM_LITERAL_FLOAT:
- e->kind = EXPR_FLOAT_LITERAL;
- e->type.kind = TYPE_BUILTIN;
- e->type.builtin = BUILTIN_FLOAT;
- e->floatl = num->floatval;
- break;
- case NUM_LITERAL_INT:
- e->kind = EXPR_INT_LITERAL;
- e->type.kind = TYPE_BUILTIN;
- e->type.builtin = BUILTIN_I64; /* TODO: if it's too big, use a u64 instead. */
- e->intl = num->intval;
- break;
- }
- } break;
- case TOKEN_IDENT:
- e->kind = EXPR_IDENT;
- e->ident = t->token->ident;
- break;
- case TOKEN_STR_LITERAL:
- e->kind = EXPR_STR_LITERAL;
- e->strl = t->token->str;
- break;
- default:
- tokr_err(t, "Unrecognized expression.");
- return false;
- }
- t->token = end;
- return true;
- }
-
- Token *start = t->token;
-
- if (token_is_kw(t->token, KW_FN)) {
- /* this is a function */
- e->kind = EXPR_FN;
- if (!parse_fn_expr(p, &e->fn))
- return false;
-
- if (t->token != end) {
- tokr_err(t, "Direct function calling in an expression is not supported yet.\nYou can wrap the function in parentheses.");
- /* TODO */
- return false;
- }
- return true;
- }
-
- /* Find the lowest-precedence operator not in parentheses/braces/square brackets */
- int paren_level = 0;
- int brace_level = 0;
- int square_level = 0;
- int lowest_precedence = NOT_AN_OP;
- /* e.g. (5+3) */
- bool entirely_within_parentheses = token_is_kw(t->token, KW_LPAREN);
- Token *lowest_precedence_op;
- for (Token *token = t->token; token < end; token++) {
- if (token->kind == TOKEN_KW) {
- switch (token->kw) {
- case KW_LPAREN:
- paren_level++;
- break;
- case KW_RPAREN:
- paren_level--;
- if (paren_level == 0 && token != end - 1)
- entirely_within_parentheses = false;
- if (paren_level < 0) {
- t->token = token;
- tokr_err(t, "Excessive closing ).");
- t->token = end + 1;
- return false;
- }
- break;
- case KW_LBRACE:
- brace_level++;
- break;
- case KW_RBRACE:
- brace_level--;
- if (brace_level < 0) {
- t->token = token;
- tokr_err(t, "Excessive closing }.");
- return false;
- }
- break;
- case KW_LSQUARE:
- square_level++;
- break;
- case KW_RSQUARE:
- square_level--;
- if (square_level < 0) {
- tokr_err(t, "Excessive closing ].");
- return false;
- }
- break;
- default: { /* OPTIM: use individual cases for each op */
- if (paren_level == 0 && brace_level == 0 && square_level == 0) {
- int precedence = op_precedence(token->kw);
- if (precedence == NOT_AN_OP) break; /* nvm it's not an operator */
- if (lowest_precedence == NOT_AN_OP || precedence <= lowest_precedence) {
- lowest_precedence = precedence;
- lowest_precedence_op = token;
- }
- }
- } break;
- }
- }
- }
-
- if (paren_level > 0) {
- t->token = start;
- tokr_err(t, "Too many opening parentheses (.");
- return false;
- }
- if (brace_level > 0) {
- t->token = start;
- tokr_err(t, "Too many opening braces {.");
- return false;
- }
- if (square_level > 0) {
- t->token = start;
- tokr_err(t, "Too many opening square brackets [.");
- return false;
- }
-
- if (entirely_within_parentheses) {
- t->token++; /* move past opening ( */
- Token *new_end = end - 1; /* parse to ending ) */
- if (!parse_expr(p, e, new_end))
- return false;
- t->token++; /* move past closing ) */
- return true;
- }
-
- if (lowest_precedence == NOT_AN_OP) {
- /* function calls, array accesses, etc. */
-
- /* try a function call or array access */
- Token *token = t->token;
- /* currently unnecessary: paren_level = square_level = 0; */
- /*
- can't call at start, e.g. in (fn() {})(), it is not the empty function ""
- being called with fn() {} as an argument
- */
- if (token_is_kw(t->token, KW_LPAREN)) {
- paren_level++;
- token++;
- }
- /* which opening bracket starts the call/array access */
- Token *opening_bracket = NULL;
- for (; token < end; token++) {
- if (token->kind == TOKEN_KW) {
- switch (token->kw) {
- case KW_LPAREN:
- if (square_level == 0 && paren_level == 0)
- opening_bracket = token; /* maybe this left parenthesis opens the function call */
- paren_level++;
- break;
- case KW_LSQUARE:
- if (square_level == 0 && paren_level == 0)
- opening_bracket = token; /* ^^ (array access) */
- square_level++;
- break;
- case KW_RPAREN:
- paren_level--;
- break;
- case KW_RSQUARE:
- square_level--;
- break;
- default: break;
- }
-
- } else if (token->kind == TOKEN_EOF) {
- if (paren_level > 0) {
- tokr_err(t, "Unmatched ( parenthesis.");
- return false;
- }
- if (square_level > 0) {
- tokr_err(t, "Unmatched [ square bracket.");
- return false;
- }
- break;
- }
- }
- if (opening_bracket) {
- switch (opening_bracket->kw) {
- case KW_LPAREN: {
- /* it's a function call! */
- e->kind = EXPR_CALL;
- e->call.fn = parser_new_expr(p);
- if (!parse_expr(p, e->call.fn, opening_bracket)) { /* parse up to ( as function */
- return false;
- }
- arr_create(&e->call.args, sizeof(Expression));
- t->token = opening_bracket + 1; /* move past ( */
- if (!token_is_kw(t->token, KW_RPAREN)) {
- /* non-empty arg list */
- while (1) {
- if (t->token->kind == TOKEN_EOF) {
- tokr_err(t, "Expected argument list to continue.");
- return false;
- }
- Expression *arg = arr_add(&e->call.args);
- if (!parse_expr(p, arg, expr_find_end(p, EXPR_END_RPAREN_OR_COMMA))) {
- return false;
- }
- if (token_is_kw(t->token, KW_RPAREN))
- break;
- t->token++; /* move past , */
- }
- }
- t->token++; /* move past ) */
- return true;
- }
- case KW_LSQUARE: {
- /* it's an array access */
- e->kind = EXPR_BINARY_OP;
- e->binary.op = BINARY_AT_INDEX;
- e->binary.lhs = parser_new_expr(p);
- e->binary.rhs = parser_new_expr(p);
- /* parse array */
- if (!parse_expr(p, e->binary.lhs, opening_bracket)) return false;
- /* parse index */
- t->token = opening_bracket + 1;
- Token *index_end = expr_find_end(p, EXPR_END_RSQUARE);
- if (!parse_expr(p, e->binary.rhs, index_end))
- return false;
- t->token++; /* move past ] */
- return true;
- }
- default:
- assert(0);
- return false;
- }
- }
- tokr_err(t, "Not implemented yet.");
- return false;
- }
-
- /* This is a unary op not a binary one. */
- while (lowest_precedence_op != t->token
- && lowest_precedence_op[-1].kind == TOKEN_KW
- && op_precedence(lowest_precedence_op[-1].kw) != NOT_AN_OP) {
- lowest_precedence_op--;
- }
-
- /* Unary */
- if (lowest_precedence_op == t->token) {
- UnaryOp op;
- bool is_unary;
- switch (lowest_precedence_op->kw) {
- case KW_PLUS:
- /* unary + is ignored entirely */
- t->token++;
- /* re-parse this expression without + */
- return parse_expr(p, e, end);
- case KW_MINUS:
- is_unary = true;
- op = UNARY_MINUS;
- break;
- default:
- is_unary = false;
- break;
- }
- if (!is_unary) {
- tokr_err(t, "%s is not a unary operator.", keywords[lowest_precedence_op->kw]);
- return false;
- }
- e->unary.op = op;
- e->kind = EXPR_UNARY_OP;
- t->token++;
- Expression *of = parser_new_expr(p);
- e->unary.of = of;
- return parse_expr(p, of, end);
- }
-
-
- BinaryOp op;
- switch (lowest_precedence_op->kw) {
- case KW_PLUS:
- op = BINARY_PLUS;
- break;
- case KW_MINUS:
- op = BINARY_MINUS;
- break;
- case KW_EQ:
- op = BINARY_SET;
- break;
- case KW_COMMA:
- op = BINARY_COMMA;
- break;
- default: assert(0); break;
- }
- e->binary.op = op;
- e->kind = EXPR_BINARY_OP;
-
- Expression *lhs = parser_new_expr(p);
- e->binary.lhs = lhs;
- if (!parse_expr(p, lhs, lowest_precedence_op)) {
- return false;
- }
-
- Expression *rhs = parser_new_expr(p);
- t->token = lowest_precedence_op + 1;
- e->binary.rhs = rhs;
- if (!parse_expr(p, rhs, end)) {
- return false;
- }
-
- return true;
-}
-
-/*
- parses
- x : int, y : float;
- ^^this^^
- then recursively calls itself to parse the rest
- NOTE: this function actually parses all types in the declaration, but it just
- calls itself to do that.
-
-*/
-static bool parse_single_type_in_decl(Parser *p, Declaration *d) {
- Tokenizer *t = p->tokr;
- /* OPTIM: Maybe don't use a dynamic array or use parser allocator. */
- size_t n_idents_with_this_type = 1;
- while (1) {
- Identifier *ident = arr_add(&d->idents);
- if (t->token->kind != TOKEN_IDENT) {
- tokr_err(t, "Cannot declare non-identifier.");
- return false;
- }
- *ident = t->token->ident;
- /*
- only keep track of file scoped declarations---
- block enter/exit code will handle the rest
- */
- if (p->block == NULL) {
- if ((*ident)->decls.len) {
- /* this was already declared! */
- IdentDecl *prev = (*ident)->decls.data;
- tokr_err(t, "Re-declaration of identifier in global scope.");
- info_print(prev->decl->where, "Previous declaration was here.");
- return false;
- }
- assert(!(*ident)->decls.item_sz);
- arr_create(&(*ident)->decls, sizeof(IdentDecl));
- IdentDecl *ident_decl = arr_add(&(*ident)->decls);
- ident_decl->decl = d;
- ident_decl->scope = NULL;
- }
- t->token++;
- if (token_is_kw(t->token, KW_COMMA)) {
- t->token++;
- n_idents_with_this_type++;
- continue;
- }
- if (token_is_kw(t->token, KW_COLON)) {
- t->token++;
- break;
- }
- if (token_is_kw(t->token, KW_AT)) {
- d->flags |= DECL_FLAG_CONST;
- t->token++;
- break;
- }
- tokr_err(t, "Expected ',' to continue listing variables or ':' / '@' to indicate type.");
- return false;
- }
-
-
- if (token_is_kw(t->token, KW_SEMICOLON)) {
- /* e.g. foo :; */
- tokr_err(t, "Cannot infer type without expression.");
- return false;
- }
-
- bool annotates_type = !token_is_kw(t->token, KW_EQ) && !token_is_kw(t->token, KW_COMMA);
- if (d->type.kind != TYPE_VOID /* multiple types in one declaration */
- && (!!(d->flags & DECL_FLAG_ANNOTATES_TYPE)) != annotates_type) { /* annotation on one decl but not the other */
- /* e.g. x: int, y := 3, 5;*/
- tokr_err(t, "You must specify either all types or no types in a single declaration.");
- return false;
- }
- if (annotates_type) {
- d->flags |= DECL_FLAG_ANNOTATES_TYPE;
- Type type;
- if (!parse_type(p, &type)) {
- return false;
- }
- if (n_idents_with_this_type == 1 && d->type.kind == TYPE_VOID) {
- d->type = type;
- } else if (d->type.kind == TYPE_TUPLE) {
- /* add to tuple */
- for (size_t i = 0; i < n_idents_with_this_type; i++) {
- *(Type*)arr_add(&d->type.tuple) = type;
- }
- } else {
- /* construct tuple */
- Array tup_arr;
- arr_create(&tup_arr, sizeof(Type));
- if (d->type.kind != TYPE_VOID) {
- *(Type*)arr_add(&tup_arr) = d->type; /* add current type */
- }
- d->type.flags = 0;
- d->type.kind = TYPE_TUPLE;
- d->type.tuple = tup_arr;
- for (size_t i = 0; i < n_idents_with_this_type; i++) {
- *(Type*)arr_add(&d->type.tuple) = type;
- }
- }
- }
-
- if (token_is_kw(t->token, KW_COMMA)) {
- /* next type in declaration */
- t->token++; /* move past , */
- return parse_single_type_in_decl(p, d);
- }
-
- /* OPTIM: switch t->token->kw ? */
- if (token_is_kw(t->token, KW_EQ)) {
- t->token++;
- if (!parse_expr(p, &d->expr, expr_find_end(p, EXPR_END_SEMICOLON)))
- return false;
- d->flags |= DECL_FLAG_HAS_EXPR;
- if (token_is_kw(t->token, KW_SEMICOLON)) {
- t->token++;
- return true;
- }
- tokr_err(t, "Expected ';' at end of expression"); /* should never happen in theory right now */
- return false;
- } else if (token_is_kw(t->token, KW_SEMICOLON)) {
- t->token++;
- return true;
- } else {
- tokr_err(t, "Expected ';' or '=' at end of delaration.");
- return false;
- }
-}
-
-static bool parse_decl(Parser *p, Declaration *d) {
- d->type.kind = TYPE_VOID;
- d->where = p->tokr->token->where;
- arr_create(&d->idents, sizeof(Identifier));
-
- d->flags = 0;
- return parse_single_type_in_decl(p, d); /* recursively calls itself to parse all types */
-}
-
-static bool parse_stmt(Parser *p, Statement *s) {
- Tokenizer *t = p->tokr;
- if (t->token->kind == TOKEN_EOF)
- tokr_err(t, "Expected statement.");
- s->where = t->token->where;
- /*
- NOTE: This may cause problems in the future! Other statements might have comma
- as the second token.
- */
- if (token_is_kw(t->token + 1, KW_COLON) || token_is_kw(t->token + 1, KW_COMMA)
- || token_is_kw(t->token + 1, KW_AT)) {
- s->kind = STMT_DECL;
- if (!parse_decl(p, &s->decl)) {
- /* move to next statement */
- /* TODO: This might cause unhelpful errors if the first semicolon is inside a block, etc. */
- while (!token_is_kw(t->token, KW_SEMICOLON)) {
- if (t->token->kind == TOKEN_EOF) {
- /* don't bother continuing */
- tokr_err(t, "No semicolon found at end of declaration.");
- return false;
- }
- t->token++;
- }
- t->token++; /* move past ; */
- return false;
- }
- return true;
- } else {
- s->kind = STMT_EXPR;
- Token *end = expr_find_end(p, EXPR_END_SEMICOLON);
- if (!end) {
- tokr_err(t, "No semicolon found at end of statement.");
- while (t->token->kind != TOKEN_EOF) t->token++; /* move to end of file */
- return false;
- }
- if (!parse_expr(p, &s->expr, end)) {
- t->token = end + 1;
- return false;
- }
- if (!token_is_kw(t->token, KW_SEMICOLON)) {
- tokr_err(t, "Expected ';' at end of statement.");
- t->token = end + 1;
- return false;
- }
- t->token++; /* move past ; */
- return true;
- }
-}
-
-static void parser_from_tokenizer(Parser *p, Tokenizer *t) {
- p->tokr = t;
- p->block = NULL;
- block_arr_create(&p->exprs, 10, sizeof(Expression)); /* block size = 1024 */
-}
-
-static bool parse_file(Parser *p, ParsedFile *f) {
- Tokenizer *t = p->tokr;
- arr_create(&f->stmts, sizeof(Statement));
- bool ret = true;
- while (t->token->kind != TOKEN_EOF) {
- Statement *stmt = arr_add(&f->stmts);
- if (!parse_stmt(p, stmt))
- ret = false;
- }
- return ret;
-}
-
-#define PARSE_PRINT_LOCATION(l) //fprintf(out, "[l%lu]", (unsigned long)(l).line);
-
-static void fprint_expr(FILE *out, Expression *e);
-static void fprint_type(FILE *out, Type *t) {
- PARSE_PRINT_LOCATION(t->where);
- switch (t->kind) {
- case TYPE_BUILTIN:
- fprintf(out, "%s", keywords[builtin_type_to_kw(t->builtin)]);
- break;
- case TYPE_VOID:
- fprintf(out, "void");
- break;
- case TYPE_FN: {
- Type *types = t->fn.types.data;
- fprintf(out, "fn (");
- for (size_t i = 1; i < t->fn.types.len; i++){
- fprint_type(out, &types[i]);
- fprintf(out, ",");
- }
- fprintf(out, ") ");
- fprint_type(out, &types[0]);
- } break;
- case TYPE_TUPLE: {
- fprintf(out, "(");
- arr_foreach(&t->tuple, Type, child) {
- if (child != t->tuple.data) {
- fprintf(out, ", ");
- }
- fprint_type(out, child);
- }
- fprintf(out, ")");
- } break;
- case TYPE_ARR:
- fprintf(out, "[");
- if (t->flags & TYPE_FLAG_RESOLVED) {
- fprintf(out, INTEGER_FMT, t->arr.n);
- } else {
- fprint_expr(out, t->arr.n_expr);
- }
- fprintf(out, "]");
- fprint_type(out, t->arr.of);
- break;
- }
-}
-
-static void fprint_param(FILE *out, Param *p) {
- fprint_ident(out, p->name);
- fprintf(out, ": ");
- fprint_type(out, &p->type);
-}
-
-static void fprint_stmt(FILE *out, Statement *s);
-
-static void fprint_block(FILE *out, Block *b) {
- fprintf(out, "{\n");
- arr_foreach(&b->stmts, Statement, stmt) {
- fprint_stmt(out, stmt);
- }
- fprintf(out, "}");
-
-}
-
-static void fprint_fn_expr(FILE *out, FnExpr *f) {
- fprintf(out, "fn (");
- arr_foreach(&f->params, Param, param) {
- if (param != f->params.data)
- fprintf(out, ", ");
- fprint_param(out, param);
- }
- fprintf(out, ") ");
- fprint_type(out, &f->ret_type);
- fprintf(out, " ");
- fprint_block(out, &f->body);
-}
-
-static void fprint_expr(FILE *out, Expression *e) {
- PARSE_PRINT_LOCATION(e->where);
- switch (e->kind) {
- case EXPR_INT_LITERAL:
- fprintf(out, "%lld", (long long)e->intl);
- break;
- case EXPR_FLOAT_LITERAL:
- fprintf(out, "%f", (double)e->floatl);
- break;
- case EXPR_STR_LITERAL:
- fprintf(out, "\"%s\"", e->strl.str);
- break;
- case EXPR_IDENT:
- fprint_ident(out, e->ident);
- break;
- case EXPR_BINARY_OP:
- switch (e->binary.op) {
- case BINARY_PLUS:
- fprintf(out, "add");
- break;
- case BINARY_MINUS:
- fprintf(out, "subtract");
- break;
- case BINARY_SET:
- fprintf(out, "set");
- break;
- case BINARY_AT_INDEX:
- fprintf(out, "at");
- break;
- case BINARY_COMMA:
- fprintf(out, "tuple");
- break;
- }
- fprintf(out, "(");
- fprint_expr(out, e->binary.lhs);
- fprintf(out, ",");
- fprint_expr(out, e->binary.rhs);
- fprintf(out, ")");
- break;
- case EXPR_UNARY_OP:
- switch (e->unary.op) {
- case UNARY_MINUS:
- fprintf(out, "negate");
- break;
- }
- fprintf(out, "(");
- fprint_expr(out, e->unary.of);
- fprintf(out, ")");
- break;
- case EXPR_FN:
- fprint_fn_expr(out, &e->fn);
- break;
- case EXPR_CALL:
- fprint_expr(out, e->call.fn);
- fprintf(out, "(");
- arr_foreach(&e->call.args, Expression, arg) {
- if (arg != e->call.args.data) fprintf(out, ", ");
- fprint_expr(out, arg);
- }
- fprintf(out, ")");
- break;
- }
-}
-
-
-static void fprint_decl(FILE *out, Declaration *d) {
- PARSE_PRINT_LOCATION(d->where);
- arr_foreach(&d->idents, Identifier, ident) {
- if (ident != d->idents.data) fprintf(out, ", ");
- fprint_ident(out, *ident);
- }
- if (d->flags & DECL_FLAG_CONST) {
- fprintf(out, "[const]");
- }
- fprintf(out, ":");
- if (d->flags & DECL_FLAG_ANNOTATES_TYPE) {
- fprint_type(out, &d->type);
- }
- if (d->flags & DECL_FLAG_HAS_EXPR) {
- fprintf(out, "=");
- fprint_expr(out, &d->expr);
- }
-}
-
-static void fprint_stmt(FILE *out, Statement *s) {
- PARSE_PRINT_LOCATION(s->where);
- switch (s->kind) {
- case STMT_DECL:
- fprint_decl(out, &s->decl);
- fprintf(out, ";\n");
- break;
- case STMT_EXPR:
- fprint_expr(out, &s->expr);
- fprintf(out, ";\n");
- break;
- }
-}
-
-static void fprint_parsed_file(FILE *out, ParsedFile *f) {
- arr_foreach(&f->stmts, Statement, stmt) {
- fprint_stmt(out, stmt);
- }
-}
-
-/* TODO: Freeing parser (remember to free args) */