diff options
Diffstat (limited to 'parse.c')
-rw-r--r-- | parse.c | 212 |
1 files changed, 205 insertions, 7 deletions
@@ -1,10 +1,58 @@ +typedef enum { + TYPE_BUILTIN +} TypeKind; + +typedef enum { + BUILTIN_INT, + BUILTIN_I8, + BUILTIN_I16, + BUILTIN_I32, + BUILTIN_I64, + BUILTIN_U8, + BUILTIN_U16, + BUILTIN_U32, + BUILTIN_U64, + BUILTIN_FLOAT, + BUILTIN_F32, + BUILTIN_F64, + BUILTIN_TYPE_COUNT +} BuiltinType; + + +typedef struct { + TypeKind kind; + union { + BuiltinType builtin; + }; +} Type; + +typedef enum { + EXPR_INT_CONST, + EXPR_FLOAT_CONST +} ExprKind; + +typedef struct { + ExprKind kind; + Type type; + bool is_flexible_num:1; /* expressions like 5 or 7*8+3 can be any numerical type */ + union { + FloatConst floatc; + IntConst intc; + }; +} Expression; + typedef struct { Location where; Identifier var; - bool is_const; - bool has_expr; + Type type; + Expression expr; + bool infer_type:1; + bool is_const:1; + bool has_expr:1; } Declaration; +/* OPTIM: Instead of using dynamic arrays, do two passes. */ + arr_declaration(Declarations, Declaration, decls_) typedef enum { @@ -25,7 +73,133 @@ typedef struct { Statements stmts; } ParsedFile; -/* TODO: Add newline tokens back in; give tokens pointer to text */ + +/* returns BUILTIN_TYPE_COUNT on failure */ +static BuiltinType kw_to_builtin_type(Keyword kw) { + switch (kw) { + case KW_INT: return BUILTIN_INT; + case KW_I8: return BUILTIN_I8; + case KW_I16: return BUILTIN_I16; + case KW_I32: return BUILTIN_I32; + case KW_I64: return BUILTIN_I64; + case KW_U8: return BUILTIN_U8; + case KW_U16: return BUILTIN_U16; + case KW_U32: return BUILTIN_U32; + case KW_U64: return BUILTIN_U64; + case KW_FLOAT: return BUILTIN_FLOAT; + case KW_F32: return BUILTIN_F32; + case KW_F64: return BUILTIN_F64; + default: return BUILTIN_TYPE_COUNT; + } +} + +static bool parse_type(Type *type, Tokenizer *t) { + switch (t->token->kind) { + case TOKEN_KW: + type->kind = TYPE_BUILTIN; + type->builtin = kw_to_builtin_type(t->token->kw); + if (type->builtin == BUILTIN_TYPE_COUNT) { + tokr_err(t, "Expected type."); + return false; + } else { + t->token++; + return true; + } + break; + default: break; + } + tokr_err(t, "Unrecognized type."); + return false; +} + +static bool parse_expr(Expression *e, Tokenizer *t, Token *end) { + if (end == NULL) return false; + memset(e, 0, sizeof *e); + if (end - t->token == 1) { + /* 1-token expression */ + switch (t->token->kind) { + case TOKEN_NUM_CONST: { + NumConst *num = &t->token->num; + switch (num->kind) { + case NUM_CONST_FLOAT: + e->kind = EXPR_FLOAT_CONST; + e->type.kind = TYPE_BUILTIN; + e->type.builtin = BUILTIN_FLOAT; + e->floatc = num->floatval; + break; + case NUM_CONST_INT: + e->kind = EXPR_INT_CONST; + e->is_flexible_num = true; + e->type.kind = TYPE_BUILTIN; + e->type.builtin = BUILTIN_INT; /* TODO: if it's too big, use a u64 instead. */ + e->floatc = num->intval; + break; + } + } break; + default: + tokr_err(t, "Unrecognized expression."); + return false; + } + t->token = end; + return true; + } + /* TODO */ + tokr_err(t, "multi-token exprs not supported yet."); + return false; +} + +/* +ends_with = which keyword does this expression end with? +if it's KW_RPAREN, this will match parentheses properly. +*/ +typedef enum { + EXPR_END_RPAREN_OR_COMMA, + EXPR_END_SEMICOLON +} ExprEndKind; +static Token *expr_find_end(Tokenizer *t, ExprEndKind ends_with) { + long bracket_level = 0; + Token *token = t->token; + while (1) { + switch (ends_with) { + case EXPR_END_RPAREN_OR_COMMA: + if (token->kind == TOKEN_KW) { + if (token->kw == KW_COMMA && bracket_level == 0) + return token; + if (token->kw == KW_LPAREN) + bracket_level++; + if (token->kw == KW_RPAREN) { + bracket_level--; + if (bracket_level == 0) { + return token; + } + } + } + break; + case EXPR_END_SEMICOLON: + if (token_is_kw(token, KW_SEMICOLON)) + return token; + break; + } + if (token->kind == TOKEN_EOF) { + switch (ends_with) { + case EXPR_END_SEMICOLON: + tokr_err(t, "Could not find ';' at end of expression."); + return NULL; + case EXPR_END_RPAREN_OR_COMMA: + if (bracket_level > 0) { + tokr_err(t, "Mismatched parentheses."); /* FEATURE: Find out where this is */ + return NULL; + } else { + tokr_err(t, "Could not find ')' or ',' at end of expression."); + return NULL; + } + return NULL; + } + } + token++; + } +} + static bool parse_decls(Declarations *ds, Tokenizer *t) { decls_create(ds); while (1) { @@ -43,19 +217,36 @@ static bool parse_decls(Declarations *ds, Tokenizer *t) { tokr_err(t, "Expected ':' in declaration."); return false; } - - /* TODO: type */ - t->token++; - + + if (!token_is_kw(t->token, KW_MINUS) + && !token_is_kw(t->token, KW_EQ) + && !token_is_kw(t->token, KW_SEMICOLON)) { + if (!parse_type(&decl.type, t)) + return false; + } else { + decl.infer_type = true; + } + if (token_is_kw(t->token, KW_SEMICOLON)) { + if (decl.infer_type) { + tokr_err(t, "Cannot infer type without expression."); + return false; + } } else if (token_is_kw(t->token, KW_EQ)) { t->token++; + if (!parse_expr(&decl.expr, t, expr_find_end(t, EXPR_END_SEMICOLON))) + return false; decl.has_expr = true; } else if (token_is_kw(t->token, KW_MINUS)) { t->token++; + if (!parse_expr(&decl.expr, t, expr_find_end(t, EXPR_END_SEMICOLON))) + return false; decl.has_expr = true; decl.is_const = true; + } else { + tokr_err(t, "Expected ';', '=', or '-' in delaration."); + return false; } decls_add(ds, &decl); if (token_is_kw(t->token, KW_SEMICOLON)) { @@ -92,6 +283,13 @@ static bool parse_file(ParsedFile *f, Tokenizer *t) { return ret; } +static void expr_fprint(FILE *out, Expression *e) { + /* TODO */ +/* switch (e->kind) { */ +/* case : */ +/* } */ +} + static void decl_fprint(FILE *out, Declaration *d) { fprintf(out, "l%lu:", (unsigned long)d->where.line); ident_fprint(out, d->var); |