summaryrefslogtreecommitdiff
path: root/parse.c
diff options
context:
space:
mode:
Diffstat (limited to 'parse.c')
-rw-r--r--parse.c212
1 files changed, 205 insertions, 7 deletions
diff --git a/parse.c b/parse.c
index 775eac3..9ab9ee1 100644
--- a/parse.c
+++ b/parse.c
@@ -1,10 +1,58 @@
+/* Category of a Type (stored in Type.kind). Builtin types are the only category so far. */
+typedef enum {
+ TYPE_BUILTIN
+} TypeKind;
+
+/*
+Builtin numeric types, one for each type keyword handled by kw_to_builtin_type.
+BUILTIN_TYPE_COUNT is not a type: it is listed last, so its value is the
+number of builtin types, and kw_to_builtin_type returns it to signal failure.
+*/
+typedef enum {
+ BUILTIN_INT,
+ BUILTIN_I8,
+ BUILTIN_I16,
+ BUILTIN_I32,
+ BUILTIN_I64,
+ BUILTIN_U8,
+ BUILTIN_U16,
+ BUILTIN_U32,
+ BUILTIN_U64,
+ BUILTIN_FLOAT,
+ BUILTIN_F32,
+ BUILTIN_F64,
+ BUILTIN_TYPE_COUNT /* keep last */
+} BuiltinType;
+
+
+/* A type. kind says which member of the anonymous union is meaningful. */
+typedef struct {
+ TypeKind kind;
+ union {
+ BuiltinType builtin; /* set together with kind = TYPE_BUILTIN (see parse_type) */
+ };
+} Type;
+
+/* Kinds of expression. Both are numeric constants, which parse_expr produces from single-token expressions. */
+typedef enum {
+ EXPR_INT_CONST,
+ EXPR_FLOAT_CONST
+} ExprKind;
+
+/* A parsed expression, filled in by parse_expr. */
+typedef struct {
+ ExprKind kind;
+ Type type; /* parse_expr uses BUILTIN_FLOAT for float constants and BUILTIN_INT for integer constants */
+ bool is_flexible_num:1; /* expressions like 5 or 7*8+3 can be any numerical type */
+ union {
+ FloatConst floatc; /* constant value, set by parse_expr for EXPR_FLOAT_CONST */
+ IntConst intc; /* NOTE(review): presumably the constant value for EXPR_INT_CONST - confirm against parse_expr */
+ };
+} Expression;
+
+/* One declaration, as filled in by parse_decls. The three flags are single-bit bit-fields. */
typedef struct {
Location where;
Identifier var;
- bool is_const;
- bool has_expr;
+ Type type; /* filled in by parse_type when a type is written after the ':' */
+ Expression expr; /* filled in by parse_expr when an expression follows '=' or '-' */
+ bool infer_type:1; /* set when '-', '=' or ';' directly follows the ':' (no type written) */
+ bool is_const:1; /* set when the expression is introduced with '-' rather than '=' */
+ bool has_expr:1; /* set once an expression has been parsed into expr */
} Declaration;
+/* OPTIM: Instead of using dynamic arrays, do two passes. */
+
arr_declaration(Declarations, Declaration, decls_)
typedef enum {
@@ -25,7 +73,133 @@ typedef struct {
Statements stmts;
} ParsedFile;
-/* TODO: Add newline tokens back in; give tokens pointer to text */
+
+/*
+Map a type keyword to the builtin type it names.
+Returns BUILTIN_TYPE_COUNT if kw is not one of the type keywords.
+*/
+static BuiltinType kw_to_builtin_type(Keyword kw) {
+	BuiltinType builtin = BUILTIN_TYPE_COUNT; /* stays as the failure value unless kw matches */
+	switch (kw) {
+	case KW_INT:   builtin = BUILTIN_INT;   break;
+	case KW_I8:    builtin = BUILTIN_I8;    break;
+	case KW_I16:   builtin = BUILTIN_I16;   break;
+	case KW_I32:   builtin = BUILTIN_I32;   break;
+	case KW_I64:   builtin = BUILTIN_I64;   break;
+	case KW_U8:    builtin = BUILTIN_U8;    break;
+	case KW_U16:   builtin = BUILTIN_U16;   break;
+	case KW_U32:   builtin = BUILTIN_U32;   break;
+	case KW_U64:   builtin = BUILTIN_U64;   break;
+	case KW_FLOAT: builtin = BUILTIN_FLOAT; break;
+	case KW_F32:   builtin = BUILTIN_F32;   break;
+	case KW_F64:   builtin = BUILTIN_F64;   break;
+	default: break;
+	}
+	return builtin;
+}
+
+/*
+Parse the type at t->token into *type.
+On success, the token is consumed and true is returned.
+On failure, an error is reported with tokr_err, t->token is left where it
+was, and false is returned.
+*/
+static bool parse_type(Type *type, Tokenizer *t) {
+	/* only keywords can name a type for now */
+	if (t->token->kind != TOKEN_KW) {
+		tokr_err(t, "Unrecognized type.");
+		return false;
+	}
+
+	type->kind = TYPE_BUILTIN;
+	type->builtin = kw_to_builtin_type(t->token->kw);
+	if (type->builtin == BUILTIN_TYPE_COUNT) {
+		/* a keyword, but not a type keyword */
+		tokr_err(t, "Expected type.");
+		return false;
+	}
+
+	t->token++;
+	return true;
+}
+
+/*
+Parse the expression made up of the tokens from t->token up to (but not
+including) end, storing the result in *e.
+end is meant to be the result of expr_find_end. If it is NULL (expr_find_end
+failed and has already reported the error), false is returned immediately and
+*e is not touched.
+On success, t->token is set to end and true is returned.
+Only single-token numeric constants are supported so far; anything else is
+reported with tokr_err and false is returned.
+*/
+static bool parse_expr(Expression *e, Tokenizer *t, Token *end) {
+	if (end == NULL) return false;
+	memset(e, 0, sizeof *e);
+	if (end - t->token == 1) {
+		/* 1-token expression */
+		switch (t->token->kind) {
+		case TOKEN_NUM_CONST: {
+			NumConst *num = &t->token->num;
+			switch (num->kind) {
+			case NUM_CONST_FLOAT:
+				e->kind = EXPR_FLOAT_CONST;
+				e->type.kind = TYPE_BUILTIN;
+				e->type.builtin = BUILTIN_FLOAT;
+				e->floatc = num->floatval;
+				break;
+			case NUM_CONST_INT:
+				e->kind = EXPR_INT_CONST;
+				e->is_flexible_num = true;
+				e->type.kind = TYPE_BUILTIN;
+				e->type.builtin = BUILTIN_INT; /* TODO: if it's too big, use a u64 instead. */
+				/*
+				store into the integer member of the union: assigning to floatc
+				would convert the value to floating point, and intc would not hold it
+				*/
+				e->intc = num->intval;
+				break;
+			}
+		} break;
+		default:
+			tokr_err(t, "Unrecognized expression.");
+			return false;
+		}
+		t->token = end;
+		return true;
+	}
+	/* TODO */
+	tokr_err(t, "multi-token exprs not supported yet.");
+	return false;
+}
+
+/*
+ExprEndKind = which token terminates the expression scanned by expr_find_end.
+EXPR_END_RPAREN_OR_COMMA: a ',' or ')' keyword; expr_find_end tracks a bracket
+level so that parentheses inside the expression are taken into account.
+EXPR_END_SEMICOLON: the first ';' keyword.
+*/
+typedef enum {
+ EXPR_END_RPAREN_OR_COMMA,
+ EXPR_END_SEMICOLON
+} ExprEndKind;
+/*
+Find the token which ends the expression starting at t->token.
+t->token itself is not moved.
+ends_with selects the terminator:
+- EXPR_END_SEMICOLON: the first ';'.
+- EXPR_END_RPAREN_OR_COMMA: the first ',' which is not inside parentheses
+  opened within the expression, or the first ')' which does not match a '('
+  opened within the expression (i.e. the one closing an enclosing '(').
+Returns the terminating token, or NULL (after reporting the error with
+tokr_err) if the end of the file is reached first.
+*/
+static Token *expr_find_end(Tokenizer *t, ExprEndKind ends_with) {
+	long bracket_level = 0; /* number of '(' opened since t->token and not yet closed */
+	Token *token;
+	for (token = t->token; ; token++) {
+		switch (ends_with) {
+		case EXPR_END_RPAREN_OR_COMMA:
+			if (token->kind == TOKEN_KW) {
+				if (token->kw == KW_COMMA && bracket_level == 0)
+					return token;
+				if (token->kw == KW_LPAREN)
+					bracket_level++;
+				if (token->kw == KW_RPAREN) {
+					/*
+					a ')' with nothing left to close belongs to the enclosing
+					construct, so it ends the expression; otherwise it just
+					closes a '(' inside the expression
+					*/
+					if (bracket_level == 0)
+						return token;
+					bracket_level--;
+				}
+			}
+			break;
+		case EXPR_END_SEMICOLON:
+			if (token_is_kw(token, KW_SEMICOLON))
+				return token;
+			break;
+		}
+		if (token->kind == TOKEN_EOF) {
+			switch (ends_with) {
+			case EXPR_END_SEMICOLON:
+				tokr_err(t, "Could not find ';' at end of expression.");
+				break;
+			case EXPR_END_RPAREN_OR_COMMA:
+				if (bracket_level > 0) {
+					tokr_err(t, "Mismatched parentheses."); /* FEATURE: Find out where this is */
+				} else {
+					tokr_err(t, "Could not find ')' or ',' at end of expression.");
+				}
+				break;
+			}
+			/* never scan past the end of the token array */
+			return NULL;
+		}
+	}
+}
+
static bool parse_decls(Declarations *ds, Tokenizer *t) {
decls_create(ds);
while (1) {
@@ -43,19 +217,36 @@ static bool parse_decls(Declarations *ds, Tokenizer *t) {
tokr_err(t, "Expected ':' in declaration.");
return false;
}
-
- /* TODO: type */
-
t->token++;
-
+
+ if (!token_is_kw(t->token, KW_MINUS)
+ && !token_is_kw(t->token, KW_EQ)
+ && !token_is_kw(t->token, KW_SEMICOLON)) {
+ if (!parse_type(&decl.type, t))
+ return false;
+ } else {
+ decl.infer_type = true;
+ }
+
if (token_is_kw(t->token, KW_SEMICOLON)) {
+ if (decl.infer_type) {
+ tokr_err(t, "Cannot infer type without expression.");
+ return false;
+ }
} else if (token_is_kw(t->token, KW_EQ)) {
t->token++;
+ if (!parse_expr(&decl.expr, t, expr_find_end(t, EXPR_END_SEMICOLON)))
+ return false;
decl.has_expr = true;
} else if (token_is_kw(t->token, KW_MINUS)) {
t->token++;
+ if (!parse_expr(&decl.expr, t, expr_find_end(t, EXPR_END_SEMICOLON)))
+ return false;
decl.has_expr = true;
decl.is_const = true;
+ } else {
+ tokr_err(t, "Expected ';', '=', or '-' in delaration.");
+ return false;
}
decls_add(ds, &decl);
if (token_is_kw(t->token, KW_SEMICOLON)) {
@@ -92,6 +283,13 @@ static bool parse_file(ParsedFile *f, Tokenizer *t) {
return ret;
}
+/* Print the expression e to out. Not implemented yet: this currently does nothing. */
+static void expr_fprint(FILE *out, Expression *e) {
+	/* TODO: switch on e->kind and print each kind of expression */
+	(void)out;
+	(void)e;
+}
+
static void decl_fprint(FILE *out, Declaration *d) {
fprintf(out, "l%lu:", (unsigned long)d->where.line);
ident_fprint(out, d->var);