From dc3dab7f04f852c3ca7c7850623bedad27f185dd Mon Sep 17 00:00:00 2001 From: Leo Tenenbaum Date: Sun, 18 Aug 2019 11:16:53 -0400 Subject: Basic declarations --- main.c | 5 +++- parse.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++---------- test.toc | 7 ++--- tokenizer.c | 55 ++++++++++++++++++++++----------------- 4 files changed, 112 insertions(+), 41 deletions(-) diff --git a/main.c b/main.c index fe5fb14..f9332df 100644 --- a/main.c +++ b/main.c @@ -57,7 +57,10 @@ int main(int argc, char **argv) { printf("\n"); ParsedFile f; - parse_file(&f, &t); + if (!parse_file(&f, &t)) { + err_fprint(TEXT_IMPORTANT("Errors occured while parsing.\n")); + return EXIT_FAILURE; + } parsed_file_fprint(stdout, &f); diff --git a/parse.c b/parse.c index b025d08..775eac3 100644 --- a/parse.c +++ b/parse.c @@ -1,17 +1,18 @@ -typedef struct { - LineNo line; - LineNo col; -} Location; - typedef struct { Location where; - char *var; + Identifier var; + bool is_const; + bool has_expr; } Declaration; arr_declaration(Declarations, Declaration, decls_) +typedef enum { + STMT_DECLS +} StatementKind; + typedef struct { - int type; + StatementKind kind; Location where; union { Declarations decls; @@ -26,11 +27,47 @@ typedef struct { /* TODO: Add newline tokens back in; give tokens pointer to text */ static bool parse_decls(Declarations *ds, Tokenizer *t) { - if (t->token->kind != TOKEN_IDENT) { - tokr_err(t, "Cannot declare non-identifier."); - return false; + decls_create(ds); + while (1) { + Declaration decl = {0}; + if (t->token->kind != TOKEN_IDENT) { + tokr_err(t, "Cannot declare non-identifier."); + return false; + } + + decl.where = t->token->where; + decl.var = t->token->ident; + t->token++; + + if (!token_is_kw(t->token, KW_COLON)) { + tokr_err(t, "Expected ':' in declaration."); + return false; + } + + /* TODO: type */ + + t->token++; + + if (token_is_kw(t->token, KW_SEMICOLON)) { + } else if (token_is_kw(t->token, KW_EQ)) { + t->token++; + decl.has_expr = true; + } else if (token_is_kw(t->token, KW_MINUS)) { + t->token++; + decl.has_expr = true; + decl.is_const = true; + } + decls_add(ds, &decl); + if (token_is_kw(t->token, KW_SEMICOLON)) { + t->token++; + break; + } + if (!token_is_kw(t->token, KW_COMMA)) { + tokr_err(t, "Expected ';' or ',' to finish or continue declaration."); + return false; + } + t->token++; /* move past comma */ } - t->token++; return true; } @@ -38,9 +75,9 @@ static bool parse_stmt(Statement *s, Tokenizer *t) { if (token_is_kw(t->token + 1, KW_COLON)) { return parse_decls(&s->decls, t); } else { - t->token++; /* TODO: This is temporary */ + tokr_err(t, "Unreocgnized statement."); + return false; } - return true; } static bool parse_file(ParsedFile *f, Tokenizer *t) { @@ -55,8 +92,29 @@ static bool parse_file(ParsedFile *f, Tokenizer *t) { return ret; } +static void decl_fprint(FILE *out, Declaration *d) { + fprintf(out, "l%lu:", (unsigned long)d->where.line); + ident_fprint(out, d->var); + if (d->is_const) { + fprintf(out, "[const]"); + } + if (d->has_expr) { + fprintf(out, "="); + } +} + static void stmt_fprint(FILE *out, Statement *s) { - fprintf(out, "statement!\n"); + switch (s->kind) { + case STMT_DECLS: + arr_foreach(s->decls, Declaration, decl) { + if (decl != s->decls.data) { + fprintf(out, ", "); + } + decl_fprint(out, decl); + } + fprintf(out, ";\n"); + break; + } } static void parsed_file_fprint(FILE *out, ParsedFile *f) { diff --git a/test.toc b/test.toc index 2163792..10a9cff 100644 --- a/test.toc +++ b/test.toc @@ -1,3 +1,4 @@ -P :- -Q :- -R :- \ No newline at end of file +P := ; +Q := ; +R := , foo :; +S :, R :-; \ No newline at end of file diff --git a/tokenizer.c b/tokenizer.c index 7782ca3..447ffbc 100644 --- a/tokenizer.c +++ b/tokenizer.c @@ -11,6 +11,7 @@ typedef enum { KW_SEMICOLON, KW_EQ, KW_COLON, + KW_COMMA, KW_FN, KW_LPAREN, KW_RPAREN, @@ -25,7 +26,7 @@ typedef enum { /* OPTIM: Use a trie or just a function if this gets too long */ static const char *keywords[KW_COUNT] = - {";", "=", ":", "fn", "(", ")", "{", "}", "==", "<", "<=", "-"}; + {";", "=", ":", ",", "fn", "(", ")", "{", "}", "==", "<", "<=", "-"}; #define TOKR_USE_LLONG 1 @@ -51,11 +52,15 @@ typedef struct { size_t len; } StrConst; -/* NOTE: LineNo is typedef'd in util/err.c */ typedef struct { - TokenKind kind; LineNo line; char *code; +} Location; + +/* NOTE: LineNo is typedef'd in util/err.c */ +typedef struct { + TokenKind kind; + Location where; union { Keyword kw; Identifier ident; @@ -79,7 +84,7 @@ static bool token_is_kw(Token *t, Keyword kw) { } static void token_fprint(FILE *out, Token *t) { - fprintf(out, "l%lu-", (unsigned long)t->line); + fprintf(out, "l%lu-", (unsigned long)t->where.line); switch (t->kind) { case TOKEN_KW: fprintf(out, "keyword: %s", keywords[t->kw]); @@ -112,10 +117,10 @@ static void token_fprint(FILE *out, Token *t) { } static void tokr_add(Tokenizer *t, Token *token) { - if (!token->line) - token->line = t->line; - if (!token->code) - token->code = t->s; + if (!token->where.line) + token->where.line = t->line; + if (!token->where.code) + token->where.code = t->s; tokens_add(&t->tokens, token); } @@ -166,18 +171,28 @@ static void tokenization_err(Tokenizer *t, const char *fmt, ...) { /* to be used after tokenization */ static void tokr_err(Tokenizer *t, const char *fmt, ...) { - LineNo line = t->token->line; + LineNo line = t->token->where.line; va_list args; va_start(args, fmt); - err_vprint(line, t->token->code, fmt, args); + err_vprint(line, t->token->where.code, fmt, args); va_end(args); while (1) { - if (t->token->line != line) break; + if (t->token->where.line != line) break; if (t->token->kind == TOKEN_EOF) break; t->token++; } } +static void tokr_put_location(Tokenizer *tokr, Token *t) { + t->where.line = tokr->line; + t->where.code = tokr->s; +} + +static void tokr_get_location(Tokenizer *tokr, Token *t) { + tokr->line = t->where.line; + tokr->s = t->where.code; +} + static bool tokenize_string(Tokenizer *tokr, char *str) { int has_err = 0; Tokenizer t; @@ -256,8 +271,7 @@ static bool tokenize_string(Tokenizer *tokr, char *str) { n.kind = NUM_CONST_INT; n.intval = 0; Token token = {0}; - token.line = t.line; - token.code = t.s; + tokr_put_location(&t, &token); if (*t.s == '0') { tokr_nextchar(&t); /* octal/hexadecimal/binary (or zero) */ @@ -374,8 +388,7 @@ static bool tokenize_string(Tokenizer *tokr, char *str) { /* it's a character constant! */ tokr_nextchar(&t); Token token = {0}; - token.line = t.line; - token.code = t.s; + tokr_put_location(&t, &token); char c; if (*t.s == '\\') { /* escape sequence */ @@ -403,8 +416,7 @@ static bool tokenize_string(Tokenizer *tokr, char *str) { if (*t.s == '"') { /* it's a string constant! */ Token token; - token.line = t.line; - token.code = t.s; + tokr_put_location(&t, &token); tokr_nextchar(&t); size_t len = 0; size_t backslashes = 0; @@ -413,8 +425,7 @@ static bool tokenize_string(Tokenizer *tokr, char *str) { backslashes++; } else if (*t.s == 0) { /* return t to opening " so that we go to the next line */ - t.line = token.line; - t.s = token.code; + tokr_get_location(&t, &token); tokenization_err(&t, "No matching \" found."); goto err; } else { @@ -425,8 +436,7 @@ static bool tokenize_string(Tokenizer *tokr, char *str) { } char *str = malloc(len + 1); char *strptr = str; - t.s = token.code; - t.line = token.line; + tokr_get_location(&t, &token); tokr_nextchar(&t); /* past opening " */ while (*t.s != '"') { assert(*t.s); @@ -455,8 +465,7 @@ static bool tokenize_string(Tokenizer *tokr, char *str) { if (isidentstart(*t.s)) { /* it's an identifier */ Token token = {0}; - token.line = t.line; - token.code = t.s; + tokr_put_location(&t, &token); Identifier ident = ident_insert(&t.s); token.kind = TOKEN_IDENT; token.ident = ident; -- cgit v1.2.3