diff options
-rw-r--r-- | cgen.c | 127 | ||||
-rw-r--r-- | main.c | 11 | ||||
-rw-r--r-- | out.c | 3 | ||||
-rw-r--r-- | parse.c | 98 | ||||
-rw-r--r-- | test.toc | 6 | ||||
-rw-r--r-- | toc.c | 1 | ||||
-rw-r--r-- | tokenizer.c | 33 | ||||
-rw-r--r-- | util/err.c | 14 |
8 files changed, 217 insertions, 76 deletions
@@ -0,0 +1,127 @@ +/* the generation of C code */ + +typedef struct { + FILE *out; +} CGenerator; + + +static void cgen_vwrite(CGenerator *g, const char *fmt, va_list args) { + vfprintf(g->out, fmt, args); +} + +static void cgen_write(CGenerator *g, const char *fmt, ...) { + va_list args; + va_start(args, fmt); + cgen_vwrite(g, fmt, args); + va_end(args); +} + +static void cgen_writeln(CGenerator *g, const char *fmt, ...) { + va_list args; + va_start(args, fmt); + cgen_vwrite(g, fmt, args); + va_end(args); + cgen_write(g, "\n"); +} + +static void cgen_write_comment(CGenerator *g, const char *fmt, ...) { + cgen_write(g, "/* "); + va_list args; + va_start(args, fmt); + cgen_vwrite(g, fmt, args); + va_end(args); + cgen_write(g, " */"); +} + +static void cgen_write_line_comment(CGenerator *g, const char *fmt, ...) { + /* could switch to // for c99 */ + cgen_write(g, "/* "); + va_list args; + va_start(args, fmt); + cgen_vwrite(g, fmt, args); + va_end(args); + cgen_write(g, " */\n"); +} + +static void cgen_write_ident(CGenerator *g, Identifier i) { + ident_fprint(g->out, i); +} + +static void cgen_create(CGenerator *g, FILE *out) { + g->out = out; +} + +static bool expr_cgen(Expression *e, CGenerator *g) { + switch (e->kind) { + case EXPR_INT_LITERAL: + cgen_write(g, "%lld", e->intl); + break; + case EXPR_FLOAT_LITERAL: + /* TODO: more precision */ + cgen_write(g, "%f", e->floatl); + break; + case EXPR_STR_LITERAL: + cgen_write(g, "\""); + /* OPTIM: Maybe don't use i? this will probably be optimized by the compiler though... */ + for (size_t i = 0; i < e->strl.len; i++) { + /* TODO: Print ordinary characters nicely */ + cgen_write(g, "\\x%02x", e->strl.str[i]); + } + cgen_write(g, "\""); + break; + case EXPR_IDENT: + /* TODO: check if declared */ + cgen_write_ident(g, e->ident); + break; + case EXPR_BINARY_OP: + cgen_write(g, "("); + expr_cgen(e->binary.lhs, g); + switch (e->binary.op) { + case BINARY_PLUS: + cgen_write(g, "+"); + break; + case BINARY_MINUS: + cgen_write(g, "-"); + break; + } + expr_cgen(e->binary.rhs, g); + cgen_write(g, ")"); + break; + case EXPR_UNARY_OP: + cgen_write(g, "("); + switch (e->unary.op) { + case UNARY_MINUS: + cgen_write(g, "-"); + break; + } + expr_cgen(e->unary.of, g); + cgen_write(g, ")"); + break; + } + return true; +} + +static bool stmt_cgen(Statement *s, CGenerator *g) { + switch (s->kind) { + case STMT_EXPR: + if (!expr_cgen(&s->expr, g)) + return false; + cgen_write(g, ";\n"); + break; + } + return true; +} + +static bool file_cgen(ParsedFile *f, CGenerator *g) { + cgen_write_line_comment(g, "toc"); + bool ret = true; + arr_foreach(&f->stmts, Statement, stmt) { + if (stmt->kind == STMT_EXPR) { + /* TODO: eventually make this an error / compile-time statement */ + warn_print(stmt->where, "Expression statement at top level."); + } + if (!stmt_cgen(stmt, g)) + ret = false; + } + return ret; +} @@ -54,6 +54,17 @@ int main(int argc, char **argv) { parsed_file_fprint(stdout, &f); tokr_free(&t); + + const char *out_filename = "out.c"; + + FILE *out = fopen(out_filename, "w"); + CGenerator cgen; + cgen_create(&cgen, out); + if (!file_cgen(&f, &cgen)) { + err_fprint(TEXT_IMPORTANT("Errors occured while generating C code.\n")); + return EXIT_FAILURE; + } + free(contents); fclose(in); @@ -0,0 +1,3 @@ +/* toc */ +"\x48\x65\x6c\x6c\x6f\x20\x74\x68\x65\x72\x65"; +((5+(((3-34)-134)-(-(-(-(-(-(-(-164423)))))))))+(235443-(-543))); @@ -1,9 +1,3 @@ -/* - TODO: - all of these functions should leave the tokenizer at a "reasonable" place - for parsing to continue. -*/ - typedef enum { TYPE_VOID, TYPE_BUILTIN @@ -398,7 +392,6 @@ static bool expr_parse(Expression *e, Parser *p, Token *end) { e->where = t->token->where; if (end <= t->token) { tokr_err(t, "Empty expression."); - t->token = end + 1; return false; } if (end - t->token == 1) { @@ -432,7 +425,6 @@ static bool expr_parse(Expression *e, Parser *p, Token *end) { break; default: tokr_err(t, "Unrecognized expression."); - t->token = end + 1; return false; } t->token = end; @@ -471,7 +463,6 @@ static bool expr_parse(Expression *e, Parser *p, Token *end) { if (brace_level < 0) { t->token = token; tokr_err(t, "Excessive closing brace."); - t->token = end + 1; return false; } break; @@ -494,22 +485,18 @@ static bool expr_parse(Expression *e, Parser *p, Token *end) { */ if (paren_level > 0) { tokr_err(t, "Too many opening parentheses."); - t->token = end + 1; return false; } if (brace_level > 0) { tokr_err(t, "Too many opening braces."); - t->token = end + 1; return false; } if (entirely_within_parentheses) { t->token++; /* move past opening ( */ Token *new_end = end - 1; /* parse to ending ) */ - if (!expr_parse(e, p, new_end)) { - t->token = end + 1; + if (!expr_parse(e, p, new_end)) return false; - } t->token++; /* move past closing ) */ return true; } @@ -519,14 +506,12 @@ static bool expr_parse(Expression *e, Parser *p, Token *end) { if (token_is_kw(t->token, KW_FN)) { /* this is a function */ e->kind = EXPR_FN; - if (!fn_expr_parse(&e->fn, p)) { - t->token = end + 1; /* move token past end for further parsing */ + if (!fn_expr_parse(&e->fn, p)) return false; - } + if (t->token != end) { tokr_err(t, "Direct function calling in an expression is not supported yet.\nYou can wrap the function in parentheses."); /* TODO */ - t->token = end + 1; return false; } return true; @@ -559,7 +544,6 @@ static bool expr_parse(Expression *e, Parser *p, Token *end) { e->kind = EXPR_CALL; e->call.fn = parser_new_expr(p); if (!expr_parse(e->call.fn, p, token)) { /* parse up to ( as function */ - t->token = end + 1; return false; } arr_create(&e->call.args, sizeof(Expression)); @@ -569,12 +553,10 @@ static bool expr_parse(Expression *e, Parser *p, Token *end) { while (1) { if (t->token->kind == TOKEN_EOF) { tokr_err(t, "Expected argument list to continue."); - t->token = end + 1; return false; } Expression *arg = arr_add(&e->call.args); if (!expr_parse(arg, p, expr_find_end(p, EXPR_END_RPAREN_OR_COMMA))) { - t->token = end + 1; return false; } if (token_is_kw(t->token, KW_RPAREN)) @@ -587,7 +569,6 @@ static bool expr_parse(Expression *e, Parser *p, Token *end) { } /* array accesses, etc. */ tokr_err(t, "Not implemented yet."); - t->token = end + 1; return false; } @@ -618,7 +599,6 @@ static bool expr_parse(Expression *e, Parser *p, Token *end) { } if (!is_unary) { tokr_err(t, "%s is not a unary operator.", keywords[lowest_precedence_op->kw]); - t->token = end + 1; return false; } e->unary.op = op; @@ -646,7 +626,6 @@ static bool expr_parse(Expression *e, Parser *p, Token *end) { Expression *lhs = parser_new_expr(p); e->binary.lhs = lhs; if (!expr_parse(lhs, p, lowest_precedence_op)) { - t->token = end + 1; return false; } @@ -654,7 +633,6 @@ static bool expr_parse(Expression *e, Parser *p, Token *end) { t->token = lowest_precedence_op + 1; e->binary.rhs = rhs; if (!expr_parse(rhs, p, end)) { - t->token = end + 1; return false; } @@ -667,6 +645,8 @@ static bool decl_parse(Declaration *d, Parser *p) { d->where = t->token->where; arr_create(&d->idents, sizeof(Identifier)); + d->flags = 0; + while (1) { Identifier *ident = arr_add(&d->idents); if (t->token->kind != TOKEN_IDENT) { @@ -701,48 +681,50 @@ static bool decl_parse(Declaration *d, Parser *p) { t->token++; break; } - tokr_err(t, "Expected ',' to continue listing variables or ':' to indicate type."); + if (token_is_kw(t->token, KW_AT)) { + d->flags |= DECL_FLAG_CONST; + t->token++; + break; + } + tokr_err(t, "Expected ',' to continue listing variables or ':' / '@' to indicate type."); return false; } - d->flags = 0; + if (token_is_kw(t->token, KW_SEMICOLON)) { + /* e.g. foo :; */ + tokr_err(t, "Cannot infer type without expression."); + return false; + } - - if (!token_is_kw(t->token, KW_MINUS) - && !token_is_kw(t->token, KW_EQ) - && !token_is_kw(t->token, KW_SEMICOLON)) { - if (!type_parse(&d->type, p)) - return false; - } else { + if (token_is_kw(t->token, KW_EQ)) { + /* := / @= */ d->flags |= DECL_FLAG_INFER_TYPE; - } - - if (token_is_kw(t->token, KW_SEMICOLON)) { - if (d->flags & DECL_FLAG_INFER_TYPE) { - tokr_err(t, "Cannot infer type without expression."); + } else { + if (!type_parse(&d->type, p)) { return false; } - } else if (token_is_kw(t->token, KW_EQ)) { + } + + /* OPTIM: switch */ + if (token_is_kw(t->token, KW_EQ)) { t->token++; if (!expr_parse(&d->expr, p, expr_find_end(p, EXPR_END_SEMICOLON))) return false; d->flags |= DECL_FLAG_HAS_EXPR; - } else if (token_is_kw(t->token, KW_MINUS)) { - t->token++; - if (!expr_parse(&d->expr, p, expr_find_end(p, EXPR_END_SEMICOLON))) - return false; - d->flags |= DECL_FLAG_HAS_EXPR | DECL_FLAG_CONST; - } else { - tokr_err(t, "Expected ';', '=', or '-' in delaration."); + if (token_is_kw(t->token, KW_SEMICOLON)) { + t->token++; + return true; + } + tokr_err(t, "Expected ';' at end of expression"); /* should never happen in theory right now */ return false; - } - if (token_is_kw(t->token, KW_SEMICOLON)) { + } else if (token_is_kw(t->token, KW_SEMICOLON)) { t->token++; return true; + } else { + tokr_err(t, "Expected ';' or '=' at end of delaration."); + return false; } - tokr_err(t, "Expected ';' at end of expression"); /* should never happen in theory right now */ - return false; } static bool stmt_parse(Statement *s, Parser *p) { @@ -754,7 +736,8 @@ static bool stmt_parse(Statement *s, Parser *p) { NOTE: This may cause problems in the future! Other statements might have comma as the second token. */ - if (token_is_kw(t->token + 1, KW_COLON) || token_is_kw(t->token + 1, KW_COMMA)) { + if (token_is_kw(t->token + 1, KW_COLON) || token_is_kw(t->token + 1, KW_COMMA) + || token_is_kw(t->token + 1, KW_AT)) { s->kind = STMT_DECL; if (!decl_parse(&s->decl, p)) { /* move to next statement */ @@ -762,23 +745,30 @@ static bool stmt_parse(Statement *s, Parser *p) { while (!token_is_kw(t->token, KW_SEMICOLON)) { if (t->token->kind == TOKEN_EOF) { /* don't bother continuing */ + tokr_err(t, "No semicolon found at end of declaration."); return false; } t->token++; } + t->token++; /* move past ; */ return false; } return true; } else { s->kind = STMT_EXPR; Token *end = expr_find_end(p, EXPR_END_SEMICOLON); + if (!end) { + tokr_err(t, "No semicolon found at end of statement."); + while (t->token->kind != TOKEN_EOF) t->token++; /* move to end of file */ + return false; + } if (!expr_parse(&s->expr, p, end)) { - t->token = end; + t->token = end + 1; return false; } if (!token_is_kw(t->token, KW_SEMICOLON)) { tokr_err(t, "Expected ';' at end of statement."); - t->token = end; + t->token = end + 1; return false; } t->token++; /* move past ; */ @@ -1,4 +1,2 @@ -main :- fn(){ - printf("Hello!\n"); -}; -main := 7;
\ No newline at end of file +"Hello there"; +5+3-34-134--------164423+235443--543;
\ No newline at end of file @@ -16,3 +16,4 @@ #include "tokenizer.c" #include "parse.c" #include "blocks.c" +#include "cgen.c" diff --git a/tokenizer.c b/tokenizer.c index 7f6dcf6..25a53d5 100644 --- a/tokenizer.c +++ b/tokenizer.c @@ -11,8 +11,8 @@ typedef enum { KW_SEMICOLON, KW_EQ, KW_COLON, + KW_AT, KW_COMMA, - KW_FN, KW_LPAREN, KW_RPAREN, KW_LBRACE, @@ -22,6 +22,8 @@ typedef enum { KW_LE, KW_MINUS, KW_PLUS, + KW_LAST_SYMBOL = KW_PLUS, /* last one entirely consisting of symbols */ + KW_FN, KW_INT, KW_I8, KW_I16, @@ -38,16 +40,24 @@ typedef enum { } Keyword; static const char *keywords[KW_COUNT] = - {";", "=", ":", ",", "fn", "(", ")", "{", "}", "==", "<", "<=", "-", "+", + {";", "=", ":", "@", ",", "(", ")", "{", "}", "==", "<", "<=", "-", "+", "fn", "int", "i8", "i16", "i32", "i64", "u8", "u16", "u32", "u64", "float", "f32", "f64"}; /* Returns KW_COUNT if it's not a keyword */ /* OPTIM: don't use strncmp so much */ -static Keyword tokenize_keyword(char **s) { +static Keyword tokenize_kw(char **s) { for (Keyword k = 0; k < KW_COUNT; k++) { size_t len = strlen(keywords[k]); if (strncmp(*s, keywords[k], len) == 0) { + if (k > KW_LAST_SYMBOL) { + /* + it's not a symbol, so we need to check if it's something like "intfoo" + */ + if (isident((*s)[len])) { + return KW_COUNT; + } + } *s += len; return k; } @@ -194,12 +204,6 @@ static void tokr_err_(const char *src_file, int src_line, Tokenizer *t, const ch va_start(args, fmt); err_vprint(t->token->where, fmt, args); va_end(args); - LineNo line = t->token->where.line; - while (1) { - if (t->token->where.line != line) break; - if (t->token->kind == TOKEN_EOF) break; - t->token++; - } } #define tokr_err(...) tokr_err_(__FILE__, __LINE__, __VA_ARGS__) @@ -267,7 +271,7 @@ static bool tokenize_string(Tokenizer *tokr, char *str) { } { char *start_s = t.s; - Keyword kw = tokenize_keyword(&t.s); + Keyword kw = tokenize_kw(&t.s); if (kw != KW_COUNT) { /* it's a keyword */ Token *token = tokr_add(&t); @@ -498,14 +502,7 @@ static bool tokenize_string(Tokenizer *tokr, char *str) { return !has_err; } +/* Does NOT free string literals!!! */ static void tokr_free(Tokenizer *t) { - arr_foreach(&t->tokens, Token, token) { - switch (token->kind) { - case TOKEN_STR_LITERAL: - free(token->str.str); - break; - default: break; - } - } arr_clear(&t->tokens); } @@ -3,10 +3,12 @@ #if USE_COLORED_TEXT #define TEXT_ERROR(x) "\x1b[91m" x "\x1b[0m" #define TEXT_INFO(x) "\x1b[94m" x "\x1b[0m" +#define TEXT_WARN(x) "\x1b[93m" x "\x1b[0m" #define TEXT_IMPORTANT(x) "\x1b[1m" x "\x1b[0m" #else #define TEXT_ERROR(x) x #define TEXT_INFO(x) x +#define TEXT_WARN(x) x #define TEXT_IMPORTANT(x) x #endif @@ -44,6 +46,10 @@ static void info_print_header_(LineNo line) { err_fprint(TEXT_INFO("info:") " at line %lu of %s:\n", (unsigned long)line, err_filename); } +static void warn_print_header_(LineNo line) { + err_fprint(TEXT_WARN("warning:") " at line %lu of %s:\n", (unsigned long)line, err_filename); +} + static void err_print_footer_(const char *context) { err_fprint("\n\there --> "); const char *end = strchr(context, '\n'); @@ -82,6 +88,14 @@ static void info_print(Location where, const char *fmt, ...) { va_end(args); } +static void warn_print(Location where, const char *fmt, ...) { + va_list args; + va_start(args, fmt); + warn_print_header_(where.line); + err_vfprint(fmt, args); + err_print_footer_(where.code); + va_end(args); +} static void *err_malloc(size_t size) { void *ret = malloc(size); |