summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLeo Tenenbaum <pommicket@gmail.com>2019-08-22 10:38:44 -0400
committerLeo Tenenbaum <pommicket@gmail.com>2019-08-22 10:38:44 -0400
commiteda9077ce320fd683f5574441749f9ed6ffbb4b2 (patch)
tree8193c9d90f894117af77f258d9a09a85ea6e5a34
parentd63a28aa4d227544b912b3edc2479de622d18a32 (diff)
Started code generation
-rw-r--r--cgen.c127
-rw-r--r--main.c11
-rw-r--r--out.c3
-rw-r--r--parse.c98
-rw-r--r--test.toc6
-rw-r--r--toc.c1
-rw-r--r--tokenizer.c33
-rw-r--r--util/err.c14
8 files changed, 217 insertions, 76 deletions
diff --git a/cgen.c b/cgen.c
new file mode 100644
index 0000000..831a22e
--- /dev/null
+++ b/cgen.c
@@ -0,0 +1,127 @@
+/* the generation of C code */
+
+typedef struct { /* state handed to every cgen_* function */
+ FILE *out; /* stream the generated C code is printed to (set by cgen_create) */
+} CGenerator;
+
+
+/*
+    Core output routine: formats fmt with an already-started va_list
+    and prints the result to the generator's output stream.
+*/
+static void cgen_vwrite(CGenerator *g, const char *fmt, va_list args) {
+    FILE *stream = g->out;
+    vfprintf(stream, fmt, args);
+}
+
+/* printf-style write of generated code to g's output; no newline is appended */
+static void cgen_write(CGenerator *g, const char *fmt, ...) {
+    va_list ap;
+
+    va_start(ap, fmt);
+    vfprintf(g->out, fmt, ap);
+    va_end(ap);
+}
+
+/* same as cgen_write, but the formatted text is followed by a single newline */
+static void cgen_writeln(CGenerator *g, const char *fmt, ...) {
+    va_list ap;
+
+    va_start(ap, fmt);
+    cgen_vwrite(g, fmt, ap);
+    va_end(ap);
+    fputs("\n", g->out);
+}
+
+/*
+    Writes the formatted text as a C block comment (opening and closing
+    delimiters separated from the text by one space), with no trailing newline.
+*/
+static void cgen_write_comment(CGenerator *g, const char *fmt, ...) {
+    va_list ap;
+
+    fputs("/* ", g->out);
+    va_start(ap, fmt);
+    vfprintf(g->out, fmt, ap);
+    va_end(ap);
+    fputs(" */", g->out);
+}
+
+/*
+    Writes the formatted text as a comment that takes up the rest of the line:
+    a block comment immediately followed by a newline.
+*/
+static void cgen_write_line_comment(CGenerator *g, const char *fmt, ...) {
+    va_list ap;
+
+    /* could switch to // for c99 */
+    fputs("/* ", g->out);
+    va_start(ap, fmt);
+    vfprintf(g->out, fmt, ap);
+    va_end(ap);
+    fputs(" */\n", g->out);
+}
+
+/* prints identifier i to the generator's output by way of ident_fprint */
+static void cgen_write_ident(CGenerator *g, Identifier i) {
+    FILE *stream = g->out;
+    ident_fprint(stream, i);
+}
+
+/* initializes g so that generated code is written to out; only the pointer is stored */
+static void cgen_create(CGenerator *g, FILE *out) {
+    *g = (CGenerator){ .out = out };
+}
+
+/*
+    Writes e to g's output as a C expression.
+    Binary and unary operations are emitted fully parenthesized.
+    String literals are emitted byte by byte as \xNN escapes.
+    Expression kinds and operators that have no case below produce no output.
+    Returns false if generating a sub-expression failed, true otherwise.
+*/
+static bool expr_cgen(Expression *e, CGenerator *g) {
+    switch (e->kind) {
+    case EXPR_INT_LITERAL:
+        cgen_write(g, "%lld", e->intl);
+        break;
+    case EXPR_FLOAT_LITERAL:
+        /* TODO: more precision */
+        cgen_write(g, "%f", e->floatl);
+        break;
+    case EXPR_STR_LITERAL:
+        cgen_write(g, "\"");
+        /* OPTIM: Maybe don't use i? this will probably be optimized by the compiler though... */
+        for (size_t i = 0; i < e->strl.len; i++) {
+            /* TODO: Print ordinary characters nicely */
+            /*
+                Convert through unsigned char first: where plain char is signed,
+                a byte >= 0x80 would otherwise be sign-extended and printed as
+                \xffffffNN, which is not a valid one-byte escape in the output.
+            */
+            cgen_write(g, "\\x%02x", (unsigned)(unsigned char)e->strl.str[i]);
+        }
+        cgen_write(g, "\"");
+        break;
+    case EXPR_IDENT:
+        /* TODO: check if declared */
+        cgen_write_ident(g, e->ident);
+        break;
+    case EXPR_BINARY_OP:
+        cgen_write(g, "(");
+        if (!expr_cgen(e->binary.lhs, g))
+            return false;
+        switch (e->binary.op) {
+        case BINARY_PLUS:
+            cgen_write(g, "+");
+            break;
+        case BINARY_MINUS:
+            cgen_write(g, "-");
+            break;
+        }
+        if (!expr_cgen(e->binary.rhs, g))
+            return false;
+        cgen_write(g, ")");
+        break;
+    case EXPR_UNARY_OP:
+        cgen_write(g, "(");
+        switch (e->unary.op) {
+        case UNARY_MINUS:
+            cgen_write(g, "-");
+            break;
+        }
+        if (!expr_cgen(e->unary.of, g))
+            return false;
+        cgen_write(g, ")");
+        break;
+    }
+    return true;
+}
+
+/*
+    Generates C for a single statement.
+    An expression statement becomes the expression followed by ";" and a newline.
+    Returns false if generating the expression failed, true otherwise.
+*/
+static bool stmt_cgen(Statement *s, CGenerator *g) {
+    if (s->kind == STMT_EXPR) {
+        bool ok = expr_cgen(&s->expr, g);
+        if (!ok)
+            return false;
+        cgen_write(g, ";\n");
+    }
+    return true;
+}
+
+/*
+    Generates C for every statement of a parsed file, after a "toc" header comment.
+    A warning is printed for each top-level expression statement.
+    Generation continues after a failed statement; the result is false if any
+    statement failed and true otherwise.
+*/
+static bool file_cgen(ParsedFile *f, CGenerator *g) {
+    bool all_ok = true;
+
+    cgen_write_line_comment(g, "toc");
+    arr_foreach(&f->stmts, Statement, stmt) {
+        if (stmt->kind == STMT_EXPR) {
+            /* TODO: eventually make this an error / compile-time statement */
+            warn_print(stmt->where, "Expression statement at top level.");
+        }
+        /* stmt_cgen is on the left so it runs even after an earlier failure */
+        all_ok = stmt_cgen(stmt, g) && all_ok;
+    }
+    return all_ok;
+}
diff --git a/main.c b/main.c
index 9946625..3f903d4 100644
--- a/main.c
+++ b/main.c
@@ -54,6 +54,17 @@ int main(int argc, char **argv) {
parsed_file_fprint(stdout, &f);
tokr_free(&t);
+
+ const char *out_filename = "out.c";
+
+ FILE *out = fopen(out_filename, "w");
+ CGenerator cgen;
+ cgen_create(&cgen, out);
+ if (!file_cgen(&f, &cgen)) {
+ err_fprint(TEXT_IMPORTANT("Errors occured while generating C code.\n"));
+ return EXIT_FAILURE;
+ }
+
free(contents);
fclose(in);
diff --git a/out.c b/out.c
new file mode 100644
index 0000000..3260a0d
--- /dev/null
+++ b/out.c
@@ -0,0 +1,3 @@
+/* toc */
+"\x48\x65\x6c\x6c\x6f\x20\x74\x68\x65\x72\x65";
+((5+(((3-34)-134)-(-(-(-(-(-(-(-164423)))))))))+(235443-(-543)));
diff --git a/parse.c b/parse.c
index 8632d15..e8e557a 100644
--- a/parse.c
+++ b/parse.c
@@ -1,9 +1,3 @@
-/*
- TODO:
- all of these functions should leave the tokenizer at a "reasonable" place
- for parsing to continue.
-*/
-
typedef enum {
TYPE_VOID,
TYPE_BUILTIN
@@ -398,7 +392,6 @@ static bool expr_parse(Expression *e, Parser *p, Token *end) {
e->where = t->token->where;
if (end <= t->token) {
tokr_err(t, "Empty expression.");
- t->token = end + 1;
return false;
}
if (end - t->token == 1) {
@@ -432,7 +425,6 @@ static bool expr_parse(Expression *e, Parser *p, Token *end) {
break;
default:
tokr_err(t, "Unrecognized expression.");
- t->token = end + 1;
return false;
}
t->token = end;
@@ -471,7 +463,6 @@ static bool expr_parse(Expression *e, Parser *p, Token *end) {
if (brace_level < 0) {
t->token = token;
tokr_err(t, "Excessive closing brace.");
- t->token = end + 1;
return false;
}
break;
@@ -494,22 +485,18 @@ static bool expr_parse(Expression *e, Parser *p, Token *end) {
*/
if (paren_level > 0) {
tokr_err(t, "Too many opening parentheses.");
- t->token = end + 1;
return false;
}
if (brace_level > 0) {
tokr_err(t, "Too many opening braces.");
- t->token = end + 1;
return false;
}
if (entirely_within_parentheses) {
t->token++; /* move past opening ( */
Token *new_end = end - 1; /* parse to ending ) */
- if (!expr_parse(e, p, new_end)) {
- t->token = end + 1;
+ if (!expr_parse(e, p, new_end))
return false;
- }
t->token++; /* move past closing ) */
return true;
}
@@ -519,14 +506,12 @@ static bool expr_parse(Expression *e, Parser *p, Token *end) {
if (token_is_kw(t->token, KW_FN)) {
/* this is a function */
e->kind = EXPR_FN;
- if (!fn_expr_parse(&e->fn, p)) {
- t->token = end + 1; /* move token past end for further parsing */
+ if (!fn_expr_parse(&e->fn, p))
return false;
- }
+
if (t->token != end) {
tokr_err(t, "Direct function calling in an expression is not supported yet.\nYou can wrap the function in parentheses.");
/* TODO */
- t->token = end + 1;
return false;
}
return true;
@@ -559,7 +544,6 @@ static bool expr_parse(Expression *e, Parser *p, Token *end) {
e->kind = EXPR_CALL;
e->call.fn = parser_new_expr(p);
if (!expr_parse(e->call.fn, p, token)) { /* parse up to ( as function */
- t->token = end + 1;
return false;
}
arr_create(&e->call.args, sizeof(Expression));
@@ -569,12 +553,10 @@ static bool expr_parse(Expression *e, Parser *p, Token *end) {
while (1) {
if (t->token->kind == TOKEN_EOF) {
tokr_err(t, "Expected argument list to continue.");
- t->token = end + 1;
return false;
}
Expression *arg = arr_add(&e->call.args);
if (!expr_parse(arg, p, expr_find_end(p, EXPR_END_RPAREN_OR_COMMA))) {
- t->token = end + 1;
return false;
}
if (token_is_kw(t->token, KW_RPAREN))
@@ -587,7 +569,6 @@ static bool expr_parse(Expression *e, Parser *p, Token *end) {
}
/* array accesses, etc. */
tokr_err(t, "Not implemented yet.");
- t->token = end + 1;
return false;
}
@@ -618,7 +599,6 @@ static bool expr_parse(Expression *e, Parser *p, Token *end) {
}
if (!is_unary) {
tokr_err(t, "%s is not a unary operator.", keywords[lowest_precedence_op->kw]);
- t->token = end + 1;
return false;
}
e->unary.op = op;
@@ -646,7 +626,6 @@ static bool expr_parse(Expression *e, Parser *p, Token *end) {
Expression *lhs = parser_new_expr(p);
e->binary.lhs = lhs;
if (!expr_parse(lhs, p, lowest_precedence_op)) {
- t->token = end + 1;
return false;
}
@@ -654,7 +633,6 @@ static bool expr_parse(Expression *e, Parser *p, Token *end) {
t->token = lowest_precedence_op + 1;
e->binary.rhs = rhs;
if (!expr_parse(rhs, p, end)) {
- t->token = end + 1;
return false;
}
@@ -667,6 +645,8 @@ static bool decl_parse(Declaration *d, Parser *p) {
d->where = t->token->where;
arr_create(&d->idents, sizeof(Identifier));
+ d->flags = 0;
+
while (1) {
Identifier *ident = arr_add(&d->idents);
if (t->token->kind != TOKEN_IDENT) {
@@ -701,48 +681,50 @@ static bool decl_parse(Declaration *d, Parser *p) {
t->token++;
break;
}
- tokr_err(t, "Expected ',' to continue listing variables or ':' to indicate type.");
+ if (token_is_kw(t->token, KW_AT)) {
+ d->flags |= DECL_FLAG_CONST;
+ t->token++;
+ break;
+ }
+ tokr_err(t, "Expected ',' to continue listing variables or ':' / '@' to indicate type.");
return false;
}
- d->flags = 0;
+ if (token_is_kw(t->token, KW_SEMICOLON)) {
+ /* e.g. foo :; */
+ tokr_err(t, "Cannot infer type without expression.");
+ return false;
+ }
-
- if (!token_is_kw(t->token, KW_MINUS)
- && !token_is_kw(t->token, KW_EQ)
- && !token_is_kw(t->token, KW_SEMICOLON)) {
- if (!type_parse(&d->type, p))
- return false;
- } else {
+ if (token_is_kw(t->token, KW_EQ)) {
+ /* := / @= */
d->flags |= DECL_FLAG_INFER_TYPE;
- }
-
- if (token_is_kw(t->token, KW_SEMICOLON)) {
- if (d->flags & DECL_FLAG_INFER_TYPE) {
- tokr_err(t, "Cannot infer type without expression.");
+ } else {
+ if (!type_parse(&d->type, p)) {
return false;
}
- } else if (token_is_kw(t->token, KW_EQ)) {
+ }
+
+ /* OPTIM: switch */
+ if (token_is_kw(t->token, KW_EQ)) {
t->token++;
if (!expr_parse(&d->expr, p, expr_find_end(p, EXPR_END_SEMICOLON)))
return false;
d->flags |= DECL_FLAG_HAS_EXPR;
- } else if (token_is_kw(t->token, KW_MINUS)) {
- t->token++;
- if (!expr_parse(&d->expr, p, expr_find_end(p, EXPR_END_SEMICOLON)))
- return false;
- d->flags |= DECL_FLAG_HAS_EXPR | DECL_FLAG_CONST;
- } else {
- tokr_err(t, "Expected ';', '=', or '-' in delaration.");
+ if (token_is_kw(t->token, KW_SEMICOLON)) {
+ t->token++;
+ return true;
+ }
+ tokr_err(t, "Expected ';' at end of expression"); /* should never happen in theory right now */
return false;
- }
- if (token_is_kw(t->token, KW_SEMICOLON)) {
+ } else if (token_is_kw(t->token, KW_SEMICOLON)) {
t->token++;
return true;
+ } else {
+ tokr_err(t, "Expected ';' or '=' at end of delaration.");
+ return false;
}
- tokr_err(t, "Expected ';' at end of expression"); /* should never happen in theory right now */
- return false;
}
static bool stmt_parse(Statement *s, Parser *p) {
@@ -754,7 +736,8 @@ static bool stmt_parse(Statement *s, Parser *p) {
NOTE: This may cause problems in the future! Other statements might have comma
as the second token.
*/
- if (token_is_kw(t->token + 1, KW_COLON) || token_is_kw(t->token + 1, KW_COMMA)) {
+ if (token_is_kw(t->token + 1, KW_COLON) || token_is_kw(t->token + 1, KW_COMMA)
+ || token_is_kw(t->token + 1, KW_AT)) {
s->kind = STMT_DECL;
if (!decl_parse(&s->decl, p)) {
/* move to next statement */
@@ -762,23 +745,30 @@ static bool stmt_parse(Statement *s, Parser *p) {
while (!token_is_kw(t->token, KW_SEMICOLON)) {
if (t->token->kind == TOKEN_EOF) {
/* don't bother continuing */
+ tokr_err(t, "No semicolon found at end of declaration.");
return false;
}
t->token++;
}
+ t->token++; /* move past ; */
return false;
}
return true;
} else {
s->kind = STMT_EXPR;
Token *end = expr_find_end(p, EXPR_END_SEMICOLON);
+ if (!end) {
+ tokr_err(t, "No semicolon found at end of statement.");
+ while (t->token->kind != TOKEN_EOF) t->token++; /* move to end of file */
+ return false;
+ }
if (!expr_parse(&s->expr, p, end)) {
- t->token = end;
+ t->token = end + 1;
return false;
}
if (!token_is_kw(t->token, KW_SEMICOLON)) {
tokr_err(t, "Expected ';' at end of statement.");
- t->token = end;
+ t->token = end + 1;
return false;
}
t->token++; /* move past ; */
diff --git a/test.toc b/test.toc
index 77b8b44..71d3b0a 100644
--- a/test.toc
+++ b/test.toc
@@ -1,4 +1,2 @@
-main :- fn(){
- printf("Hello!\n");
-};
-main := 7; \ No newline at end of file
+"Hello there";
+5+3-34-134--------164423+235443--543; \ No newline at end of file
diff --git a/toc.c b/toc.c
index 94e6224..e89d824 100644
--- a/toc.c
+++ b/toc.c
@@ -16,3 +16,4 @@
#include "tokenizer.c"
#include "parse.c"
#include "blocks.c"
+#include "cgen.c"
diff --git a/tokenizer.c b/tokenizer.c
index 7f6dcf6..25a53d5 100644
--- a/tokenizer.c
+++ b/tokenizer.c
@@ -11,8 +11,8 @@ typedef enum {
KW_SEMICOLON,
KW_EQ,
KW_COLON,
+ KW_AT,
KW_COMMA,
- KW_FN,
KW_LPAREN,
KW_RPAREN,
KW_LBRACE,
@@ -22,6 +22,8 @@ typedef enum {
KW_LE,
KW_MINUS,
KW_PLUS,
+ KW_LAST_SYMBOL = KW_PLUS, /* last one entirely consisting of symbols */
+ KW_FN,
KW_INT,
KW_I8,
KW_I16,
@@ -38,16 +40,24 @@ typedef enum {
} Keyword;
static const char *keywords[KW_COUNT] =
- {";", "=", ":", ",", "fn", "(", ")", "{", "}", "==", "<", "<=", "-", "+",
+ {";", "=", ":", "@", ",", "(", ")", "{", "}", "==", "<", "<=", "-", "+", "fn",
"int", "i8", "i16", "i32", "i64", "u8", "u16", "u32", "u64", "float", "f32",
"f64"};
/* Returns KW_COUNT if it's not a keyword */
/* OPTIM: don't use strncmp so much */
-static Keyword tokenize_keyword(char **s) {
+static Keyword tokenize_kw(char **s) {
for (Keyword k = 0; k < KW_COUNT; k++) {
size_t len = strlen(keywords[k]);
if (strncmp(*s, keywords[k], len) == 0) {
+ if (k > KW_LAST_SYMBOL) {
+ /*
+ it's not a symbol, so we need to check if it's something like "intfoo"
+ */
+ if (isident((*s)[len])) {
+ return KW_COUNT;
+ }
+ }
*s += len;
return k;
}
@@ -194,12 +204,6 @@ static void tokr_err_(const char *src_file, int src_line, Tokenizer *t, const ch
va_start(args, fmt);
err_vprint(t->token->where, fmt, args);
va_end(args);
- LineNo line = t->token->where.line;
- while (1) {
- if (t->token->where.line != line) break;
- if (t->token->kind == TOKEN_EOF) break;
- t->token++;
- }
}
#define tokr_err(...) tokr_err_(__FILE__, __LINE__, __VA_ARGS__)
@@ -267,7 +271,7 @@ static bool tokenize_string(Tokenizer *tokr, char *str) {
}
{
char *start_s = t.s;
- Keyword kw = tokenize_keyword(&t.s);
+ Keyword kw = tokenize_kw(&t.s);
if (kw != KW_COUNT) {
/* it's a keyword */
Token *token = tokr_add(&t);
@@ -498,14 +502,7 @@ static bool tokenize_string(Tokenizer *tokr, char *str) {
return !has_err;
}
+/*
+ Releases the tokenizer's token array with arr_clear.
+ Does NOT free string literals!!! The str buffers of TOKEN_STR_LITERAL tokens
+ are left allocated (the loop that used to free them is removed here).
+ NOTE(review): presumably parsed expressions still point at those buffers - confirm.
+*/
static void tokr_free(Tokenizer *t) {
- arr_foreach(&t->tokens, Token, token) {
- switch (token->kind) {
- case TOKEN_STR_LITERAL:
- free(token->str.str);
- break;
- default: break;
- }
- }
arr_clear(&t->tokens);
}
diff --git a/util/err.c b/util/err.c
index 6eaa191..0bd6424 100644
--- a/util/err.c
+++ b/util/err.c
@@ -3,10 +3,12 @@
#if USE_COLORED_TEXT
#define TEXT_ERROR(x) "\x1b[91m" x "\x1b[0m"
#define TEXT_INFO(x) "\x1b[94m" x "\x1b[0m"
+#define TEXT_WARN(x) "\x1b[93m" x "\x1b[0m"
#define TEXT_IMPORTANT(x) "\x1b[1m" x "\x1b[0m"
#else
#define TEXT_ERROR(x) x
#define TEXT_INFO(x) x
+#define TEXT_WARN(x) x
#define TEXT_IMPORTANT(x) x
#endif
@@ -44,6 +46,10 @@ static void info_print_header_(LineNo line) {
err_fprint(TEXT_INFO("info:") " at line %lu of %s:\n", (unsigned long)line, err_filename);
}
+/* prints the "warning:" banner, naming the line number and err_filename, via err_fprint */
+static void warn_print_header_(LineNo line) {
+    unsigned long line_no = (unsigned long)line;
+    err_fprint(TEXT_WARN("warning:") " at line %lu of %s:\n", line_no, err_filename);
+}
+
static void err_print_footer_(const char *context) {
err_fprint("\n\there --> ");
const char *end = strchr(context, '\n');
@@ -82,6 +88,14 @@ static void info_print(Location where, const char *fmt, ...) {
va_end(args);
}
+/*
+    Prints a warning for the given location: the warning header for where.line,
+    the printf-style message, then the footer built from where.code.
+*/
+static void warn_print(Location where, const char *fmt, ...) {
+    va_list ap;
+
+    warn_print_header_(where.line);
+    va_start(ap, fmt);
+    err_vfprint(fmt, ap);
+    va_end(ap);
+    err_print_footer_(where.code);
+}
static void *err_malloc(size_t size) {
void *ret = malloc(size);