summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r--  main.c       5
-rw-r--r--  parse.c     86
-rw-r--r--  test.toc     7
-rw-r--r--  tokenizer.c 55
4 files changed, 112 insertions, 41 deletions
diff --git a/main.c b/main.c
index fe5fb14..f9332df 100644
--- a/main.c
+++ b/main.c
@@ -57,7 +57,10 @@ int main(int argc, char **argv) {
printf("\n");
ParsedFile f;
- parse_file(&f, &t);
+ if (!parse_file(&f, &t)) {
+ err_fprint(TEXT_IMPORTANT("Errors occured while parsing.\n"));
+ return EXIT_FAILURE;
+ }
parsed_file_fprint(stdout, &f);
diff --git a/parse.c b/parse.c
index b025d08..775eac3 100644
--- a/parse.c
+++ b/parse.c
@@ -1,17 +1,18 @@
typedef struct {
- LineNo line;
- LineNo col;
-} Location;
-
-typedef struct {
Location where;
- char *var;
+ Identifier var;
+ bool is_const;
+ bool has_expr;
} Declaration;
arr_declaration(Declarations, Declaration, decls_)
+typedef enum {
+ STMT_DECLS
+} StatementKind;
+
typedef struct {
- int type;
+ StatementKind kind;
Location where;
union {
Declarations decls;
@@ -26,11 +27,47 @@ typedef struct {
/* TODO: Add newline tokens back in; give tokens pointer to text */
static bool parse_decls(Declarations *ds, Tokenizer *t) {
- if (t->token->kind != TOKEN_IDENT) {
- tokr_err(t, "Cannot declare non-identifier.");
- return false;
+ decls_create(ds);
+ while (1) {
+ Declaration decl = {0};
+ if (t->token->kind != TOKEN_IDENT) {
+ tokr_err(t, "Cannot declare non-identifier.");
+ return false;
+ }
+
+ decl.where = t->token->where;
+ decl.var = t->token->ident;
+ t->token++;
+
+ if (!token_is_kw(t->token, KW_COLON)) {
+ tokr_err(t, "Expected ':' in declaration.");
+ return false;
+ }
+
+ /* TODO: type */
+
+ t->token++;
+
+ if (token_is_kw(t->token, KW_SEMICOLON)) {
+ } else if (token_is_kw(t->token, KW_EQ)) {
+ t->token++;
+ decl.has_expr = true;
+ } else if (token_is_kw(t->token, KW_MINUS)) {
+ t->token++;
+ decl.has_expr = true;
+ decl.is_const = true;
+ }
+ decls_add(ds, &decl);
+ if (token_is_kw(t->token, KW_SEMICOLON)) {
+ t->token++;
+ break;
+ }
+ if (!token_is_kw(t->token, KW_COMMA)) {
+ tokr_err(t, "Expected ';' or ',' to finish or continue declaration.");
+ return false;
+ }
+ t->token++; /* move past comma */
}
- t->token++;
return true;
}
@@ -38,9 +75,9 @@ static bool parse_stmt(Statement *s, Tokenizer *t) {
if (token_is_kw(t->token + 1, KW_COLON)) {
return parse_decls(&s->decls, t);
} else {
- t->token++; /* TODO: This is temporary */
+ tokr_err(t, "Unreocgnized statement.");
+ return false;
}
- return true;
}
static bool parse_file(ParsedFile *f, Tokenizer *t) {
@@ -55,8 +92,29 @@ static bool parse_file(ParsedFile *f, Tokenizer *t) {
return ret;
}
+static void decl_fprint(FILE *out, Declaration *d) {
+ fprintf(out, "l%lu:", (unsigned long)d->where.line);
+ ident_fprint(out, d->var);
+ if (d->is_const) {
+ fprintf(out, "[const]");
+ }
+ if (d->has_expr) {
+ fprintf(out, "=");
+ }
+}
+
static void stmt_fprint(FILE *out, Statement *s) {
- fprintf(out, "statement!\n");
+ switch (s->kind) {
+ case STMT_DECLS:
+ arr_foreach(s->decls, Declaration, decl) {
+ if (decl != s->decls.data) {
+ fprintf(out, ", ");
+ }
+ decl_fprint(out, decl);
+ }
+ fprintf(out, ";\n");
+ break;
+ }
}
static void parsed_file_fprint(FILE *out, ParsedFile *f) {
diff --git a/test.toc b/test.toc
index 2163792..10a9cff 100644
--- a/test.toc
+++ b/test.toc
@@ -1,3 +1,4 @@
-P :-
-Q :-
-R :- \ No newline at end of file
+P := ;
+Q := ;
+R := , foo :;
+S :, R :-; \ No newline at end of file
diff --git a/tokenizer.c b/tokenizer.c
index 7782ca3..447ffbc 100644
--- a/tokenizer.c
+++ b/tokenizer.c
@@ -11,6 +11,7 @@ typedef enum {
KW_SEMICOLON,
KW_EQ,
KW_COLON,
+ KW_COMMA,
KW_FN,
KW_LPAREN,
KW_RPAREN,
@@ -25,7 +26,7 @@ typedef enum {
/* OPTIM: Use a trie or just a function if this gets too long */
static const char *keywords[KW_COUNT] =
- {";", "=", ":", "fn", "(", ")", "{", "}", "==", "<", "<=", "-"};
+ {";", "=", ":", ",", "fn", "(", ")", "{", "}", "==", "<", "<=", "-"};
#define TOKR_USE_LLONG 1
@@ -51,11 +52,15 @@ typedef struct {
size_t len;
} StrConst;
-/* NOTE: LineNo is typedef'd in util/err.c */
typedef struct {
- TokenKind kind;
LineNo line;
char *code;
+} Location;
+
+/* NOTE: LineNo is typedef'd in util/err.c */
+typedef struct {
+ TokenKind kind;
+ Location where;
union {
Keyword kw;
Identifier ident;
@@ -79,7 +84,7 @@ static bool token_is_kw(Token *t, Keyword kw) {
}
static void token_fprint(FILE *out, Token *t) {
- fprintf(out, "l%lu-", (unsigned long)t->line);
+ fprintf(out, "l%lu-", (unsigned long)t->where.line);
switch (t->kind) {
case TOKEN_KW:
fprintf(out, "keyword: %s", keywords[t->kw]);
@@ -112,10 +117,10 @@ static void token_fprint(FILE *out, Token *t) {
}
static void tokr_add(Tokenizer *t, Token *token) {
- if (!token->line)
- token->line = t->line;
- if (!token->code)
- token->code = t->s;
+ if (!token->where.line)
+ token->where.line = t->line;
+ if (!token->where.code)
+ token->where.code = t->s;
tokens_add(&t->tokens, token);
}
@@ -166,18 +171,28 @@ static void tokenization_err(Tokenizer *t, const char *fmt, ...) {
/* to be used after tokenization */
static void tokr_err(Tokenizer *t, const char *fmt, ...) {
- LineNo line = t->token->line;
+ LineNo line = t->token->where.line;
va_list args;
va_start(args, fmt);
- err_vprint(line, t->token->code, fmt, args);
+ err_vprint(line, t->token->where.code, fmt, args);
va_end(args);
while (1) {
- if (t->token->line != line) break;
+ if (t->token->where.line != line) break;
if (t->token->kind == TOKEN_EOF) break;
t->token++;
}
}
+static void tokr_put_location(Tokenizer *tokr, Token *t) {
+ t->where.line = tokr->line;
+ t->where.code = tokr->s;
+}
+
+static void tokr_get_location(Tokenizer *tokr, Token *t) {
+ tokr->line = t->where.line;
+ tokr->s = t->where.code;
+}
+
static bool tokenize_string(Tokenizer *tokr, char *str) {
int has_err = 0;
Tokenizer t;
@@ -256,8 +271,7 @@ static bool tokenize_string(Tokenizer *tokr, char *str) {
n.kind = NUM_CONST_INT;
n.intval = 0;
Token token = {0};
- token.line = t.line;
- token.code = t.s;
+ tokr_put_location(&t, &token);
if (*t.s == '0') {
tokr_nextchar(&t);
/* octal/hexadecimal/binary (or zero) */
@@ -374,8 +388,7 @@ static bool tokenize_string(Tokenizer *tokr, char *str) {
/* it's a character constant! */
tokr_nextchar(&t);
Token token = {0};
- token.line = t.line;
- token.code = t.s;
+ tokr_put_location(&t, &token);
char c;
if (*t.s == '\\') {
/* escape sequence */
@@ -403,8 +416,7 @@ static bool tokenize_string(Tokenizer *tokr, char *str) {
if (*t.s == '"') {
/* it's a string constant! */
Token token;
- token.line = t.line;
- token.code = t.s;
+ tokr_put_location(&t, &token);
tokr_nextchar(&t);
size_t len = 0;
size_t backslashes = 0;
@@ -413,8 +425,7 @@ static bool tokenize_string(Tokenizer *tokr, char *str) {
backslashes++;
} else if (*t.s == 0) {
/* return t to opening " so that we go to the next line */
- t.line = token.line;
- t.s = token.code;
+ tokr_get_location(&t, &token);
tokenization_err(&t, "No matching \" found.");
goto err;
} else {
@@ -425,8 +436,7 @@ static bool tokenize_string(Tokenizer *tokr, char *str) {
}
char *str = malloc(len + 1);
char *strptr = str;
- t.s = token.code;
- t.line = token.line;
+ tokr_get_location(&t, &token);
tokr_nextchar(&t); /* past opening " */
while (*t.s != '"') {
assert(*t.s);
@@ -455,8 +465,7 @@ static bool tokenize_string(Tokenizer *tokr, char *str) {
if (isidentstart(*t.s)) {
/* it's an identifier */
Token token = {0};
- token.line = t.line;
- token.code = t.s;
+ tokr_put_location(&t, &token);
Identifier ident = ident_insert(&t.s);
token.kind = TOKEN_IDENT;
token.ident = ident;