summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Leo Tenenbaum <pommicket@gmail.com> 2019-08-21 20:42:16 -0400
committer Leo Tenenbaum <pommicket@gmail.com> 2019-08-21 20:42:16 -0400
commit d63a28aa4d227544b912b3edc2479de622d18a32 (patch)
tree 42aac6845611365d4243dcc7f9b36214da3dcd09
parent 1c214758924bbfcd0cbafb6a4240210f057de007 (diff)
started identification
-rw-r--r-- blocks.c 49
-rw-r--r-- identifiers.c 11
-rw-r--r-- parse.c 71
-rw-r--r-- test.toc 7
-rw-r--r-- toc.c 1
-rw-r--r-- tokenizer.c 15
-rw-r--r-- util/arr.c 5
-rw-r--r-- util/err.c 44
8 files changed, 165 insertions, 38 deletions
diff --git a/blocks.c b/blocks.c
new file mode 100644
index 0000000..5c06a6e
--- /dev/null
+++ b/blocks.c
@@ -0,0 +1,49 @@
+/* identifies identifiers in this block; returns false (after reporting every error) if one is re-declared within b */
+static bool block_enter(Block *b) {
+	bool ret = true;
+	arr_foreach(&b->stmts, Statement, stmt) {
+		if (stmt->kind == STMT_DECL) {
+			Declaration *decl = &stmt->decl;
+			arr_foreach(&decl->idents, Identifier, ident) {
+				Array *decls = &(*ident)->decls;
+				if (decls->len) {
+					/* check that it hasn't been declared in this block; the newest entry is indexed from data/len */
+					IdentDecl *prev = (IdentDecl *)decls->data + (decls->len - 1);
+					if (prev->scope == b) {
+						err_print(decl->where, "Re-declaration of identifier in the same block.");
+						info_print(prev->decl->where, "Previous declaration was here.");
+						ret = false;
+						continue;
+					}
+				} else if (!decls->item_sz) {
+					/* array not initialized yet (a created-but-emptied array is reused as-is) */
+					arr_create(decls, sizeof(IdentDecl));
+				}
+				IdentDecl *ident_decl = arr_add(decls);
+				ident_decl->decl = decl;
+				ident_decl->scope = b;
+			}
+		}
+	}
+	return ret;
+}
+
+/* de-identifies identifiers in this block: pops each IdentDecl whose scope is b; currently always returns true */
+static bool block_exit(Block *b) {
+	/* OPTIM: figure out some way of not re-iterating over everything */
+	bool ret = true;
+	arr_foreach(&b->stmts, Statement, stmt) {
+		if (stmt->kind == STMT_DECL) {
+			Declaration *decl = &stmt->decl;
+			arr_foreach(&decl->idents, Identifier, ident) {
+				Array *decls = &(*ident)->decls;
+				assert(decls->item_sz);
+				if (!decls->len) continue; /* a re-declaration rejected by block_enter has no entry of its own to pop */
+				IdentDecl *last_decl = (IdentDecl *)decls->data + (decls->len - 1); /* newest entry, from data/len */
+				if (last_decl->scope == b)
+					arr_remove_last(decls); /* remove that declaration */
+			}
+		}
+	}
+	return ret;
+}
diff --git a/identifiers.c b/identifiers.c
index 572aa4e..30fd755 100644
--- a/identifiers.c
+++ b/identifiers.c
@@ -16,20 +16,27 @@ static int ident_char_index(int c) {
/* can this character be used in an identifier? */
static int isident(int c) {
- return ident_char_index(c) != -1; /* OPTIM: Write separate function */
+ /* NOTE: . is only used internally in identifiers */
+ return ident_char_index(c) != -1 && c != '.'; /* OPTIM: Write separate function */
}
/* can this character be used as the first character in an identifier? */
static int isidentstart(int c) {
- return isident(c) && c != '.';
+ return isident(c);
}
+typedef struct { /* one declaration of an identifier; the element type of IdentTree.decls */
+ struct Block *scope; /* NULL for file scope */
+ struct Declaration *decl; /* the declaration that introduces the identifier */
+} IdentDecl;
+
typedef struct IdentTree {
/* zero value is an empty trie */
long id;
int len; /* length of identifier = depth in tree */
struct IdentTree *children;
struct IdentTree *parent;
+ Array decls; /* array of declarations of this identifier */
} IdentTree;
typedef IdentTree *Identifier;
diff --git a/parse.c b/parse.c
index 9fb6dfd..8632d15 100644
--- a/parse.c
+++ b/parse.c
@@ -1,3 +1,9 @@
+/*
+ TODO:
+ all of these functions should leave the tokenizer at a "reasonable" place
+ for parsing to continue.
+*/
+
typedef enum {
TYPE_VOID,
TYPE_BUILTIN
@@ -33,7 +39,7 @@ typedef struct {
Type type;
} Param;
-typedef struct {
+typedef struct Block {
Array stmts;
} Block;
@@ -95,7 +101,9 @@ typedef struct Expression {
#define DECL_FLAG_INFER_TYPE 0x01
#define DECL_FLAG_CONST 0x02
#define DECL_FLAG_HAS_EXPR 0x04
-typedef struct {
+
+/* OPTIM: Instead of using dynamic arrays, do two passes. */
+typedef struct Declaration {
Location where;
Array idents;
Type type;
@@ -103,8 +111,6 @@ typedef struct {
uint16_t flags;
} Declaration;
-/* OPTIM: Instead of using dynamic arrays, do two passes. */
-
typedef enum {
STMT_DECL,
STMT_EXPR
@@ -126,6 +132,7 @@ typedef struct {
typedef struct {
Tokenizer *tokr;
BlockArr exprs; /* a dynamic array of expressions, so that we don't need to call malloc every time we make an expression */
+ Block *block; /* which block are we in? NULL = file scope */
} Parser;
/*
@@ -218,19 +225,23 @@ static bool stmt_parse(Statement *s, Parser *p);
static bool block_parse(Block *b, Parser *p) {
Tokenizer *t = p->tokr;
+ Block *prev_block = p->block;
+ p->block = b;
if (!token_is_kw(t->token, KW_LBRACE)) {
tokr_err(t, "Expected '{' to open block.");
return false;
}
t->token++; /* move past { */
arr_create(&b->stmts, sizeof(Statement));
-
+ bool ret = true;
if (!token_is_kw(t->token, KW_RBRACE)) {
/* non-empty function body */
while (1) {
Statement *stmt = arr_add(&b->stmts);
- if (!stmt_parse(stmt, p))
- return false;
+ if (!stmt_parse(stmt, p)) {
+ ret = false;
+ continue;
+ }
if (token_is_kw(t->token, KW_RBRACE)) break;
if (t->token->kind == TOKEN_EOF) {
tokr_err(t, "Expected '}' to close function body.");
@@ -240,7 +251,8 @@ static bool block_parse(Block *b, Parser *p) {
}
t->token++; /* move past } */
- return true;
+ p->block = prev_block;
+ return ret;
}
static bool fn_expr_parse(FnExpr *f, Parser *p) {
@@ -652,6 +664,7 @@ static bool expr_parse(Expression *e, Parser *p, Token *end) {
static bool decl_parse(Declaration *d, Parser *p) {
Tokenizer *t = p->tokr;
/* OPTIM: Maybe don't use a dynamic array or use parser allocator. */
+ d->where = t->token->where;
arr_create(&d->idents, sizeof(Identifier));
while (1) {
@@ -661,6 +674,24 @@ static bool decl_parse(Declaration *d, Parser *p) {
return false;
}
*ident = t->token->ident;
+ /*
+ only keep track of file scoped declarations---
+ blocks.c will handle the rest
+ */
+ if (p->block == NULL) {
+ if ((*ident)->decls.len) {
+ /* this was already declared! */
+ IdentDecl *prev = (*ident)->decls.data;
+ tokr_err(t, "Re-declaration of identifier in global scope.");
+ info_print(prev->decl->where, "Previous declaration was here.");
+ return false;
+ }
+ assert(!(*ident)->decls.item_sz);
+ arr_create(&(*ident)->decls, sizeof(IdentDecl));
+ IdentDecl *ident_decl = arr_add(&(*ident)->decls);
+ ident_decl->decl = d;
+ ident_decl->scope = NULL;
+ }
t->token++;
if (token_is_kw(t->token, KW_COMMA)) {
t->token++;
@@ -670,7 +701,8 @@ static bool decl_parse(Declaration *d, Parser *p) {
t->token++;
break;
}
- tokr_err(t, "Expected ',' to continue listing variables or ':' to indicate type.");
+ tokr_err(t, "Expected ',' to continue listing variables or ':' to indicate type.");
+ return false;
}
d->flags = 0;
@@ -724,13 +756,29 @@ static bool stmt_parse(Statement *s, Parser *p) {
*/
if (token_is_kw(t->token + 1, KW_COLON) || token_is_kw(t->token + 1, KW_COMMA)) {
s->kind = STMT_DECL;
- return decl_parse(&s->decl, p);
+ if (!decl_parse(&s->decl, p)) {
+ /* move to next statement */
+ /* TODO: This might cause unhelpful errors if the first semicolon is inside a block, etc. */
+ while (!token_is_kw(t->token, KW_SEMICOLON)) {
+ if (t->token->kind == TOKEN_EOF) {
+ /* don't bother continuing */
+ return false;
+ }
+ t->token++;
+ }
+ return false;
+ }
+ return true;
} else {
s->kind = STMT_EXPR;
- if (!expr_parse(&s->expr, p, expr_find_end(p, EXPR_END_SEMICOLON)))
+ Token *end = expr_find_end(p, EXPR_END_SEMICOLON);
+ if (!expr_parse(&s->expr, p, end)) {
+ t->token = end;
return false;
+ }
if (!token_is_kw(t->token, KW_SEMICOLON)) {
tokr_err(t, "Expected ';' at end of statement.");
+ t->token = end;
return false;
}
t->token++; /* move past ; */
@@ -740,6 +788,7 @@ static bool stmt_parse(Statement *s, Parser *p) {
static void parser_from_tokenizer(Parser *p, Tokenizer *t) {
p->tokr = t;
+ p->block = NULL;
block_arr_create(&p->exprs, 10, sizeof(Expression)); /* block size = 1024 */
}
diff --git a/test.toc b/test.toc
index 1171218..77b8b44 100644
--- a/test.toc
+++ b/test.toc
@@ -1,3 +1,4 @@
-main :- (fn(){
- f(1,2+3---4-3-34-+a+-234-234);
-})(1,2,3,4,5--23423-2342);
+main :- fn(){
+ printf("Hello!\n");
+};
+main := 7; \ No newline at end of file
diff --git a/toc.c b/toc.c
index ee05b2a..94e6224 100644
--- a/toc.c
+++ b/toc.c
@@ -15,3 +15,4 @@
#include "identifiers.c"
#include "tokenizer.c"
#include "parse.c"
+#include "blocks.c"
diff --git a/tokenizer.c b/tokenizer.c
index 2f022e0..7f6dcf6 100644
--- a/tokenizer.c
+++ b/tokenizer.c
@@ -76,12 +76,7 @@ typedef struct {
size_t len;
} StrLiteral;
-typedef struct {
- LineNo line;
- char *code;
-} Location;
-
-/* NOTE: LineNo is typedef'd in util/err.c */
+/* NOTE: Location is typedef'd in util/err.c */
typedef struct {
TokenKind kind;
Location where;
@@ -178,10 +173,10 @@ static char tokr_esc_seq(Tokenizer *t) {
/* to be used during tokenization */
static void tokenization_err(Tokenizer *t, const char *fmt, ...) {
va_list args;
+ Location where = {t->line, t->s};
va_start(args, fmt);
- err_vprint(t->line, t->s, fmt, args);
+ err_vprint(where, fmt, args);
va_end(args);
-
char *end_of_line = strchr(t->s, '\n');
if (end_of_line) {
t->s = end_of_line;
@@ -194,12 +189,12 @@ static void tokenization_err(Tokenizer *t, const char *fmt, ...) {
/* to be used after tokenization */
static void tokr_err_(const char *src_file, int src_line, Tokenizer *t, const char *fmt, ...) {
- LineNo line = t->token->where.line;
err_fprint("At line %d of %s:\n", src_line, src_file); /* RELEASE: Remove this */
va_list args;
va_start(args, fmt);
- err_vprint(line, t->token->where.code, fmt, args);
+ err_vprint(t->token->where, fmt, args);
va_end(args);
+ LineNo line = t->token->where.line;
while (1) {
if (t->token->where.line != line) break;
if (t->token->kind == TOKEN_EOF) break;
diff --git a/util/arr.c b/util/arr.c
index aeeeb92..6cbf9eb 100644
--- a/util/arr.c
+++ b/util/arr.c
@@ -29,6 +29,11 @@ void *arr_add(Array *arr) {
return item;
}
+void arr_remove_last(Array *arr) {
+	arr->len--; /* caller must ensure the array is non-empty. OPTIM (memory): Shorten array. */
+	arr->last = arr->len ? (char *)arr->data + (arr->len - 1) * arr->item_sz : NULL; /* keep last in sync with len */
+}
+
void arr_free(Array *arr) {
free(arr->data);
}
diff --git a/util/err.c b/util/err.c
index 5385cf1..6eaa191 100644
--- a/util/err.c
+++ b/util/err.c
@@ -2,14 +2,21 @@
#if USE_COLORED_TEXT
#define TEXT_ERROR(x) "\x1b[91m" x "\x1b[0m"
+#define TEXT_INFO(x) "\x1b[94m" x "\x1b[0m"
#define TEXT_IMPORTANT(x) "\x1b[1m" x "\x1b[0m"
#else
#define TEXT_ERROR(x) x
+#define TEXT_INFO(x) x
#define TEXT_IMPORTANT(x) x
#endif
typedef uint32_t LineNo;
+typedef struct {
+ LineNo line;
+ char *code;
+} Location;
+
/* file name of file being processed */
static const char *err_filename;
@@ -33,6 +40,10 @@ static void err_print_header_(LineNo line) {
err_fprint(TEXT_ERROR("error:") " at line %lu of %s:\n", (unsigned long)line, err_filename);
}
+static void info_print_header_(LineNo line) { /* same layout as err_print_header_, but tagged "info:" instead of "error:" */
+ err_fprint(TEXT_INFO("info:") " at line %lu of %s:\n", (unsigned long)line, err_filename);
+}
+
static void err_print_footer_(const char *context) {
err_fprint("\n\there --> ");
const char *end = strchr(context, '\n');
@@ -48,21 +59,30 @@ static void err_print_footer_(const char *context) {
/* Write nicely-formatted errors to the error file */
-/* static void err_print(LineNo line, const char *context, const char *fmt, ...) { */
-/* err_print_header_(line); */
-/* va_list args; */
-/* va_start(args, fmt); */
-/* err_vfprint(fmt, args); */
-/* va_end(args); */
-/* err_print_footer_(context); */
-/* } */
-
-static void err_vprint(LineNo line, const char *context, const char *fmt, va_list args) {
- err_print_header_(line);
+
+static void err_vprint(Location where, const char *fmt, va_list args) { /* va_list form of err_print */
+ err_print_header_(where.line); /* "error: at line N of FILE:" */
+ err_vfprint(fmt, args); /* the caller's printf-style message */
+ err_print_footer_(where.code); /* "here --> " marker, with where.code as the context */
+}
+
+static void err_print(Location where, const char *fmt, ...) { /* variadic front end for err_vprint */
+ va_list args;
+ va_start(args, fmt);
+ err_vprint(where, fmt, args); /* prints header, formatted message and footer */
+ va_end(args);
+}
+
+static void info_print(Location where, const char *fmt, ...) {
+ va_list args;
+ va_start(args, fmt);
+ info_print_header_(where.line);
err_vfprint(fmt, args);
- err_print_footer_(context);
+ err_print_footer_(where.code);
+ va_end(args);
}
+
static void *err_malloc(size_t size) {
void *ret = malloc(size);
if (!ret) {