summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--abbrevs.txt1
-rw-r--r--base_cgen.c23
-rwxr-xr-xbuild.sh15
-rw-r--r--cgen.c18
-rw-r--r--eval.c1
-rw-r--r--identifiers.c160
-rw-r--r--main.c5
-rw-r--r--out.c4
-rw-r--r--out.h2
-rw-r--r--parse.c15
-rw-r--r--test.toc8
-rw-r--r--tokenizer.c2
-rw-r--r--types.c10
-rw-r--r--util/blockarr.c15
-rw-r--r--util/err.c16
15 files changed, 175 insertions, 120 deletions
diff --git a/abbrevs.txt b/abbrevs.txt
index 89ba9b4..57d5361 100644
--- a/abbrevs.txt
+++ b/abbrevs.txt
@@ -6,4 +6,5 @@ stmt - statement
tokr - tokenizer
str - string
num - number
+fn - function
eof - end of file
diff --git a/base_cgen.c b/base_cgen.c
index 1e79aaa..125146b 100644
--- a/base_cgen.c
+++ b/base_cgen.c
@@ -12,6 +12,7 @@ typedef struct {
int indent_level;
bool indent_next; /* should the next thing written be indented? */
CGenWritingTo writing_to;
+ Identifier main_ident;
} CGenerator;
static FILE *cgen_writing_to(CGenerator *g) {
@@ -79,23 +80,6 @@ static void cgen_write_line_comment(CGenerator *g, const char *fmt, ...) {
cgen_write(g, " */\n");
}
-static void cgen_create(CGenerator *g, FILE *c_out, FILE *h_out, const char *h_filename) {
- g->c_out = c_out;
- g->h_out = h_out;
- g->anon_fn_count = 0;
- g->indent_level = 0;
- g->block = NULL;
- g->indent_next = true;
-
- g->writing_to = CGEN_WRITING_TO_H;
- cgen_write(g, "#include <stddef.h>\n"
- "#include <stdint.h>\n");
-
- g->writing_to = CGEN_WRITING_TO_C;
- cgen_write(g, "#include \"%s\"\n", h_filename);
- cgen_writeln(g, ""); /* extra newline between includes and code */
-}
-
/* Pass NULL for where if you don't want to check if it's declared */
static bool cgen_fn_name(CGenerator *g, FnExpr *f, Location *where);
@@ -247,10 +231,11 @@ static void cgen_type_post(CGenerator *g, Type *t) {
static bool cgen_fn_name(CGenerator *g, FnExpr *f, Location *where) {
if (f->name) {
- if (ident_eq_str(f->name, "main"))
+ if (f->name == g->main_ident) {
cgen_write(g, "main__");
- else
+ } else {
return cgen_ident(g, f->name, where);
+ }
} else {
cgen_write(g, "a___");
}
diff --git a/build.sh b/build.sh
index 2cce5bb..bf4b3d8 100755
--- a/build.sh
+++ b/build.sh
@@ -1,3 +1,16 @@
#!/bin/bash
CC=gcc
-$CC -o toc main.c -O0 -g3 -Wall -Wextra -Wpedantic -Wconversion -Wshadow -std=c11 || exit 1
+
+# Possible extra build flags
+# these are for compiling the compiler, and NOT for compiling the program itself.
+# -DNONZERO_NULL_PTRS
+# - must be set if the zero value of a pointer (as might be set by calloc/memset)
+# is not the NULL pointer.
+
+ADDITIONAL_FLAGS=
+
+WARNINGS='-Wall -Wextra -Wpedantic -Wconversion -Wshadow'
+DEBUG_FLAGS="-O0 -g3 $WARNINGS -std=c11"
+RELEASE_FLAGS="-O3 -DNDEBUG $WARNINGS -std=c11"
+
+$CC $DEBUG_FLAGS $ADDITIONAL_FLAGS -o toc main.c || exit 1
diff --git a/cgen.c b/cgen.c
index c42cbe4..9353084 100644
--- a/cgen.c
+++ b/cgen.c
@@ -1,3 +1,21 @@
+static void cgen_create(CGenerator *g, Identifiers *ids, FILE *c_out, FILE *h_out, const char *h_filename) {
+ g->c_out = c_out;
+ g->h_out = h_out;
+ g->anon_fn_count = 0;
+ g->indent_level = 0;
+ g->block = NULL;
+ g->indent_next = true;
+ g->main_ident = ident_get(ids, "main");
+
+ g->writing_to = CGEN_WRITING_TO_H;
+ cgen_write(g, "#include <stddef.h>\n"
+ "#include <stdint.h>\n");
+
+ g->writing_to = CGEN_WRITING_TO_C;
+ cgen_write(g, "#include \"%s\"\n", h_filename);
+ cgen_writeln(g, ""); /* extra newline between includes and code */
+}
+
static bool cgen_expr(CGenerator *g, Expression *e) {
switch (e->kind) {
case EXPR_INT_LITERAL:
diff --git a/eval.c b/eval.c
index 6e6195d..a131c03 100644
--- a/eval.c
+++ b/eval.c
@@ -86,6 +86,7 @@ static bool eval_expr_as_int(Expression *e, Integer *i) {
info_print(d->where, "Declaration was here.");
return false;
}
+ /* TODO: tuples */
eval_expr_as_int(&d->expr, i);
return true;
diff --git a/identifiers.c b/identifiers.c
index a960fb7..b7a2b3c 100644
--- a/identifiers.c
+++ b/identifiers.c
@@ -1,121 +1,155 @@
-/* OPTIM: This is not ideal. There should be one dynamic array of tree nodes. */
-
typedef struct {
struct Block *scope; /* NULL for file scope */
struct Declaration *decl;
} IdentDecl;
+/*
+The way you search an identifier in a tree is:
+root.children[low part of 1st char].children[high part of 1st char]
+.children[low part of 2nd char]...
+
+*/
+
+#define TREE_NCHILDREN 16
typedef struct IdentTree {
/* zero value is an empty trie */
- long id;
- int len; /* length of identifier = depth in tree */
- struct IdentTree *children;
+ uint16_t depth;
+ unsigned char index_in_parent; /* index of this in .parent.children */
struct IdentTree *parent;
+ struct IdentTree *children[TREE_NCHILDREN];
Array decls; /* array of declarations of this identifier */
unsigned long c_fn_reps; /* number of repetitions of this identifier in the C output--only used for functions */
- size_t depth;
} IdentTree;
typedef IdentTree *Identifier;
-/* MUST be zero-initialized before use */
typedef struct {
- IdentTree tree_root;
- long curr_id;
+ BlockArr trees;
+ IdentTree *root;
} Identifiers;
-static const char identifier_chars[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_";
-
-#define NIDENTIFIER_CHARS ((int)((sizeof identifier_chars) - 1)) /* -1 for null char */
+#if CHAR_MAX - CHAR_MIN > 255
+#error "Currently only systems with 8-bit characters can compile toc."
+/* TODO: maybe do a run-time error for large characters? */
+#endif
-/* returns -1 if c is not a valid identifier character, its index in identifier_chars otherwise */
-static int ident_char_index(int c) {
+/* can this character be used in an identifier? */
+static int isident(int c) {
if (c >= 'a' && c <= 'z')
- return c - 'a';
+ return 1;
if (c >= 'A' && c <= 'Z')
- return c - 'A' + 26;
+ return 1;
if (c >= '0' && c <= '9')
- return c - '0' + 52;
- if (c == '_') return 62;
- return -1;
+ return 1;
+ if (c == '_') return 1;
+#if CHAR_MIN < 0
+ if (c < 0) /* on systems where char = signed char, UTF-8 characters are probably < 0? */
+ return 1;
+#endif
+ if (c > 127) /* UTF-8 */
+ return 1;
+ return 0;
}
-/* can this character be used in an identifier? */
-static int isident(int c) {
- return ident_char_index(c) != -1; /* OPTIM: Write separate function */
+
+/* used internally to allocate identifiers */
+static Identifier ident_new(Identifiers *ids, Identifier parent, unsigned char index_in_parent) {
+ IdentTree *tree = block_arr_add(&ids->trees);
+ memset(tree, 0, sizeof *tree); /* use zero value of IdentTree */
+#if NONZERO_NULL_PTRS
+ tree->parent = NULL;
+ for (size_t i = 0; i < TREE_NCHILDREN; i++)
+ tree->children[i] = NULL;
+#endif
+ tree->parent = parent;
+ tree->index_in_parent = index_in_parent;
+ return tree;
+}
+
+/* Initialize Identifiers. */
+static void idents_create(Identifiers *ids) {
+ block_arr_create(&ids->trees, 10, sizeof(IdentTree)); /* blocks of 1 << 10 = 1024 */
+ ids->root = ident_new(ids, NULL, 0); /* create root tree */
}
+
/* moves s to the char after the identifier */
/* inserts if does not exist. reads until non-ident char is found. */
/* advances past identifier */
static Identifier ident_insert(Identifiers *ids, char **s) {
- IdentTree *t = &ids->tree_root;
+ IdentTree *tree = ids->root;
while (1) {
- char c = **s;
- if (!isident(c)) {
- if (t->id == 0) t->id = ++ids->curr_id;
- return t;
+ if (!isident(**s)) {
+ return tree;
}
+ int c = (**s) - CHAR_MIN;
+ assert(c >= 0 && c <= 255);
+ unsigned char c_low = (unsigned char)(c & 0xf);
+ unsigned char c_high = (unsigned char)(c >> 4);
+ if (!tree->children[c_low]) {
+ tree->children[c_low] = ident_new(ids, tree, c_low);
+ }
+ tree = tree->children[c_low];
- if (!t->children) {
- /* allocate children */
- t->children = err_calloc(NIDENTIFIER_CHARS, sizeof *t->children);
- for (int i = 0; i < NIDENTIFIER_CHARS; i++) {
- t->children[i].parent = t; /* child's parent = self */
- t->children[i].depth = t->depth + 1;
- }
+ if (!tree->children[c_high]) {
+ tree->children[c_high] = ident_new(ids, tree, c_high);
}
- t = &t->children[ident_char_index(c)];
+ tree = tree->children[c_high];
(*s)++;
}
}
static void fprint_ident(FILE *out, Identifier id) {
+ assert(id);
if (id->parent == NULL) return; /* at root */
- /* OPTIM: Use malloc(id->len)???? would probably use less mem for long idents, but
- it's on the heap */
- fprint_ident(out, id->parent);
- fputc(identifier_chars[id - id->parent->children /* index of self in parent */], out);
+ fprint_ident(out, id->parent->parent); /* to go up one character, we need to go to the grandparent */
+ int c_low = id->parent->index_in_parent;
+ int c_high = id->index_in_parent;
+ int c = c_low + (c_high << 4) + CHAR_MIN;
+ fputc(c, out);
}
-static bool ident_eq_str(Identifier i, const char *s) {
- size_t len = strlen(s);
- if (i->depth != len) return false;
- s += len - 1;
- while (i->parent != NULL) {
- if (identifier_chars[i - i->parent->children /* index of self in parent */] != *s)
- return false;
- i = i->parent;
- if (i->parent != NULL)
- s--;
+/* NULL = no such identifier */
+static Identifier ident_get(Identifiers *ids, const char *s) {
+ IdentTree *tree = ids->root;
+ while (*s) {
+ int c = (*s) - CHAR_MIN;
+ assert(c >= 0 && c <= 255);
+ unsigned char c_low = (unsigned char)(c & 0xf);
+ unsigned char c_high = (unsigned char)(c >> 4);
+ tree = tree->children[c_low];
+ if (!tree) return NULL;
+ tree = tree->children[c_high];
+ if (!tree) return NULL;
+ s++;
}
- return true;
+ return tree;
}
static char *ident_to_str(Identifier i) {
- char *str = malloc(i->depth + 1);
- str += i->depth;
+ size_t i_len = (size_t)(i->depth / 2); /* length = depth / 2 */
+ char *str = malloc(i_len + 1);
+ str += i_len;
*str = 0;
- while (i->parent != NULL) {
+
+ while (i) {
str--;
- *str = identifier_chars[i - i->parent->children];
- i = i->parent;
+ unsigned char c_high = i->index_in_parent;
+ unsigned char c_low = i->parent->index_in_parent;
+ char c = (char)(CHAR_MIN + (int)(c_low + (c_high << 4)));
+ *str = c;
+ i = i->parent->parent; /* go to grandparent (prev char) */
}
+
return str;
}
static IdentDecl *ident_decl(Identifier i) {
+ assert(i->decls.item_sz);
return (IdentDecl*)arr_last(&i->decls);
}
-static void idents_free_tree(IdentTree *tree) {
- if (!tree->children) return;
- for (int i = 0; i < NIDENTIFIER_CHARS; i++)
- idents_free_tree(&tree->children[i]);
- free(tree->children);
-}
-
static void idents_free(Identifiers *ids) {
- idents_free_tree(&ids->tree_root);
+ block_arr_free(&ids->trees);
}
diff --git a/main.c b/main.c
index babecc7..1721269 100644
--- a/main.c
+++ b/main.c
@@ -36,7 +36,8 @@ int main(int argc, char **argv) {
fclose(in);
err_filename = in_filename;
- Identifiers file_idents = {0};
+ Identifiers file_idents;
+ idents_create(&file_idents);
Tokenizer t;
tokr_create(&t, &file_idents);
if (!tokenize_string(&t, contents)) {
@@ -72,7 +73,7 @@ int main(int argc, char **argv) {
FILE *c_out = fopen(c_out_filename, "w");
FILE *h_out = fopen(h_out_filename, "w");
CGenerator cgen;
- cgen_create(&cgen, c_out, h_out, h_out_filename);
+ cgen_create(&cgen, &file_idents, c_out, h_out, h_out_filename);
if (!cgen_file(&cgen, &f)) {
err_fprint(TEXT_IMPORTANT("Errors occured while generating C code.\n"));
return EXIT_FAILURE;
diff --git a/out.c b/out.c
index d3c1aa0..d363960 100644
--- a/out.c
+++ b/out.c
@@ -1,10 +1,10 @@
#include "out.h"
/* toc */
-void foo(float a[10], int64_t (*out__)[3]) {
+void foo(void) {
}
void main__(void) {
- void (*asdkofhj)(int64_t (*out__)[3]) = NULL;
+ foo();
}
int main(void) {
diff --git a/out.h b/out.h
index 0e7be5f..8d1ba2b 100644
--- a/out.h
+++ b/out.h
@@ -1,4 +1,4 @@
#include <stddef.h>
#include <stdint.h>
-void foo(float a[10], int64_t (*out__)[3]);
+void foo(void);
void main__(void);
diff --git a/parse.c b/parse.c
index f12c683..e7f1016 100644
--- a/parse.c
+++ b/parse.c
@@ -561,7 +561,7 @@ static bool parse_fn_expr(Parser *p, FnExpr *f) {
Tokenizer *t = p->tokr;
/* only called when token is fn */
assert(token_is_kw(t->token, KW_FN));
- f->name = NULL;
+ f->name = 0;
t->token++;
if (!token_is_kw(t->token, KW_LPAREN)) {
tokr_err(t, "Expected '(' after 'fn'.");
@@ -947,17 +947,18 @@ static bool parse_single_type_in_decl(Parser *p, Declaration *d) {
only keep track of file scoped declarations---
block enter/exit code will handle the rest
*/
+ IdentTree *ident_info = *ident;
if (p->block == NULL) {
- if ((*ident)->decls.len) {
+ if (ident_info->decls.len) {
/* this was already declared! */
- IdentDecl *prev = (*ident)->decls.data;
+ IdentDecl *prev = ident_info->decls.data;
tokr_err(t, "Re-declaration of identifier in global scope.");
info_print(prev->decl->where, "Previous declaration was here.");
return false;
}
- assert(!(*ident)->decls.item_sz);
- arr_create(&(*ident)->decls, sizeof(IdentDecl));
- IdentDecl *ident_decl = arr_add(&(*ident)->decls);
+ assert(!ident_info->decls.item_sz); /* we shouldn't have created this array already */
+ arr_create(&ident_info->decls, sizeof(IdentDecl));
+ IdentDecl *ident_decl = arr_add(&ident_info->decls);
ident_decl->decl = d;
ident_decl->scope = NULL;
}
@@ -1179,7 +1180,7 @@ static void fprint_param(FILE *out, Param *p) {
static void fprint_stmt(FILE *out, Statement *s);
-static void fprint_block(FILE *out, Block *b) {
+static void fprint_block(FILE *out, Block *b) {
fprintf(out, "{\n");
arr_foreach(&b->stmts, Statement, stmt) {
fprint_stmt(out, stmt);
diff --git a/test.toc b/test.toc
index e468430..10e160d 100644
--- a/test.toc
+++ b/test.toc
@@ -1,7 +1,5 @@
-foo @= fn(a: [10]float) [3]int {
+foo @= fn() {
};
-
-main @ fn() = fn() {
-#C #C #C
- asdkofhj : fn() [3]int;
+main @= fn() {
+ foo();
};
diff --git a/tokenizer.c b/tokenizer.c
index abf6477..032085a 100644
--- a/tokenizer.c
+++ b/tokenizer.c
@@ -145,7 +145,7 @@ static void fprint_token(FILE *out, Token *t) {
fprintf(out, "keyword: %s", keywords[t->kw]);
break;
case TOKEN_IDENT:
- fprintf(out, "identifier: %ld:", t->ident->id);
+ fprintf(out, "identifier: %p: ", (void*)t->ident);
fprint_ident(out, t->ident);
break;
case TOKEN_NUM_LITERAL:
diff --git a/types.c b/types.c
index 0b8c052..0bfc3ff 100644
--- a/types.c
+++ b/types.c
@@ -4,7 +4,8 @@ static bool block_enter(Block *b) {
if (stmt->kind == STMT_DECL) {
Declaration *decl = &stmt->decl;
arr_foreach(&decl->idents, Identifier, ident) {
- Array *decls = &(*ident)->decls;
+ IdentTree *id_info = *ident;
+ Array *decls = &id_info->decls;
if (decls->len) {
/* check that it hasn't been declared in this block */
IdentDecl *prev = arr_last(decls);
@@ -16,7 +17,7 @@ static bool block_enter(Block *b) {
}
} else {
/* array not initialized yet */
- arr_create(&(*ident)->decls, sizeof(IdentDecl));
+ arr_create(decls, sizeof(IdentDecl));
}
IdentDecl *ident_decl = arr_add(decls);
@@ -35,7 +36,8 @@ static bool block_exit(Block *b) {
if (stmt->kind == STMT_DECL) {
Declaration *decl = &stmt->decl;
arr_foreach(&decl->idents, Identifier, ident) {
- Array *decls = &(*ident)->decls;
+ IdentTree *id_info = *ident;
+ Array *decls = &id_info->decls;
assert(decls->item_sz);
IdentDecl *last_decl = arr_last(decls);
if (last_decl->scope == b) {
@@ -249,7 +251,7 @@ static bool type_of_expr(Expression *e, Type *t) {
Type fn_type;
if (f->kind == EXPR_IDENT) {
/* allow calling a function before declaring it */
- if (!type_of_ident(e->where, e->ident, t, true)) return false;
+ if (!type_of_ident(f->where, f->ident, &fn_type, true)) return false;
} else {
if (!type_of_expr(f, &fn_type)) return false;
}
diff --git a/util/blockarr.c b/util/blockarr.c
index c85df5a..03d44b2 100644
--- a/util/blockarr.c
+++ b/util/blockarr.c
@@ -21,13 +21,13 @@ typedef struct {
Note: the block size must be a power of 2, to use right shifting instead of division
(for optimization)!
*/
-void block_arr_create(BlockArr *arr, int lg_block_sz, size_t item_sz) {
+static void block_arr_create(BlockArr *arr, int lg_block_sz, size_t item_sz) {
arr_create(&arr->blocks, sizeof(ArrBlock));
arr->item_sz = item_sz;
arr->lg_block_sz = lg_block_sz;
}
-void *block_arr_add(BlockArr *arr) {
+static void *block_arr_add(BlockArr *arr) {
ArrBlock *last_block;
last_block = arr_last(&arr->blocks);
@@ -46,12 +46,13 @@ void *block_arr_add(BlockArr *arr) {
}
}
-/* Don't need this yet. */
-/* void *block_arr_get(BlockArr *arr, size_t index) { */
-
-/* } */
+static inline void *block_arr_get(BlockArr *arr, size_t index) {
+ size_t block_index = index >> arr->lg_block_sz;
+ ArrBlock *block = (ArrBlock*)arr->blocks.data + block_index;
+ return (char*)block->data + arr->item_sz * index;
+}
-void block_arr_free(BlockArr *arr) {
+static void block_arr_free(BlockArr *arr) {
arr_foreach(&arr->blocks, ArrBlock, block) {
free(block->data);
}
diff --git a/util/err.c b/util/err.c
index e53b16f..1a05e9d 100644
--- a/util/err.c
+++ b/util/err.c
@@ -111,14 +111,14 @@ static void *err_malloc(size_t size) {
return ret;
}
-static void *err_calloc(size_t n, size_t size) {
- void *ret = calloc(n, size);
- if (!ret) {
- fprintf(stderr, "Error: Out of memory.\n");
- abort();
- }
- return ret;
-}
+/* static void *err_calloc(size_t n, size_t size) { */
+/* void *ret = calloc(n, size); */
+/* if (!ret) { */
+/* fprintf(stderr, "Error: Out of memory.\n"); */
+/* abort(); */
+/* } */
+/* return ret; */
+/* } */
static void *err_realloc(void *data, size_t new_size) {
void *ret = realloc(data, new_size);