diff options
-rw-r--r-- | identifiers.c | 75 | ||||
-rw-r--r-- | main.c | 4 | ||||
-rw-r--r-- | test.toc | 8 | ||||
-rw-r--r-- | tokenizer.c | 29 | ||||
-rw-r--r-- | util/err.c | 28 |
5 files changed, 136 insertions, 8 deletions
diff --git a/identifiers.c b/identifiers.c new file mode 100644 index 0000000..6335ed8 --- /dev/null +++ b/identifiers.c @@ -0,0 +1,75 @@ +/* Alphabet of identifier characters. The position of a character in this string is its child index in the identifier trie. */ static char identifier_chars[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_."; +#define NIDENTIFIER_CHARS ((int)((sizeof identifier_chars) - 1)) /* number of identifier characters; -1 for null char */ + +/* Maps c to its index in identifier_chars: a-z -> 0..25, A-Z -> 26..51, 0-9 -> 52..61, underscore -> 62, dot -> 63. Returns -1 if c is not a valid identifier character. The range arithmetic assumes the letters have consecutive character codes (true for ASCII). */ +static int ident_char_index(int c) { + if (c >= 'a' && c <= 'z') + return c - 'a'; + if (c >= 'A' && c <= 'Z') + return c - 'A' + 26; + if (c >= '0' && c <= '9') + return c - '0' + 52; + if (c == '_') return 62; + if (c == '.') return 63; + return -1; +} + +/* Nonzero if c can be used in an identifier, i.e. ident_char_index(c) is not -1; zero otherwise. */ +static int isident(int c) { + return ident_char_index(c) != -1; /* OPTIM: Write separate function */ +} +/* One node of the identifier trie; the path of child indices from the root to a node spells an identifier. */ +typedef struct IdentTree { + /* zero value is an empty trie */ + long id; /* 0 until an identifier ends at this node, then its id; ids come from ident_curr_id and start at 1 */ + int len; /* length of identifier = depth in tree. NOTE(review): nothing in the code shown here assigns len, so it stays 0 */ + struct IdentTree *children; /* NULL, or an array of NIDENTIFIER_CHARS nodes indexed by ident_char_index */ + struct IdentTree *parent; /* NULL for the root, otherwise the node whose children array holds this node */ +} IdentTree; +/* An Identifier is a pointer to the trie node at which its spelling ends. */ +typedef IdentTree *Identifier; + +static IdentTree ident_base_tree; /* root of the trie; static storage, so it starts out as the zero (empty) trie */ +static long ident_curr_id; /* most recently assigned id. NOTE: you should eventually add something to reset this */ +/* Reads characters from fp with fgetc, descending from t by one child per identifier character and allocating child arrays on demand. Stops at the first non-identifier character (EOF included), gives the node reached a fresh id if it has none, and returns that node. NOTE(review): the terminating character has already been consumed by fgetc and is not pushed back in this function. If the very first character read is not an identifier character, t itself is returned (and given an id). */ +static Identifier ident_tree_finsert(IdentTree *t, FILE *fp) { + while (1) { + int c = fgetc(fp); + if (!isident(c)) { /* end of identifier: the first visit to this node assigns the next id */ + if (t->id == 0) t->id = ++ident_curr_id; + return t; + } + if (!t->children) { + /* allocate children; err_calloc goes through calloc, so every child starts as a zeroed (empty) node */ + t->children = err_calloc(NIDENTIFIER_CHARS, sizeof *t->children); + for (int i = 0; i < NIDENTIFIER_CHARS; i++) + t->children[i].parent = t; /* child's parent = self */ + } + t = &t->children[ident_char_index(c)]; + } +} + +/* Looks up the identifier at the current position of fp in the global trie (ident_base_tree), inserting it if it does not exist. Reads until a non-identifier character is found.
*/ +/* advances past identifier. NOTE(review): the character that ends the identifier is consumed as well, since the underlying loop reads it with fgetc */ +static Identifier ident_finsert(FILE *fp) { + return ident_tree_finsert(&ident_base_tree, fp); +} + +/* Writes the spelling of id to out. Recurses up to the root first and emits one character per node on the way back, so the characters come out in order. Writes nothing for the root itself. */ +static void ident_fprint(FILE *out, Identifier id) { + if (id->parent == NULL) return; /* at root */ + /* OPTIM: Use malloc(id->len)???? */ + ident_fprint(out, id->parent); + fputc(identifier_chars[id - id->parent->children /* index of self in parent */], out); +} +/* Recursively frees every children array below tree. The node tree itself is not freed here. NOTE(review): tree->children is not reset, so it still points at the freed array afterwards. */ +static void idents_free_tree(IdentTree *tree) { + if (!tree->children) return; + for (int i = 0; i < NIDENTIFIER_CHARS; i++) + idents_free_tree(&tree->children[i]); + free(tree->children); +} +/* Frees all child arrays of the global trie rooted at ident_base_tree. ident_curr_id is left unchanged. */ +static void idents_free(void) { + idents_free_tree(&ident_base_tree); +} @@ -7,6 +7,7 @@ #include <ctype.h> #include "util/err.c" #include "util/files.c" +#include "identifiers.c" #include "tokenizer.c" int main(int argc, char **argv) { @@ -31,6 +32,7 @@ int main(int argc, char **argv) { printf("\n"); tokenizer_free(&t); - + fclose(in); + idents_free(); } @@ -1,3 +1,9 @@ == < -<<<<<
\ No newline at end of file +<<foo<<< +bar +foo +bar +baz +bar +foo
\ No newline at end of file diff --git a/tokenizer.c b/tokenizer.c index 00e9979..768693d 100644 --- a/tokenizer.c +++ b/tokenizer.c @@ -1,5 +1,6 @@ typedef enum { TOKEN_KW, + TOKEN_IDENT, TOKEN_EOF } TokenKind; @@ -12,9 +13,9 @@ typedef enum { KW_COUNT } Keyword; +/* OPTIM: Use a trie or just a function if this gets too long */ static const char *keywords[KW_COUNT] = - {";", "==", "<", "<=", "="}; - + {";", "==", "<", "<=", "="}; /* NOTE: LineNo is typedef'd in util/err.c */ typedef struct { @@ -23,6 +24,7 @@ typedef struct { LineNo col; union { Keyword kw; + Identifier ident; }; } Token; @@ -39,6 +41,10 @@ static void token_fprint(FILE *out, Token *t) { case TOKEN_KW: fprintf(out, "keyword: %s", keywords[t->kw]); break; + case TOKEN_IDENT: + fprintf(out, "identifier: %ld:", t->ident->id); + ident_fprint(out, t->ident); + break; case TOKEN_EOF: fprintf(out, "eof"); break; @@ -87,13 +93,24 @@ static Tokenizer tokenize_file(FILE *fp) { } } if (kw != KW_COUNT) { - Token kw_token; - kw_token.kind = TOKEN_KW; - kw_token.kw = kw; - tokenizer_add(&t, &kw_token, line, col); + /* it's a keyword */ + Token token; + token.kind = TOKEN_KW; + token.kw = kw; + tokenizer_add(&t, &token, line, col); col += (LineNo)strlen(keywords[kw]); continue; } + + if (isident(c)) { + /* it's an identifier */ + Identifier ident = ident_finsert(fp); + Token token; + token.kind = TOKEN_IDENT; + token.ident = ident; + tokenizer_add(&t, &token, line, col); + continue; + } fgets(errbuf, sizeof errbuf, fp); size_t len = strlen(errbuf); @@ -8,3 +8,31 @@ static void err_print(LineNo line, LineNo col, const char *fmt, ...) 
{ vfprintf(stderr, fmt, args); va_end(args); } +/* Like malloc(size), but never returns NULL: if malloc returns NULL, an out-of-memory message is printed to stderr and abort() is called. NOTE(review): malloc is allowed to return NULL for a zero-size request, which would abort here too - confirm callers never pass 0. */ +static void *err_malloc(size_t size) { + void *ret = malloc(size); + if (!ret) { + fprintf(stderr, "Error: Out of memory.\n"); + abort(); + } + return ret; +} +/* Like calloc(n, size), returning zero-filled memory, but never returns NULL: a NULL result prints an out-of-memory message to stderr and aborts. NOTE(review): as with err_malloc, a NULL result for a zero-size request is treated as out of memory. */ +static void *err_calloc(size_t n, size_t size) { + void *ret = calloc(n, size); + if (!ret) { + fprintf(stderr, "Error: Out of memory.\n"); + abort(); + } + return ret; +} +/* Like realloc(data, new_size), but never returns NULL: a NULL result prints an out-of-memory message to stderr and aborts. NOTE(review): realloc with new_size 0 may return NULL without being out of memory; that case aborts here as well. */ +static void *err_realloc(void *data, size_t new_size) { + void *ret = realloc(data, new_size); + if (!ret) { + fprintf(stderr, "Error: Out of memory.\n"); + abort(); + } + return ret; +} + |