From 54097d6dbd03ee483034dee39f669afd9333aeb1 Mon Sep 17 00:00:00 2001 From: Leo Tenenbaum Date: Fri, 16 Aug 2019 15:01:24 -0400 Subject: Added identifierS --- identifiers.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ main.c | 4 +++- test.toc | 8 ++++++- tokenizer.c | 29 ++++++++++++++++++----- util/err.c | 28 ++++++++++++++++++++++ 5 files changed, 136 insertions(+), 8 deletions(-) create mode 100644 identifiers.c diff --git a/identifiers.c b/identifiers.c new file mode 100644 index 0000000..6335ed8 --- /dev/null +++ b/identifiers.c @@ -0,0 +1,75 @@ +static char identifier_chars[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_."; +#define NIDENTIFIER_CHARS ((int)((sizeof identifier_chars) - 1)) /* number of identifier characters, which is also the number of children per trie node; -1 for null char */ + +/* returns -1 if c is not a valid identifier character, its index in identifier_chars otherwise. lowercase letters map to 0-25, uppercase to 26-51, digits to 52-61, underscore to 62 and dot to 63. the range arithmetic assumes letters are contiguous in the execution character set (true for ASCII). c is an int so that a value returned by fgetc, including EOF, can be passed directly; EOF matches no range and yields -1. */ +static int ident_char_index(int c) { + if (c >= 'a' && c <= 'z') + return c - 'a'; + if (c >= 'A' && c <= 'Z') + return c - 'A' + 26; + if (c >= '0' && c <= '9') + return c - '0' + 52; + if (c == '_') return 62; + if (c == '.') return 63; + return -1; +} + +/* can this character be used in an identifier? (1 if ident_char_index accepts it, 0 otherwise) 
*/ +static int isident(int c) { + return ident_char_index(c) != -1; /* OPTIM: Write separate function */ +} + +typedef struct IdentTree { + /* zero value is an empty trie. id: 0 while no identifier ends at this node, otherwise the number assigned by ident_tree_finsert. children: NULL, or an array of NIDENTIFIER_CHARS nodes indexed by ident_char_index. parent: NULL for the root; set for every child when the children array is allocated. */ + long id; + int len; /* length of identifier = depth in tree. NOTE(review): nothing in identifiers.c assigns len, so it appears to stay 0 - confirm */ + struct IdentTree *children; + struct IdentTree *parent; +} IdentTree; +/* an identifier is referred to by the trie node that its last character leads to */ +typedef IdentTree *Identifier; +/* root of the trie that ident_finsert inserts into */ +static IdentTree ident_base_tree; +static long ident_curr_id; /* last id handed out (0 before any). NOTE: you should eventually add something to reset this */ +/* walks down from t, reading one character at a time from fp and allocating child arrays on demand, until fgetc returns something that is not an identifier character (EOF included). returns the node reached, first giving it the next id if it had none; if the very first character read fails isident, t itself is returned. NOTE(review): the terminating character has been consumed and is not pushed back with ungetc - confirm the caller does not need it. */ +static Identifier ident_tree_finsert(IdentTree *t, FILE *fp) { + while (1) { + int c = fgetc(fp); + if (!isident(c)) { + if (t->id == 0) t->id = ++ident_curr_id; + return t; + } + if (!t->children) { + /* allocate children */ + t->children = err_calloc(NIDENTIFIER_CHARS, sizeof *t->children); + for (int i = 0; i < NIDENTIFIER_CHARS; i++) + t->children[i].parent = t; /* child's parent = self */ + } + t = &t->children[ident_char_index(c)]; + } +} + +/* inserts if does not exist. reads until non-ident char is found. */ +/* advances past identifier (ident_tree_finsert also consumes the character that ended it) */ +static Identifier ident_finsert(FILE *fp) { + return ident_tree_finsert(&ident_base_tree, fp); +} + +/* writes the characters of id to out. recurses up to the root first so that the characters are written first-to-last; the root itself prints nothing. */ +static void ident_fprint(FILE *out, Identifier id) { + if (id->parent == NULL) return; /* at root */ + /* OPTIM: Use malloc(id->len)???? 
*/ + ident_fprint(out, id->parent); + fputc(identifier_chars[id - id->parent->children /* index of self in parent */], out); +} +/* frees every children array below tree, depth first. tree itself is not freed, and tree->children is not reset to NULL afterwards. */ +static void idents_free_tree(IdentTree *tree) { + if (!tree->children) return; + for (int i = 0; i < NIDENTIFIER_CHARS; i++) + idents_free_tree(&tree->children[i]); + free(tree->children); +} +/* frees all children arrays of the identifier trie rooted at ident_base_tree */ +static void idents_free(void) { + idents_free_tree(&ident_base_tree); +} diff --git a/main.c b/main.c index f4c6280..900b723 100644 --- a/main.c +++ b/main.c @@ -7,6 +7,7 @@ #include #include "util/err.c" #include "util/files.c" +#include "identifiers.c" #include "tokenizer.c" int main(int argc, char **argv) { @@ -31,6 +32,7 @@ int main(int argc, char **argv) { printf("\n"); tokenizer_free(&t); - + fclose(in); + idents_free(); } diff --git a/test.toc b/test.toc index 54c9345..a0b4c4e 100644 --- a/test.toc +++ b/test.toc @@ -1,3 +1,9 @@ == < -<<<<< \ No newline at end of file +<kw]); break; + case TOKEN_IDENT: + fprintf(out, "identifier: %ld:", t->ident->id); + ident_fprint(out, t->ident); + break; case TOKEN_EOF: fprintf(out, "eof"); break; @@ -87,13 +93,24 @@ static Tokenizer tokenize_file(FILE *fp) { } } if (kw != KW_COUNT) { - Token kw_token; - kw_token.kind = TOKEN_KW; - kw_token.kw = kw; - tokenizer_add(&t, &kw_token, line, col); + /* it's a keyword */ + Token token; + token.kind = TOKEN_KW; + token.kw = kw; + tokenizer_add(&t, &token, line, col); col += (LineNo)strlen(keywords[kw]); continue; } + + if (isident(c)) { + /* it's an identifier. NOTE(review): unlike the keyword branch above, this branch does not advance col - confirm */ + Identifier ident = ident_finsert(fp); + Token token; + token.kind = TOKEN_IDENT; + token.ident = ident; + tokenizer_add(&t, &token, line, col); + continue; + } fgets(errbuf, sizeof errbuf, fp); size_t len = strlen(errbuf); diff --git a/util/err.c b/util/err.c index 62886c4..7a38017 100644 --- a/util/err.c +++ b/util/err.c @@ -8,3 +8,31 @@ static void err_print(LineNo line, LineNo col, const char *fmt, ...) 
{ vfprintf(stderr, fmt, args); va_end(args); } +/* malloc wrapper that never returns NULL: if malloc fails it prints an out-of-memory message to stderr and calls abort. NOTE(review): a NULL result from a zero-size request is treated as failure too. */ +static void *err_malloc(size_t size) { + void *ret = malloc(size); + if (!ret) { + fprintf(stderr, "Error: Out of memory.\n"); + abort(); + } + return ret; +} +/* calloc wrapper with the same policy as err_malloc: returns zero-initialized memory for n elements of the given size, or prints an out-of-memory message and aborts if calloc returns NULL. */ +static void *err_calloc(size_t n, size_t size) { + void *ret = calloc(n, size); + if (!ret) { + fprintf(stderr, "Error: Out of memory.\n"); + abort(); + } + return ret; +} +/* realloc wrapper with the same policy: returns the resized block, or prints an out-of-memory message and aborts if realloc returns NULL (data is not freed first). NOTE(review): realloc with new_size 0 may return NULL on some implementations, which would abort here. */ +static void *err_realloc(void *data, size_t new_size) { + void *ret = realloc(data, new_size); + if (!ret) { + fprintf(stderr, "Error: Out of memory.\n"); + abort(); + } + return ret; +} + -- cgit v1.2.3