4 files changed, 124 insertions, 115 deletions
diff --git a/identifiers.c b/identifiers.c
index f491a06..a960fb7 100644
--- a/identifiers.c
+++ b/identifiers.c
@@ -1,9 +1,10 @@
+/* OPTIM: This is not ideal. There should be one dynamic array of tree nodes. */
+
 typedef struct {
 	struct Block *scope; /* NULL for file scope */
 	struct Declaration *decl;
 } IdentDecl;
 
-/* OPTIM: This is not ideal. There should be one dynamic array of tree nodes. */
 typedef struct IdentTree {
 	/* zero value is an empty trie */
 	long id;
@@ -17,9 +18,13 @@ typedef struct IdentTree {
 
 typedef IdentTree *Identifier;
 
-static IdentTree ident_base_tree;
-static long ident_curr_id; /* NOTE: you should eventually add something to reset this */
-static char identifier_chars[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_";
+/* MUST be zero-initialized before use */
+typedef struct {
+	IdentTree tree_root; /* root of the identifier trie; the zero value is an empty trie */
+	long curr_id; /* most recent id handed out by ident_insert; ids start at 1 */
+} Identifiers;
+
+static const char identifier_chars[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_";
 
 #define NIDENTIFIER_CHARS ((int)((sizeof identifier_chars) - 1)) /* -1 for null char */
 
@@ -41,11 +46,14 @@ static int isident(int c) {
 }
 
 /* moves s to the char after the identifier */
-static Identifier ident_tree_insert(IdentTree *t, char **s) {
+/* inserts if does not exist. reads until non-ident char is found. */
+/* advances past identifier */
+static Identifier ident_insert(Identifiers *ids, char **s) {
+	IdentTree *t = &ids->tree_root;
 	while (1) {
 		char c = **s;
 		if (!isident(c)) {
-			if (t->id == 0) t->id = ++ident_curr_id;
+			if (t->id == 0) t->id = ++ids->curr_id;
 			return t;
 		}
 		
@@ -62,16 +70,11 @@ static Identifier ident_tree_insert(IdentTree *t, char **s) {
 	}
 }
 
-/* inserts if does not exist. reads until non-ident char is found. */
-/* advances past identifier */
-static Identifier ident_insert(char **s) {
-	return ident_tree_insert(&ident_base_tree, s);
-}
-
 
 static void fprint_ident(FILE *out, Identifier id) {
 	if (id->parent == NULL) return; /* at root */
-	/* OPTIM: Use malloc(id->len)???? */
+	/* OPTIM: Use malloc(id->len)???? would probably use less mem for long idents than
+	   recursing once per character (as done below), but it's on the heap */
 	fprint_ident(out, id->parent);
 	fputc(identifier_chars[id - id->parent->children /* index of self in parent */], out);
 }
@@ -113,6 +116,6 @@ static void idents_free_tree(IdentTree *tree) {
 	free(tree->children);
 }
 
-static void idents_free(void) {
-	idents_free_tree(&ident_base_tree);
+static void idents_free(Identifiers *ids) {
+	idents_free_tree(&ids->tree_root); /* releases the trie under tree_root; ids itself is not freed here */
 }
diff --git a/main.c b/main.c
index f22b3c3..b4caeeb 100644
--- a/main.c
+++ b/main.c
@@ -36,7 +36,9 @@ int main(int argc, char **argv) {
 	fclose(in);
 
 	err_filename = in_filename;
+	Identifiers file_idents = {0};
 	Tokenizer t;
+	tokr_create(&t, &file_idents);
 	if (!tokenize_string(&t, contents)) {
 		err_fprint(TEXT_IMPORTANT("Errors occured while preprocessing.\n"));
 		return EXIT_FAILURE;
@@ -80,5 +82,5 @@ int main(int argc, char **argv) {
 	
 	fclose(c_out);
 	fclose(h_out);
-	idents_free();
+	idents_free(&file_idents);
 }
diff --git a/tokenizer.c b/tokenizer.c
index a8932e2..73ae7d0 100644
--- a/tokenizer.c
+++ b/tokenizer.c
@@ -101,6 +101,7 @@ typedef struct {
 	char *s; /* string being parsed */
 	LineNo line;
 	Token *token; /* token currently being processed */
+	Identifiers *idents;
 } Tokenizer;
 
 
@@ -214,49 +215,52 @@ static void tokr_get_location(Tokenizer *tokr, Token *t) {
 	tokr->s = t->where.code;
 }
 
-static bool tokenize_string(Tokenizer *tokr, char *str) {
+static void tokr_create(Tokenizer *t, Identifiers *idents) {
+	arr_create(&t->tokens, sizeof(Token));
+	arr_reserve(&t->tokens, 256); /* presumably pre-sizes the array for 256 tokens -- confirm against arr_reserve */
+	t->idents = idents; /* stored by pointer, not copied; s/line/token are set later by tokenize_string */
+}
+
+static bool tokenize_string(Tokenizer *t, char *str) {
 	int has_err = 0;
-	Tokenizer t;
-	arr_create(&t.tokens, sizeof(Token));
-	arr_reserve(&t.tokens, 256);
-	t.s = str;
-	t.line = 1;
+	t->s = str;
+	t->line = 1;
 	
 	while (1) {
-	    if (*t.s == 0) break;
-		if (isspace(*t.s)) {
-			tokr_nextchar(&t);
+	    if (*t->s == 0) break;
+		if (isspace(*t->s)) {
+			tokr_nextchar(t);
 	    	continue;
 		}
 
-		if (*t.s == '/') {
+		if (*t->s == '/') {
 			/* maybe it's a comment */
 			int is_comment = 1;
-			switch (t.s[1]) {
+			switch (t->s[1]) {
 			case '/': /* single line comment */
-				tokr_nextchar(&t);
-				for (t.s++; *t.s != '\n' && *t.s; t.s++);
-				t.line++;
+				tokr_nextchar(t);
+				for (t->s++; *t->s != '\n' && *t->s; t->s++);
+				t->line++;
 				break;
 			case '*': { /* multi line comment */
-				tokr_nextchar(&t);
+				tokr_nextchar(t);
 				int comment_level = 1; /* allow nested multi-line comments */
-			    while (*t.s) {
-					if (t.s[0] == '*' && t.s[1] == '/') {
-						t.s += 2;
+			    while (*t->s) {
+					if (t->s[0] == '*' && t->s[1] == '/') {
+						t->s += 2;
 						comment_level--;
 						if (comment_level == 0) {
 							break;
 						}
-					} else if (t.s[0] == '/' && t.s[1] == '*') {
-						t.s += 2;
+					} else if (t->s[0] == '/' && t->s[1] == '*') {
+						t->s += 2;
 						comment_level++;
 					} else {
-						tokr_nextchar(&t);
+						tokr_nextchar(t);
 					}
 				}
-				if (*t.s == 0) {
-					tokenization_err(&t, "End of file reached inside multi-line comment.");
+				if (*t->s == 0) {
+					tokenization_err(t, "End of file reached inside multi-line comment.");
 					abort(); /* there won't be any further errors, of course */
 				}
 			} break;
@@ -267,12 +271,12 @@ static bool tokenize_string(Tokenizer *tokr, char *str) {
 			if (is_comment) continue;
 		}
 		{
-			char *start_s = t.s;
-			Keyword kw = tokenize_kw(&t.s);
+			char *start_s = t->s;
+			Keyword kw = tokenize_kw(&t->s);
 			if (kw != KW_COUNT) {
 				/* it's a keyword */
-				Token *token = tokr_add(&t);
-				token->where.line = t.line;
+				Token *token = tokr_add(t);
+				token->where.line = t->line;
 				token->where.code = start_s;
 				token->kind = TOKEN_KW;
 				token->kw = kw;
@@ -282,30 +286,30 @@ static bool tokenize_string(Tokenizer *tokr, char *str) {
 		
 		/* check if it's a number */
 
-		if (isdigit(*t.s)) {
+		if (isdigit(*t->s)) {
 			/* it's a numeric literal */
 			int base = 10;
 			Floating decimal_pow10;
 			NumLiteral n;
 			n.kind = NUM_LITERAL_INT;
 			n.intval = 0;
-			Token *token = tokr_add(&t);
-			tokr_put_location(&t, token);
-			if (*t.s == '0') {
-				tokr_nextchar(&t);
+			Token *token = tokr_add(t);
+			tokr_put_location(t, token);
+			if (*t->s == '0') {
+				tokr_nextchar(t);
 				/* octal/hexadecimal/binary (or zero) */
-				char format = *t.s;
+				char format = *t->s;
 				if (isdigit(format)) /* octal */
 					base = 8;
 				else {
 					switch (format) {
 					case 'b':
 						base = 2;
-						tokr_nextchar(&t);
+						tokr_nextchar(t);
 						break;
 					case 'x':
 						base = 16;
-						tokr_nextchar(&t);
+						tokr_nextchar(t);
 						break;
 					default:
 						/* it's 0/0.something etc.  */
@@ -315,39 +319,39 @@ static bool tokenize_string(Tokenizer *tokr, char *str) {
 			}
 
 			while (1) {
-				if (*t.s == '.') {
+				if (*t->s == '.') {
 					if (n.kind == NUM_LITERAL_FLOAT) {
-						tokenization_err(&t, "Double . in number.");
+						tokenization_err(t, "Double . in number.");
 						goto err;
 					}
 					if (base != 10) {
-						tokenization_err(&t, "Decimal point in non base 10 number.");
+						tokenization_err(t, "Decimal point in non base 10 number.");
 						goto err;
 					}
 				    n.kind = NUM_LITERAL_FLOAT;
 					decimal_pow10 = 0.1;
 					n.floatval = (Floating)n.intval;
-					tokr_nextchar(&t);
+					tokr_nextchar(t);
 					continue;
-				} else if (*t.s == 'e') {
-					tokr_nextchar(&t);
+				} else if (*t->s == 'e') {
+					tokr_nextchar(t);
 					if (n.kind == NUM_LITERAL_INT) {
 						n.kind = NUM_LITERAL_FLOAT;
 						n.floatval = (Floating)n.intval;
 					}
 					/* TODO: check if exceeding maximum exponent */
 					int exponent = 0;
-					if (*t.s == '+')
-						tokr_nextchar(&t); /* ignore + after e */
+					if (*t->s == '+')
+						tokr_nextchar(t); /* ignore + after e */
 					
 					int negative_exponent = 0;
-					if (*t.s == '-') {
-						tokr_nextchar(&t);
+					if (*t->s == '-') {
+						tokr_nextchar(t);
 						negative_exponent = 1;
 					}
-					for (; isdigit(*t.s); tokr_nextchar(&t)) {
+					for (; isdigit(*t->s); tokr_nextchar(t)) {
 						exponent *= 10;
-						exponent += *t.s - '0';
+						exponent += *t->s - '0';
 					}
 					/* OPTIM: Slow for very large exponents (unlikely to happen) */
 					for (int i = 0; i < exponent; i++) {
@@ -361,19 +365,19 @@ static bool tokenize_string(Tokenizer *tokr, char *str) {
 				}
 				int digit = -1;
 				if (base == 16) {
-					if (*t.s >= 'a' && *t.s <= 'f')
-						digit = 10 + *t.s - 'a';
-					else if (*t.s >= 'A' && *t.s <= 'F')
-						digit = *t.s - 'A';
+					if (*t->s >= 'a' && *t->s <= 'f')
+						digit = 10 + *t->s - 'a';
+					else if (*t->s >= 'A' && *t->s <= 'F')
+						digit = 10 + *t->s - 'A'; /* 'A'..'F' are 10..15, same as 'a'..'f' */
 				}
 				if (digit == -1) {
-					if (*t.s >= '0' && *t.s <= '9')
-						digit = *t.s - '0';
+					if (*t->s >= '0' && *t->s <= '9')
+						digit = *t->s - '0';
 				}
 				if (digit < 0 || digit >= base) {
-					if (isdigit(*t.s)) {
+					if (isdigit(*t->s)) {
 						/* something like 0b011012 */
-						tokenization_err(&t, "Digit %d cannot appear in a base %d number.", digit, base);
+						tokenization_err(t, "Digit %d cannot appear in a base %d number.", digit, base);
 						goto err;
 					}
 					/* end of numeric literal */
@@ -384,7 +388,7 @@ static bool tokenize_string(Tokenizer *tokr, char *str) {
 					if (n.intval > ULLONG_MAX / (UInteger)base ||
 						n.intval * (UInteger)base > ULLONG_MAX - (UInteger)digit) {
 						/* too big! */
-						tokenization_err(&t, "Number too big to fit in a numeric literal.");
+						tokenization_err(t, "Number too big to fit in a numeric literal.");
 						goto err;
 					}
 					n.intval *= (UInteger)base;
@@ -395,107 +399,106 @@ static bool tokenize_string(Tokenizer *tokr, char *str) {
 					decimal_pow10 /= 10;
 					break;
 				}
-				tokr_nextchar(&t);
+				tokr_nextchar(t);
 			}
 			token->kind = TOKEN_NUM_LITERAL;
 			token->num = n;
 			continue;
 		}
 
-		if (*t.s == '\'') {
+		if (*t->s == '\'') {
 			/* it's a character literal! */
-			tokr_nextchar(&t);
-			Token *token = tokr_add(&t);
-			tokr_put_location(&t, token);
+			tokr_nextchar(t);
+			Token *token = tokr_add(t);
+			tokr_put_location(t, token);
 			char c;
-			if (*t.s == '\\') {
+			if (*t->s == '\\') {
 				/* escape sequence */
-				tokr_nextchar(&t);
-				c = tokr_esc_seq(&t);
+				tokr_nextchar(t);
+				c = tokr_esc_seq(t);
 				if (c == 0) {
-					tokenization_err(&t, "Unrecognized escape character: '\\%c'.", *t.s);
+					tokenization_err(t, "Unrecognized escape character: '\\%c'.", *t->s);
 					goto err;
 				}
 			} else {
-				c = *t.s;
-				tokr_nextchar(&t);
+				c = *t->s;
+				tokr_nextchar(t);
 			}
-			if (*t.s != '\'') {
-				tokenization_err(&t, "End of character literal expected.");
+			if (*t->s != '\'') {
+				tokenization_err(t, "End of character literal expected.");
 				goto err;
 			}
-			tokr_nextchar(&t);
+			tokr_nextchar(t);
 			token->kind = TOKEN_CHAR_LITERAL;
 			token->chr = c;
 			continue;
 		}
 
-		if (*t.s == '"') {
+		if (*t->s == '"') {
 			/* it's a string literal! */
-			Token *token = tokr_add(&t);
-			tokr_put_location(&t, token);
-			tokr_nextchar(&t);
+			Token *token = tokr_add(t);
+			tokr_put_location(t, token);
+			tokr_nextchar(t);
 			size_t len = 0;
 			size_t backslashes = 0;
-			while (*t.s != '"' || backslashes % 2 == 1) {
-				if (*t.s == '\\') {
+			while (*t->s != '"' || backslashes % 2 == 1) {
+				if (*t->s == '\\') {
 					backslashes++;
-				} else if (*t.s == 0) {
+				} else if (*t->s == 0) {
 					/* return t to opening " so that we go to the next line */
-					tokr_get_location(&t, token);
-					tokenization_err(&t, "No matching \" found.");
+					tokr_get_location(t, token);
+					tokenization_err(t, "No matching \" found.");
 					goto err;
 				} else {
 					backslashes = 0;
 				}
 				len++;
-				tokr_nextchar(&t);
+				tokr_nextchar(t);
 			}
 			char *strlit = malloc(len + 1);
 		    char *strptr = strlit;
-			tokr_get_location(&t, token);
-			tokr_nextchar(&t); /* past opening " */
-			while (*t.s != '"') {
-				assert(*t.s);
-				if (*t.s == '\\') {
-					tokr_nextchar(&t);
-					char c = tokr_esc_seq(&t);
+			tokr_get_location(t, token);
+			tokr_nextchar(t); /* past opening " */
+			while (*t->s != '"') {
+				assert(*t->s);
+				if (*t->s == '\\') {
+					tokr_nextchar(t);
+					char c = tokr_esc_seq(t);
 					if (c == 0) {
-						tokenization_err(&t, "Unrecognized escape character: '\\%c'.", *t.s);
+						tokenization_err(t, "Unrecognized escape character: '\\%c'.", *t->s);
 						goto err;
 					}
 					*strptr++ = c;
 				} else {
-					*strptr++ = *t.s;
-					tokr_nextchar(&t);
+					*strptr++ = *t->s;
+					tokr_nextchar(t);
 				}
 			}
 			*strptr = 0;
 			token->kind = TOKEN_STR_LITERAL;
 			token->str.len = len;
 			token->str.str = strlit;
-			tokr_nextchar(&t); /* move past closing " */
+			tokr_nextchar(t); /* move past closing " */
 			continue;
 		}
 		
-		if (isident(*t.s)) {
+		if (isident(*t->s)) {
 			/* it's an identifier */
-			Token *token = tokr_add(&t);
-			tokr_put_location(&t, token);
-			Identifier ident = ident_insert(&t.s);
+			Token *token = tokr_add(t);
+			tokr_put_location(t, token);
+			Identifier ident = ident_insert(t->idents, &t->s);
 			token->kind = TOKEN_IDENT;
 			token->ident = ident;
 			continue;
 		}		
-		tokenization_err(&t, "Token not recognized");
+		tokenization_err(t, "Token not recognized");
 	err:
 		has_err = 1;
 	}
-	Token *token = tokr_add(&t);
+	Token *token = tokr_add(t);
 	token->kind = TOKEN_EOF;
 	
-	t.token = t.tokens.data;
-	*tokr = t;
+	t->token = t->tokens.data;
 	return !has_err;
 }
 
diff --git a/util/err.c b/util/err.c
index 5ff28ad..e53b16f 100644
--- a/util/err.c
+++ b/util/err.c
@@ -20,6 +20,7 @@ typedef struct {
 } Location;
 
 /* file name of file being processed */
+/* TODO: remove this */
 static const char *err_filename;
 
 /* Write directly to the error file */