summaryrefslogtreecommitdiff
path: root/identifiers.c
blob: a960fb76cc49188a61c7fd44f96557c4b719bc78 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
/* OPTIM: This is not ideal. There should be one dynamic array of tree nodes. */

typedef struct {
	struct Block *scope; /* NULL for file scope */
	struct Declaration *decl;
} IdentDecl;

typedef struct IdentTree {
	/* zero value is an empty trie */
	long id;
	int len; /* length of identifier = depth in tree */
	struct IdentTree *children;
	struct IdentTree *parent;
	Array decls; /* array of declarations of this identifier */
	unsigned long c_fn_reps; /* number of repetitions of this identifier in the C output--only used for functions */
	size_t depth;
} IdentTree;

typedef IdentTree *Identifier;

/* MUST be zero-initialized before use */
typedef struct {
	IdentTree tree_root;
	long curr_id;
} Identifiers;

static const char identifier_chars[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_";

#define NIDENTIFIER_CHARS ((int)((sizeof identifier_chars) - 1)) /* -1 for null char */

/* returns -1 if c is not a valid identifier character, its index in identifier_chars otherwise */
static int ident_char_index(int c) {
	if (c >= 'a' && c <= 'z')
		return c - 'a';	
	if (c >= 'A' && c <= 'Z')
		return c - 'A' + 26;
	if (c >= '0' && c <= '9')
		return c - '0' + 52;
	if (c == '_') return 62;
	return -1;
}

/* can this character be used in an identifier? */
static int isident(int c) {
	return ident_char_index(c) != -1; /* OPTIM: Write separate function */
}

/* moves s to the char after the identifier */
/* inserts if does not exist. reads until non-ident char is found. */
/* advances past identifier */
static Identifier ident_insert(Identifiers *ids, char **s) {
	IdentTree *t = &ids->tree_root;
	while (1) {
		char c = **s;
		if (!isident(c)) {
			if (t->id == 0) t->id = ++ids->curr_id;
			return t;
		}
		
		if (!t->children) {
			/* allocate children */
			t->children = err_calloc(NIDENTIFIER_CHARS, sizeof *t->children);
			for (int i = 0; i < NIDENTIFIER_CHARS; i++) {
				t->children[i].parent = t; /* child's parent = self */
				t->children[i].depth = t->depth + 1;
			}
		}
		t = &t->children[ident_char_index(c)];
		(*s)++;
	}
}


static void fprint_ident(FILE *out, Identifier id) {
	if (id->parent == NULL) return; /* at root */
	/* OPTIM: Use malloc(id->len)???? would probably use less mem for long idents, but
	   it's on the heap */
	fprint_ident(out, id->parent);
	fputc(identifier_chars[id - id->parent->children /* index of self in parent */], out);
}

static bool ident_eq_str(Identifier i, const char *s) {
	size_t len = strlen(s);
	if (i->depth != len) return false;
	s += len - 1;
	while (i->parent != NULL) {
		if (identifier_chars[i - i->parent->children /* index of self in parent */] != *s)
			return false;
		i = i->parent;
		if (i->parent != NULL)
			s--;
	}
	return true;
}

static char *ident_to_str(Identifier i) {
	char *str = malloc(i->depth + 1);
	str += i->depth;
	*str = 0;
	while (i->parent != NULL) {
		str--;
		*str = identifier_chars[i - i->parent->children];
		i = i->parent;
	}
	return str;
}

static IdentDecl *ident_decl(Identifier i) {
	return (IdentDecl*)arr_last(&i->decls);
}

static void idents_free_tree(IdentTree *tree) {
	if (!tree->children) return;
	for (int i = 0; i < NIDENTIFIER_CHARS; i++)
		idents_free_tree(&tree->children[i]);
	free(tree->children);
}

static void idents_free(Identifiers *ids) {
	idents_free_tree(&ids->tree_root);
}