1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
|
/* Kind of a lexed token.  Only keyword tokens and end-of-file exist so far. */
typedef enum {
TOKEN_KW,
TOKEN_EOF
} TokenKind;
/*
 * Keyword/operator tokens and their source spellings.
 * keywords[] is indexed by this enum; the designated initializers keep
 * the table in sync with the enum even if entries are reordered or new
 * ones are inserted (positional initializers would silently desync).
 */
typedef enum {
    KW_SEMICOLON,
    KW_EQEQ,
    KW_LT,
    KW_LE,
    KW_EQ,
    KW_COUNT
} Keyword;
static const char *keywords[KW_COUNT] = {
    [KW_SEMICOLON] = ";",
    [KW_EQEQ]      = "==",
    [KW_LT]        = "<",
    [KW_LE]        = "<=",
    [KW_EQ]        = "=",
};
/* NOTE: LineNo is typedef'd in util/err.c */
/*
 * A single lexed token: its kind plus the source position where it starts
 * (line/col start at 1 in tokenize_file).  The anonymous union holds the
 * per-kind payload; presumably it will grow as more token kinds are
 * added -- TODO confirm.
 */
typedef struct {
TokenKind kind;
LineNo line; /* source line the token starts on (1-based) */
LineNo col; /* source column the token starts on (1-based) */
union {
Keyword kw; /* valid only when kind == TOKEN_KW */
};
} Token;
/*
 * Result of tokenizing a file: an owned, heap-allocated array of tokens.
 * Produced by tokenize_file(); released with tokenizer_free().
 */
typedef struct {
Token *tokens; /* owned array of ntokens tokens */
size_t ntokens; /* number of tokens stored */
size_t cap; /* used internally */
Token *token; /* token currently being processed */
} Tokenizer;
/*
 * Print a human-readable description of *t to out, prefixed with the
 * token's source position ("l<line>c<col>-").
 */
static void token_fprint(FILE *out, Token *t) {
    fprintf(out, "l%luc%lu-", (unsigned long)t->line, (unsigned long)t->col);
    if (t->kind == TOKEN_KW) {
        fprintf(out, "keyword: %s", keywords[t->kw]);
    } else if (t->kind == TOKEN_EOF) {
        fprintf(out, "eof");
    }
}
/*
 * Append *token to t's token array, stamping it with the source position
 * (line, col) first.  Grows the array geometrically when full.  Aborts on
 * out-of-memory, matching this file's abort-on-error policy.
 */
static void tokenizer_add(Tokenizer *t, Token *token, LineNo line, LineNo col) {
    if (t->ntokens == t->cap) {
        t->cap *= 2;
        /* BUG FIX: realloc takes a size in BYTES -- the original passed the
         * element count, shrinking the buffer and overflowing the heap once
         * the array grew.  Also keep the old pointer until realloc succeeds
         * so it is not leaked, and check the result before using it. */
        Token *grown = realloc(t->tokens, t->cap * sizeof *grown);
        if (!grown) {
            fprintf(stderr, "Out of memory while tokenizing.\n");
            abort();
        }
        t->tokens = grown;
    }
    token->line = line;
    token->col = col;
    t->tokens[t->ntokens++] = *token;
}
static Tokenizer tokenize_file(FILE *fp) {
char buf[4096];
setvbuf(fp, buf, _IOFBF, sizeof buf);
char errbuf[256] = {0}; /* for errors */
int has_err = 0;
Tokenizer t;
t.cap = 4096;
t.ntokens = 0;
t.tokens = malloc(t.cap * sizeof(*t.tokens));
LineNo line = 1;
LineNo col = 1;
while (1) {
int c = fpeekc(fp);
if (c == EOF) break;
if (isspace(c)) {
if (c == '\n') {
line++;
col = 0;
}
fnextc(fp);
col++;
continue;
}
Keyword kw;
for (kw = 0; kw < KW_COUNT; kw++) {
if (fhasprefix(fp, keywords[kw])) {
break;
}
}
if (kw != KW_COUNT) {
Token kw_token;
kw_token.kind = TOKEN_KW;
kw_token.kw = kw;
tokenizer_add(&t, &kw_token, line, col);
col += (LineNo)strlen(keywords[kw]);
continue;
}
fgets(errbuf, sizeof errbuf, fp);
size_t len = strlen(errbuf);
int has_newline = len && errbuf[len-1] == '\n';
if (has_newline) {
/* remove newline */
errbuf[len-1] = 0;
}
err_print(line, col, "Unrecognized token:\n\there --> %s\n", errbuf);
has_err = 1;
if (has_newline) {
/* increment line counter because of it */
line++;
col = 1;
} else {
col += (LineNo)(sizeof errbuf);
}
}
/* TODO: Check ferror/errno */
if (has_err) {
fprintf(stderr, "Errors occured while preprocessing.\n");
abort();
}
t.token = t.tokens;
return t;
}
/*
 * Release the token array owned by *t.  The struct itself is not freed
 * (it is returned by value from tokenize_file).  Pointers and counts are
 * reset so an accidental second free or a stale-cursor dereference fails
 * predictably instead of being undefined behavior.
 */
static void tokenizer_free(Tokenizer *t) {
    free(t->tokens);
    t->tokens = NULL;
    t->token = NULL;
    t->ntokens = 0;
    t->cap = 0;
}
|