-rw-r--r--   abbrevs.txt   |  9
-rw-r--r--   main.c        |  2
-rw-r--r--   test.toc      |  1
-rw-r--r--   tokenizer.c   | 57
4 files changed, 65 insertions(+), 4 deletions(-)
diff --git a/abbrevs.txt b/abbrevs.txt
new file mode 100644
index 0000000..89ba9b4
--- /dev/null
+++ b/abbrevs.txt
@@ -0,0 +1,9 @@
+kw - keyword
+ident - identifier
+direct - directive
+decl - declaration
+stmt - statement
+tokr - tokenizer
+str - string
+num - number
+eof - end of file
diff --git a/main.c b/main.c
--- a/main.c
+++ b/main.c
@@ -47,7 +47,7 @@ int main(int argc, char **argv) {
 	arr_foreach(&t.tokens, Token, token) {
 		if (token != t.tokens.data)
 			printf(" ");
-		token_fprint(stdout, token);
+		fprint_token(stdout, token);
 	}
 	printf("\n");
 	Parser p;
diff --git a/test.toc b/test.toc
--- a/test.toc
+++ b/test.toc
@@ -2,5 +2,6 @@ foo @= fn(a: [10]float) [3]int {
 };
 
 main @ fn() = fn() {
+#C #C #C
 	asdkofhj : fn() [3]int;
 };
diff --git a/tokenizer.c b/tokenizer.c
index 73ae7d0..abf6477 100644
--- a/tokenizer.c
+++ b/tokenizer.c
@@ -1,6 +1,7 @@
 typedef enum {
 	TOKEN_KW,
 	TOKEN_IDENT,
+	TOKEN_DIRECT,
 	TOKEN_NUM_LITERAL,
 	TOKEN_CHAR_LITERAL,
 	TOKEN_STR_LITERAL,
@@ -8,6 +9,11 @@ typedef enum {
 } TokenKind;
 
 typedef enum {
+	DIRECT_C,
+	DIRECT_COUNT
+} Directive;
+
+typedef enum {
 	KW_SEMICOLON,
 	KW_EQ,
 	KW_COLON,
@@ -21,7 +27,7 @@ typedef enum {
 	KW_RSQUARE,
 	KW_EQEQ,
 	KW_LT,
-	KW_LE, 
+	KW_LE,
 	KW_MINUS,
 	KW_PLUS,
 	KW_LAST_SYMBOL = KW_PLUS, /* last one entirely consisting of symbols */
@@ -44,6 +50,10 @@ static const char *keywords[KW_COUNT] =
 	{";", "=", ":", "@", ",", "(", ")", "{", "}", "[", "]", "==", "<", "<=", "-", "+",
 	 "fn", "int", "i8", "i16", "i32", "i64", "u8", "u16", "u32", "u64", "float", "double"};
 
+static const char *directives[DIRECT_COUNT] =
+	{"C"};
+
+
 /* Returns KW_COUNT if it's not a keyword */
 /* OPTIM: don't use strncmp so much */
 static Keyword tokenize_kw(char **s) {
@@ -55,7 +65,7 @@ static Keyword tokenize_kw(char **s) {
 			   it's not a symbol, so we need to check if it's something like "intfoo"
 			 */
 			if (isident((*s)[len])) {
-				return KW_COUNT;
+				continue;
 			}
 		}
 		*s += len;
@@ -65,6 +75,23 @@ static Keyword tokenize_kw(char **s) {
 	return KW_COUNT;
 }
 
+
+
+/* Returns DIRECT_COUNT if it's not a directive */
+static Directive tokenize_direct(char **s) {
+	for (Directive d = 0; d < DIRECT_COUNT; d++) {
+		size_t len = strlen(directives[d]);
+		if (strncmp(*s, directives[d], len) == 0) {
+			if (isident((*s)[len])) {
+				continue;
+			}
+			*s += len;
+			return d;
+		}
+	}
+	return DIRECT_COUNT;
+}
+
 typedef enum {
 	NUM_LITERAL_INT,
 	NUM_LITERAL_FLOAT
@@ -89,6 +116,7 @@ typedef struct {
 	Location where;
 	union {
 		Keyword kw;
+		Directive direct;
 		Identifier ident;
 		NumLiteral num;
 		char chr;
@@ -110,7 +138,7 @@ static inline bool token_is_kw(Token *t, Keyword kw) {
 	return t->kind == TOKEN_KW && t->kw == kw;
 }
 
-static void token_fprint(FILE *out, Token *t) {
+static void fprint_token(FILE *out, Token *t) {
 	fprintf(out, "l%lu-", (unsigned long)t->where.line);
 	switch (t->kind) {
 	case TOKEN_KW:
@@ -137,6 +165,9 @@ static void token_fprint(FILE *out, Token *t) {
 	case TOKEN_STR_LITERAL:
 		fprintf(out, "str: \"%s\"", t->str.str);
 		break;
+	case TOKEN_DIRECT:
+		fprintf(out, "directive: #%s", directives[t->direct]);
+		break;
 	case TOKEN_EOF:
 		fprintf(out, "eof");
 		break;
@@ -270,6 +301,26 @@ static bool tokenize_string(Tokenizer *t, char *str) {
 			}
 			if (is_comment) continue;
 		}
+
+		if (*t->s == '#') {
+			/* it's a directive */
+			char *start_s = t->s;
+			t->s++;	/* move past # */
+			Directive direct = tokenize_direct(&t->s);
+			if (direct != DIRECT_COUNT) {
+				/* it's a directive */
+				Token *token = tokr_add(t);
+				token->where.line = t->line;
+				token->where.code = start_s;
+				token->kind = TOKEN_DIRECT;
+				token->direct = direct;
+				continue;
+			}
+			t->s--;	/* go back to # */
+			tokenization_err(t, "Unrecognized directive.");
+			goto err;
+		}
+
 		{
 			char *start_s = t->s;
 			Keyword kw = tokenize_kw(&t->s);
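For context, here is a minimal standalone sketch of the directive-matching approach introduced in tokenizer.c above. The isident() helper and the driver in main() are simplified stand-ins for the real tokenizer machinery (they are assumptions, not part of the commit); only directives[] and tokenize_direct() mirror the diff.

/* Standalone sketch: recognize "#C"-style directives the way the diff does. */
#include <ctype.h>
#include <stdio.h>
#include <string.h>

typedef enum {
	DIRECT_C,
	DIRECT_COUNT
} Directive;

static const char *directives[DIRECT_COUNT] = {"C"};

/* stand-in for the tokenizer's identifier-character test */
static int isident(char c) {
	return isalnum((unsigned char)c) || c == '_';
}

/* Returns DIRECT_COUNT if *s does not start with a directive name;
   otherwise advances *s past the name and returns which one it was. */
static Directive tokenize_direct(char **s) {
	for (Directive d = 0; d < DIRECT_COUNT; d++) {
		size_t len = strlen(directives[d]);
		if (strncmp(*s, directives[d], len) == 0) {
			/* "#Cfoo" is not the #C directive */
			if (isident((*s)[len]))
				continue;
			*s += len;
			return d;
		}
	}
	return DIRECT_COUNT;
}

int main(void) {
	char buf[] = "#C rest";
	char *s = buf;
	if (*s == '#') {
		s++; /* move past '#', as the tokenizer does */
		Directive d = tokenize_direct(&s);
		if (d != DIRECT_COUNT)
			printf("directive: #%s, remaining input: \"%s\"\n", directives[d], s);
		else
			printf("unrecognized directive\n");
	}
	return 0;
}

Running this prints the recognized directive and leaves s pointing just past the directive name, which is the same contract the real tokenizer relies on before it continues scanning.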