summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--abbrevs.txt9
-rw-r--r--main.c2
-rw-r--r--test.toc1
-rw-r--r--tokenizer.c57
4 files changed, 65 insertions, 4 deletions
diff --git a/abbrevs.txt b/abbrevs.txt
new file mode 100644
index 0000000..89ba9b4
--- /dev/null
+++ b/abbrevs.txt
@@ -0,0 +1,9 @@
+kw - keyword
+ident - identifier
+direct - directive
+decl - declaration
+stmt - statement
+tokr - tokenizer
+str - string
+num - number
+eof - end of file
diff --git a/main.c b/main.c
index b4caeeb..babecc7 100644
--- a/main.c
+++ b/main.c
@@ -47,7 +47,7 @@ int main(int argc, char **argv) {
arr_foreach(&t.tokens, Token, token) {
if (token != t.tokens.data)
printf(" ");
- token_fprint(stdout, token);
+ fprint_token(stdout, token);
}
printf("\n");
Parser p;
diff --git a/test.toc b/test.toc
index d2e8110..e468430 100644
--- a/test.toc
+++ b/test.toc
@@ -2,5 +2,6 @@ foo @= fn(a: [10]float) [3]int {
};
main @ fn() = fn() {
+#C #C #C
asdkofhj : fn() [3]int;
};
diff --git a/tokenizer.c b/tokenizer.c
index 73ae7d0..abf6477 100644
--- a/tokenizer.c
+++ b/tokenizer.c
@@ -1,6 +1,7 @@
typedef enum {
TOKEN_KW,
TOKEN_IDENT,
+ TOKEN_DIRECT,
TOKEN_NUM_LITERAL,
TOKEN_CHAR_LITERAL,
TOKEN_STR_LITERAL,
@@ -8,6 +9,11 @@ typedef enum {
} TokenKind;
typedef enum {
+ DIRECT_C, /* the "C" directive, written #C in source text */
+ DIRECT_COUNT /* number of directives; tokenize_direct also returns it to mean "not a directive" */
+} Directive;
+
+typedef enum {
KW_SEMICOLON,
KW_EQ,
KW_COLON,
@@ -21,7 +27,7 @@ typedef enum {
KW_RSQUARE,
KW_EQEQ,
KW_LT,
- KW_LE,
+ KW_LE,
KW_MINUS,
KW_PLUS,
KW_LAST_SYMBOL = KW_PLUS, /* last one entirely consisting of symbols */
@@ -44,6 +50,10 @@ static const char *keywords[KW_COUNT] =
{";", "=", ":", "@", ",", "(", ")", "{", "}", "[", "]", "==", "<", "<=", "-", "+", "fn",
"int", "i8", "i16", "i32", "i64", "u8", "u16", "u32", "u64", "float", "double"};
+static const char *directives[DIRECT_COUNT] = /* directive names, indexed by Directive */
+ {"C"}; /* stored without the leading '#' */
+
+
/* Returns KW_COUNT if it's not a keyword */
/* OPTIM: don't use strncmp so much */
static Keyword tokenize_kw(char **s) {
@@ -55,7 +65,7 @@ static Keyword tokenize_kw(char **s) {
it's not a symbol, so we need to check if it's something like "intfoo"
*/
if (isident((*s)[len])) {
- return KW_COUNT;
+ continue;
}
}
*s += len;
@@ -65,6 +75,23 @@ static Keyword tokenize_kw(char **s) {
return KW_COUNT;
}
+
+
+/*
+ * Tries to match one of the names in directives[] at the start of *s.
+ * The names are compared without a leading '#'; the visible caller skips
+ * the '#' before calling.
+ *
+ * On a match, *s is advanced past the directive name and the matching
+ * Directive is returned.
+ * Returns DIRECT_COUNT if it's not a directive; *s is left unchanged then.
+ */
+static Directive tokenize_direct(char **s) {
+ for (Directive d = 0; d < DIRECT_COUNT; d++) {
+ size_t len = strlen(directives[d]);
+ if (strncmp(*s, directives[d], len) == 0) {
+ if (isident((*s)[len])) { /* NOTE(review): isident is defined outside this view; presumably rejects a name that runs on, e.g. "Cfoo" - confirm */
+ continue; /* try the remaining directives instead of giving up */
+ }
+ *s += len; /* consume the directive name */
+ return d;
+ }
+ }
+ return DIRECT_COUNT; /* nothing matched */
+}
+
typedef enum {
NUM_LITERAL_INT,
NUM_LITERAL_FLOAT
@@ -89,6 +116,7 @@ typedef struct {
Location where;
union {
Keyword kw;
+ Directive direct;
Identifier ident;
NumLiteral num;
char chr;
@@ -110,7 +138,7 @@ static inline bool token_is_kw(Token *t, Keyword kw) {
return t->kind == TOKEN_KW && t->kw == kw;
}
-static void token_fprint(FILE *out, Token *t) {
+static void fprint_token(FILE *out, Token *t) {
fprintf(out, "l%lu-", (unsigned long)t->where.line);
switch (t->kind) {
case TOKEN_KW:
@@ -137,6 +165,9 @@ static void token_fprint(FILE *out, Token *t) {
case TOKEN_STR_LITERAL:
fprintf(out, "str: \"%s\"", t->str.str);
break;
+ case TOKEN_DIRECT:
+ fprintf(out, "directive: #%s", directives[t->direct]);
+ break;
case TOKEN_EOF:
fprintf(out, "eof");
break;
@@ -270,6 +301,26 @@ static bool tokenize_string(Tokenizer *t, char *str) {
}
if (is_comment) continue;
}
+
+ if (*t->s == '#') {
+ /* it's a directive */
+ char *start_s = t->s;
+ t->s++; /* move past # */
+ Directive direct = tokenize_direct(&t->s);
+ if (direct != DIRECT_COUNT) {
+ /* it's a directive */
+ Token *token = tokr_add(t);
+ token->where.line = t->line;
+ token->where.code = start_s;
+ token->kind = TOKEN_DIRECT;
+ token->direct = direct;
+ continue;
+ }
+ t->s--; /* go back to # */
+ tokenization_err(t, "Unrecognized directive.");
+ goto err;
+ }
+
{
char *start_s = t->s;
Keyword kw = tokenize_kw(&t->s);