summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore1
-rwxr-xr-xbuild.sh3
-rw-r--r--main.c36
-rw-r--r--test.toc3
-rw-r--r--tokenizer.c126
-rw-r--r--util/err.c10
-rw-r--r--util/files.c28
7 files changed, 207 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b431f19
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+toc
diff --git a/build.sh b/build.sh
new file mode 100755
index 0000000..e8bd52e
--- /dev/null
+++ b/build.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+# Build the toc binary from main.c, which #includes the other .c files directly.
+# Stops with a non-zero exit status if compilation fails.
+gcc -o toc main.c -g -Wall -Wextra -Wpedantic -Wconversion -std=c11 || exit 1
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..f4c6280
--- /dev/null
+++ b/main.c
@@ -0,0 +1,36 @@
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdarg.h>
+#include <string.h>
+#include <ctype.h>
+#include "util/err.c"
+#include "util/files.c"
+#include "tokenizer.c"
+
+/*
+ * Entry point: tokenizes the file named by argv[1] and prints every token to
+ * stdout on a single line, separated by single spaces.
+ * Returns EXIT_FAILURE when no file is given or the file cannot be opened.
+ */
+int main(int argc, char **argv) {
+    if (argc < 2) {
+        fprintf(stderr, "Please specify an input file.\n");
+        return EXIT_FAILURE;
+    }
+
+    const char *path = argv[1];
+    FILE *input = fopen(path, "r");
+    if (input == NULL) {
+        fprintf(stderr, "Could not open file: %s.\n", path);
+        return EXIT_FAILURE;
+    }
+
+    Tokenizer t = tokenize_file(input);
+
+    size_t idx = 0;
+    while (idx < t.ntokens) {
+        /* separator goes between tokens, never before the first one */
+        if (idx != 0)
+            putchar(' ');
+        token_fprint(stdout, &t.tokens[idx]);
+        idx++;
+    }
+    putchar('\n');
+
+    tokenizer_free(&t);
+
+    fclose(input);
+    return 0;
+}
diff --git a/test.toc b/test.toc
new file mode 100644
index 0000000..54c9345
--- /dev/null
+++ b/test.toc
@@ -0,0 +1,3 @@
+
+== <
+<<<<< \ No newline at end of file
diff --git a/tokenizer.c b/tokenizer.c
new file mode 100644
index 0000000..00e9979
--- /dev/null
+++ b/tokenizer.c
@@ -0,0 +1,126 @@
+/* Discriminator for Token: tells which kind of token a Token value holds. */
+typedef enum {
+ TOKEN_KW, /* a keyword; token_fprint reads the token's kw member for this kind */
+ TOKEN_EOF /* printed as "eof" by token_fprint */
+} TokenKind;
+
+/*
+ * Keywords/operators recognized by the tokenizer.
+ *
+ * tokenize_file tries the keywords in enum order and takes the first one that
+ * matches, so a keyword must be listed BEFORE any keyword that is a prefix of
+ * it ("<=" before "<", "==" before "="). Otherwise the longer keyword can
+ * never be matched.
+ */
+typedef enum {
+    KW_SEMICOLON,
+    KW_EQEQ,
+    KW_LE,
+    KW_LT,
+    KW_EQ,
+    KW_COUNT
+} Keyword;
+
+/* Spelling of each keyword, indexed by Keyword. */
+static const char *keywords[KW_COUNT] = {
+    [KW_SEMICOLON] = ";",
+    [KW_EQEQ] = "==",
+    [KW_LE] = "<=",
+    [KW_LT] = "<",
+    [KW_EQ] = "=",
+};
+
+
+/* NOTE: LineNo is typedef'd in util/err.c */
+/* One token plus the source position recorded for it by tokenizer_add. */
+typedef struct {
+ TokenKind kind;
+ LineNo line; /* line of the token; tokenize_file starts counting at 1 */
+ LineNo col; /* column of the token's first character; also starts at 1 */
+ union {
+ Keyword kw; /* which keyword this is; read when kind == TOKEN_KW */
+ };
+} Token;
+
+/* Growable array of tokens, filled in and returned by tokenize_file. */
+typedef struct {
+ Token *tokens; /* heap-allocated array; released by tokenizer_free */
+ size_t ntokens; /* number of tokens currently stored in tokens */
+ size_t cap; /* used internally */
+ Token *token; /* token currently being processed */
+} Tokenizer;
+
+/*
+ * Writes a human-readable form of *t to out: the position as "l<line>c<col>-"
+ * followed by a description that depends on the token kind.
+ */
+static void token_fprint(FILE *out, Token *t) {
+    unsigned long at_line = t->line;
+    unsigned long at_col = t->col;
+
+    fprintf(out, "l%luc%lu-", at_line, at_col);
+    if (t->kind == TOKEN_KW) {
+        fprintf(out, "keyword: %s", keywords[t->kw]);
+    } else if (t->kind == TOKEN_EOF) {
+        fprintf(out, "eof");
+    }
+}
+
+/*
+ * Appends a copy of *token to t, after stamping it with the given position.
+ * Doubles the array when it is full. Aborts the program if memory runs out.
+ *
+ * Note that token->line and token->col are overwritten with line and col.
+ */
+static void tokenizer_add(Tokenizer *t, Token *token, LineNo line, LineNo col) {
+    if (t->ntokens == t->cap) {
+        size_t new_cap = t->cap ? t->cap * 2 : 16;
+        Token *grown = NULL;
+        /* refuse to grow if doubling wrapped or the byte count would overflow */
+        if (new_cap > t->cap && new_cap <= SIZE_MAX / sizeof *grown) {
+            /* realloc takes a size in BYTES, not an element count */
+            grown = realloc(t->tokens, new_cap * sizeof *grown);
+        }
+        if (!grown) {
+            fprintf(stderr, "Out of memory.\n");
+            abort();
+        }
+        t->tokens = grown;
+        t->cap = new_cap;
+    }
+    token->line = line;
+    token->col = col;
+    t->tokens[t->ntokens++] = *token;
+}
+
+/*
+ * Reads fp to the end and splits it into tokens.
+ *
+ * Whitespace is skipped (and used to track line/column). Anything that is not
+ * a known keyword is reported through err_print together with the rest of its
+ * line, which is then skipped. If any such error or a read error occurred,
+ * the program is aborted after the whole file has been scanned.
+ *
+ * fp should not have been read from yet: setvbuf is called on it here.
+ * Returns a Tokenizer whose token array must be released with tokenizer_free.
+ */
+static Tokenizer tokenize_file(FILE *fp) {
+    /*
+     * Let stdio allocate the buffer itself. A caller-supplied buffer would
+     * have to stay alive until the stream is closed, which a local array
+     * cannot do because the caller closes fp after this function returns.
+     */
+    setvbuf(fp, NULL, _IOFBF, 4096);
+    char errbuf[256] = {0}; /* for errors */
+    int has_err = 0;
+    Tokenizer t;
+    t.cap = 4096;
+    t.ntokens = 0;
+    t.tokens = malloc(t.cap * sizeof(*t.tokens));
+    if (!t.tokens) {
+        fprintf(stderr, "Out of memory.\n");
+        abort();
+    }
+
+    LineNo line = 1;
+    LineNo col = 1;
+
+    while (1) {
+        int c = fpeekc(fp);
+        if (c == EOF) break;
+        if (isspace(c)) {
+            if (c == '\n') {
+                line++;
+                col = 0; /* becomes 1 after the increment below */
+            }
+            fnextc(fp);
+            col++;
+            continue;
+        }
+        /* first keyword (in enum order) that the input starts with */
+        Keyword kw;
+        for (kw = 0; kw < KW_COUNT; kw++) {
+            if (fhasprefix(fp, keywords[kw])) {
+                break;
+            }
+        }
+        if (kw != KW_COUNT) {
+            Token kw_token;
+            kw_token.kind = TOKEN_KW;
+            kw_token.kw = kw;
+            tokenizer_add(&t, &kw_token, line, col);
+            col += (LineNo)strlen(keywords[kw]);
+            continue;
+        }
+
+        /* Unknown input: report the rest of the line and skip over it. */
+        if (!fgets(errbuf, sizeof errbuf, fp)) {
+            /* nothing could be read; the ferror check below reports it */
+            break;
+        }
+        size_t len = strlen(errbuf);
+        int has_newline = len && errbuf[len-1] == '\n';
+        if (has_newline) {
+            /* remove newline */
+            errbuf[len-1] = 0;
+        }
+        err_print(line, col, "Unrecognized token:\n\there --> %s\n", errbuf);
+        has_err = 1;
+        if (has_newline) {
+            /* increment line counter because of it */
+            line++;
+            col = 1;
+        } else {
+            /* advance by what fgets actually consumed, not the buffer size */
+            col += (LineNo)len;
+        }
+    }
+    if (ferror(fp)) {
+        fprintf(stderr, "Could not read input.\n");
+        has_err = 1;
+    }
+    if (has_err) {
+        fprintf(stderr, "Errors occured while preprocessing.\n");
+        abort();
+    }
+    t.token = t.tokens;
+    return t;
+}
+
+/*
+ * Releases the token array owned by t and resets t to an empty state, so no
+ * stale pointer is left behind and calling this twice is harmless.
+ */
+static void tokenizer_free(Tokenizer *t) {
+    free(t->tokens);
+    t->tokens = NULL;
+    t->token = NULL;
+    t->ntokens = 0;
+    t->cap = 0;
+}
diff --git a/util/err.c b/util/err.c
new file mode 100644
index 0000000..62886c4
--- /dev/null
+++ b/util/err.c
@@ -0,0 +1,10 @@
+/* Line/column counter type used for source positions in error messages and tokens. */
+typedef uint32_t LineNo;
+
+/*
+ * Prints an error to stderr: first a header naming the line and column, then
+ * the printf-style message built from fmt and the remaining arguments.
+ */
+static void err_print(LineNo line, LineNo col, const char *fmt, ...) {
+    unsigned long at_line = line;
+    unsigned long at_col = col;
+    va_list ap;
+
+    /* TODO: Color */
+    fprintf(stderr, "Error at line %lu col %lu:\n", at_line, at_col);
+
+    va_start(ap, fmt);
+    vfprintf(stderr, fmt, ap);
+    va_end(ap);
+}
diff --git a/util/files.c b/util/files.c
new file mode 100644
index 0000000..0afa843
--- /dev/null
+++ b/util/files.c
@@ -0,0 +1,28 @@
+/*
+ * Returns the next character of fp without consuming it, or EOF if there is
+ * none. The character is read and then pushed back with ungetc.
+ */
+static int fpeekc(FILE *fp) {
+    int next = getc(fp);
+    if (next != EOF)
+        ungetc(next, fp);
+    return next;
+}
+
+#define fnextc getc /* advance to the next character */
+
+/*
+ * Checks whether the upcoming characters of fp are exactly prefix.
+ *
+ * NOTE: Advances and returns # of characters advanced iff prefix is found.
+ * Otherwise the stream is seeked back to where it was and 0 is returned.
+ * Also returns 0 if the current position cannot be determined (ftell fails).
+ * prefix must not be empty.
+ */
+static int fhasprefix(FILE *fp, const char *prefix) {
+    assert(*prefix);
+    long start = ftell(fp);
+    if (start == -1)
+        return 0;
+    const char *p = prefix;
+    while (*p) {
+        int c = getc(fp);
+        /*
+         * getc yields the byte as an unsigned char value; compare against the
+         * same representation so bytes >= 0x80 match even if char is signed.
+         */
+        if (c != (unsigned char)*p) {
+            /* wrong character / EOF */
+            fseek(fp, start, SEEK_SET);
+            return 0;
+        }
+        p++;
+    }
+    return (int)(p - prefix); /* length of prefix */
+}