summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- identifiers.c 5
-rw-r--r-- main.c 20
-rw-r--r-- test.toc 5
-rw-r--r-- tokenizer.c 134
-rw-r--r-- util/colored_text.c 10
-rw-r--r-- util/err.c 2
6 files changed, 123 insertions, 53 deletions
diff --git a/identifiers.c b/identifiers.c
index e88c745..5615452 100644
--- a/identifiers.c
+++ b/identifiers.c
@@ -19,6 +19,11 @@ static int isident(int c) {
return ident_char_index(c) != -1; /* OPTIM: Write separate function */
}
+/* Can c be the first character of an identifier? Returns nonzero when isident(c) accepts c and c is neither '_' nor '.'. */
+static int isidentstart(int c) {
+ return isident(c) && c != '_' && c != '.';
+}
+
typedef struct IdentTree {
/* zero value is an empty trie */
long id;
diff --git a/main.c b/main.c
index 099e311..20407a1 100644
--- a/main.c
+++ b/main.c
@@ -5,6 +5,8 @@
#include <stdarg.h>
#include <string.h>
#include <ctype.h>
+#include <limits.h>
+#include "util/colored_text.c"
#include "util/err.c"
#include "identifiers.c"
#include "tokenizer.c"
@@ -21,17 +23,21 @@ int main(int argc, char **argv) {
return EXIT_FAILURE;
}
- char *contents = err_malloc(4096); /* TODO:check files with >this */
- size_t contents_cap = 4096;
- size_t contents_len = 0;
+ char *contents = err_malloc(4096);
+ long contents_cap = 4096;
+ long contents_len = 0;
while (fgets(contents + contents_len, (int)(contents_cap - contents_len), in)) {
- contents_len += strlen(contents + contents_len);
- if (contents_len >= contents_cap - 1024) {
+ contents_len += (long)strlen(contents + contents_len);
+
+ if (contents_len >= (long)contents_cap - 1024) {
contents_cap *= 2;
- contents = err_realloc(contents, contents_cap);
+ contents = err_realloc(contents, (size_t)contents_cap);
}
}
- /* TODO: check ferror */
+ if (ferror(in)) {
+ fprintf(stderr, "Error reading input file: %s.\n", argv[1]);
+ return EXIT_FAILURE;
+ }
Tokenizer t = tokenize_string(contents);
diff --git a/test.toc b/test.toc
index 654c1e1..8800b33 100644
--- a/test.toc
+++ b/test.toc
@@ -1,4 +1,7 @@
0x3f3a == 0777
-/* /* /*foo*/*/ /**/*/!~~
+923.5808 == 2e-33
+38942187381273e+102
+!/* /* /*foo*/*/ /**/*/
+!!!!
\ No newline at end of file
diff --git a/tokenizer.c b/tokenizer.c
index ff03e4f..b2b2eb3 100644
--- a/tokenizer.c
+++ b/tokenizer.c
@@ -21,23 +21,12 @@ static const char *keywords[KW_COUNT] =
#define TOKENIZER_USE_LLONG 1
-#if TOKENIZER_USE_LLONG
-typedef long long LiteralInt;
-typedef unsigned long long LiteralUInt;
-#define LITERAL_INT_FMT "%lld"
-#define LITERAL_UINT_FMT "%llu"
-#else
-typedef long LiteralInt;
-typedef unsigned long LiteralUInt;
-#define LITERAL_INT_FMT "%ld"
-#define LITERAL_UINT_FMT "%lu"
-#endif
+typedef unsigned long long LiteralInt;
-typedef double LiteralReal;
+typedef long double LiteralReal; /* OPTIM: Maybe only use double */
typedef enum {
NUM_LITERAL_INT,
- NUM_LITERAL_UINT,
NUM_LITERAL_REAL
} NumLiteralKind;
@@ -45,7 +34,6 @@ typedef struct {
NumLiteralKind kind;
union {
LiteralInt intval;
- LiteralUInt uintval;
LiteralReal realval;
};
} NumLiteral;
@@ -83,13 +71,10 @@ static void token_fprint(FILE *out, Token *t) {
fprintf(out, "number: ");
switch (t->num.kind) {
case NUM_LITERAL_INT:
- fprintf(out, LITERAL_INT_FMT, t->num.intval);
- break;
- case NUM_LITERAL_UINT:
- fprintf(out, LITERAL_UINT_FMT, t->num.uintval);
+ fprintf(out, "%llu", t->num.intval);
break;
case NUM_LITERAL_REAL:
- fprintf(out, "%f", t->num.realval);
+ fprintf(out, "%g", (double)t->num.realval);
break;
}
break;
@@ -102,7 +87,7 @@ static void token_fprint(FILE *out, Token *t) {
static void tokenizer_add(Tokenizer *t, Token *token, LineNo line, LineNo col) {
if (t->ntokens >= t->cap) {
t->cap *= 2;
- t->tokens = err_realloc(t->tokens, t->cap);
+ t->tokens = err_realloc(t->tokens, t->cap * sizeof(*t->tokens));
}
token->line = line;
token->col = col;
@@ -112,9 +97,9 @@ static void tokenizer_add(Tokenizer *t, Token *token, LineNo line, LineNo col) {
static Tokenizer tokenize_string(char *s) { /* NOTE: May modify string. Don't even try to pass it a literal.*/
int has_err = 0;
Tokenizer t;
- t.cap = 4096; /* TODO: test more tokens than this */
+ t.cap = 256;
t.ntokens = 0;
- t.tokens = malloc(t.cap * sizeof(*t.tokens));
+ t.tokens = err_malloc(t.cap * sizeof(*t.tokens));
LineNo line = 1;
LineNo col = 1;
@@ -190,11 +175,16 @@ static Tokenizer tokenize_string(char *s) { /* NOTE: May modify string. Don't ev
s += (LineNo)strlen(keywords[kw]);
continue;
}
+
+ /* check if it's a number */
if (isdigit(*s)) {
- /* it's a numerical constant */
+ /* it's a numerical literal */
int base = 10;
- LiteralInt intval = 0;
+ LiteralReal decimal_pow10;
+ NumLiteral l;
+ l.kind = NUM_LITERAL_INT;
+ l.intval = 0;
LineNo line_start = line, col_start = col;
if (*s == '0') {
s++; col++;
@@ -218,11 +208,52 @@ static Tokenizer tokenize_string(char *s) { /* NOTE: May modify string. Don't ev
}
}
}
+
while (1) {
if (*s == '.') {
- /* TODO */
+ if (l.kind == NUM_LITERAL_REAL) {
+ err_print(line, col, "Double . in number.");
+ goto err;
+ }
+ if (base != 10) {
+ err_print(line, col, "Decimal point in non base 10 number.");
+ goto err;
+ }
+ l.kind = NUM_LITERAL_REAL;
+ decimal_pow10 = 0.1;
+ l.realval = (LiteralReal)l.intval;
+ s++, col++;
+ continue;
} else if (*s == 'e') {
- /* TODO */
+ s++; col++;
+ if (l.kind == NUM_LITERAL_INT) {
+ l.kind = NUM_LITERAL_REAL;
+ l.realval = (LiteralReal)l.intval;
+ }
+ /* TODO: check if exceeding maximum exponent */
+ int exponent = 0;
+ if (*s == '+') {
+ s++; col++;
+ }
+
+ int negative_exponent = 0;
+ if (*s == '-') {
+ s++; col++;
+ negative_exponent = 1;
+ }
+ for (; isdigit(*s); s++, col++) {
+ exponent *= 10;
+ exponent += *s - '0';
+ }
+ /* OPTIM: Slow for very large exponents (unlikely to happen) */
+ for (int i = 0; i < exponent; i++) {
+ if (negative_exponent)
+ l.realval /= 10;
+ else
+ l.realval *= 10;
+ }
+
+ break;
}
int digit = -1;
if (base == 16) {
@@ -236,23 +267,39 @@ static Tokenizer tokenize_string(char *s) { /* NOTE: May modify string. Don't ev
digit = *s - '0';
}
if (digit < 0 || digit >= base) {
+ if (isdigit(*s)) {
+ /* something like 0b011012 */
+ err_print(line, col, "Digit %d cannot appear in a base %d number.", digit, base);
+ goto err;
+ }
/* end of numerical literal */
break;
}
- /* TODO: check overflow; switch to uint */
- intval *= base;
- intval += digit;
+ switch (l.kind) {
+ case NUM_LITERAL_INT:
+ if (l.intval > ULLONG_MAX / (LiteralInt)base) {
+ /* too big! */
+ err_print(line, col, "Number too big to fit in a numerical literal.");
+ goto err;
+ }
+ l.intval *= (LiteralInt)base;
+ l.intval += (LiteralInt)digit;
+ break;
+ case NUM_LITERAL_REAL:
+ l.realval += decimal_pow10 * (LiteralReal)digit;
+ decimal_pow10 /= 10;
+ break;
+ }
s++; col++;
}
Token token;
token.kind = TOKEN_NUM_LITERAL;
- token.num.kind = NUM_LITERAL_INT;
- token.num.intval = intval;
+ token.num = l;
tokenizer_add(&t, &token, line_start, col_start);
continue;
}
- if (isident(*s)) {
+ if (isidentstart(*s)) {
/* it's an identifier */
Identifier ident = ident_insert(&s);
Token token;
@@ -261,27 +308,26 @@ static Tokenizer tokenize_string(char *s) { /* NOTE: May modify string. Don't ev
tokenizer_add(&t, &token, line, col);
continue;
}
-
int has_newline;
char *end_of_line = strchr(s, '\n');
has_newline = end_of_line != NULL;
if (has_newline)
*end_of_line = 0;
- err_print(line, col, "Unrecognized token:\n\there --> %s\n", s);
+ err_print(line, col, TEXT_IMPORTANT("Unrecognized token:") "\n\there --> %s\n", s);
+ if (has_newline)
+ *end_of_line = '\n';
+ err:
has_err = 1;
- if (has_newline) {
- /* increment line counter because of it */
- line++;
- col = 1;
- } else {
- col += (LineNo)strlen(s);
- }
- s += strlen(s);
+ s = strchr(s, '\n');
+ if (s == NULL) break;
+ s++; /* move past newline */
+ col = 1;
+ line++;
+
}
- /* TODO: Check ferror/errno */
if (has_err) {
- fprintf(stderr, "Errors occured while preprocessing.\n");
+ fprintf(stderr, TEXT_IMPORTANT("Errors occured while preprocessing.\n"));
abort();
}
t.token = t.tokens;
diff --git a/util/colored_text.c b/util/colored_text.c
new file mode 100644
index 0000000..836f357
--- /dev/null
+++ b/util/colored_text.c
@@ -0,0 +1,10 @@
+/* Macros that wrap a string literal x in terminal escape sequences; x must be a string literal because it is joined by adjacent-literal concatenation. */
+#define USE_COLORED_TEXT 1
+/* USE_COLORED_TEXT nonzero: TEXT_ERROR uses SGR 91 (bright red foreground), TEXT_IMPORTANT uses SGR 1 (bold), each followed by SGR 0 (reset); zero: both expand to x unchanged. */
+#if USE_COLORED_TEXT
+#define TEXT_ERROR(x) "\x1b[91m" x "\x1b[0m"
+#define TEXT_IMPORTANT(x) "\x1b[1m" x "\x1b[0m"
+#else
+#define TEXT_ERROR(x) x
+#define TEXT_IMPORTANT(x) x
+#endif
diff --git a/util/err.c b/util/err.c
index 89a1335..6fd2ac3 100644
--- a/util/err.c
+++ b/util/err.c
@@ -3,7 +3,7 @@ typedef uint32_t LineNo;
static void err_print(LineNo line, LineNo col, const char *fmt, ...) {
/* TODO: Color */
va_list args;
- fprintf(stderr, "Error at line %lu col %lu:\n", (unsigned long)line, (unsigned long)col);
+ fprintf(stderr, TEXT_ERROR("error:") " at line %lu col %lu:\n", (unsigned long)line, (unsigned long)col);
va_start(args, fmt);
vfprintf(stderr, fmt, args);
va_end(args);