diff options
author | Leo Tenenbaum <pommicket@gmail.com> | 2019-08-16 21:54:39 -0400 |
---|---|---|
committer | Leo Tenenbaum <pommicket@gmail.com> | 2019-08-16 21:54:39 -0400 |
commit | cef4ee8d70dc9a06fcdb8eb88cbc9ddf4827c6a8 (patch) | |
tree | 61d7e2a587fb74f44814e2cd2ef01205eb31b02f | |
parent | 95d2bbf3930c39116fe0bee78ed6feb734c38b0a (diff) |
Finished literals; cleaned up code
-rw-r--r-- | identifiers.c | 5 | ||||
-rw-r--r-- | main.c | 20 | ||||
-rw-r--r-- | test.toc | 5 | ||||
-rw-r--r-- | tokenizer.c | 134 | ||||
-rw-r--r-- | util/colored_text.c | 10 | ||||
-rw-r--r-- | util/err.c | 2 |
6 files changed, 123 insertions, 53 deletions
diff --git a/identifiers.c b/identifiers.c index e88c745..5615452 100644 --- a/identifiers.c +++ b/identifiers.c @@ -19,6 +19,11 @@ static int isident(int c) { return ident_char_index(c) != -1; /* OPTIM: Write separate function */ } +/* can this character be used as the first character in an identifier? */ +static int isidentstart(int c) { + return isident(c) && c != '_' && c != '.'; +} + typedef struct IdentTree { /* zero value is an empty trie */ long id; @@ -5,6 +5,8 @@ #include <stdarg.h> #include <string.h> #include <ctype.h> +#include <limits.h> +#include "util/colored_text.c" #include "util/err.c" #include "identifiers.c" #include "tokenizer.c" @@ -21,17 +23,21 @@ int main(int argc, char **argv) { return EXIT_FAILURE; } - char *contents = err_malloc(4096); /* TODO:check files with >this */ - size_t contents_cap = 4096; - size_t contents_len = 0; + char *contents = err_malloc(4096); + long contents_cap = 4096; + long contents_len = 0; while (fgets(contents + contents_len, (int)(contents_cap - contents_len), in)) { - contents_len += strlen(contents + contents_len); - if (contents_len >= contents_cap - 1024) { + contents_len += (long)strlen(contents + contents_len); + + if (contents_len >= (long)contents_cap - 1024) { contents_cap *= 2; - contents = err_realloc(contents, contents_cap); + contents = err_realloc(contents, (size_t)contents_cap); } } - /* TODO: check ferror */ + if (ferror(in)) { + fprintf(stderr, "Error reading input file: %s.\n", argv[1]); + return EXIT_FAILURE; + } Tokenizer t = tokenize_string(contents); @@ -1,4 +1,7 @@ 0x3f3a == 0777 -/* /* /*foo*/*/ /**/*/!~~ +923.5808 == 2e-33 +38942187381273e+102 +!/* /* /*foo*/*/ /**/*/ +!!!!
\ No newline at end of file diff --git a/tokenizer.c b/tokenizer.c index ff03e4f..b2b2eb3 100644 --- a/tokenizer.c +++ b/tokenizer.c @@ -21,23 +21,12 @@ static const char *keywords[KW_COUNT] = #define TOKENIZER_USE_LLONG 1 -#if TOKENIZER_USE_LLONG -typedef long long LiteralInt; -typedef unsigned long long LiteralUInt; -#define LITERAL_INT_FMT "%lld" -#define LITERAL_UINT_FMT "%llu" -#else -typedef long LiteralInt; -typedef unsigned long LiteralUInt; -#define LITERAL_INT_FMT "%ld" -#define LITERAL_UINT_FMT "%lu" -#endif +typedef unsigned long long LiteralInt; -typedef double LiteralReal; +typedef long double LiteralReal; /* OPTIM: Maybe only use double */ typedef enum { NUM_LITERAL_INT, - NUM_LITERAL_UINT, NUM_LITERAL_REAL } NumLiteralKind; @@ -45,7 +34,6 @@ typedef struct { NumLiteralKind kind; union { LiteralInt intval; - LiteralUInt uintval; LiteralReal realval; }; } NumLiteral; @@ -83,13 +71,10 @@ static void token_fprint(FILE *out, Token *t) { fprintf(out, "number: "); switch (t->num.kind) { case NUM_LITERAL_INT: - fprintf(out, LITERAL_INT_FMT, t->num.intval); - break; - case NUM_LITERAL_UINT: - fprintf(out, LITERAL_UINT_FMT, t->num.uintval); + fprintf(out, "%llu", t->num.intval); break; case NUM_LITERAL_REAL: - fprintf(out, "%f", t->num.realval); + fprintf(out, "%g", (double)t->num.realval); break; } break; @@ -102,7 +87,7 @@ static void token_fprint(FILE *out, Token *t) { static void tokenizer_add(Tokenizer *t, Token *token, LineNo line, LineNo col) { if (t->ntokens >= t->cap) { t->cap *= 2; - t->tokens = err_realloc(t->tokens, t->cap); + t->tokens = err_realloc(t->tokens, t->cap * sizeof(*t->tokens)); } token->line = line; token->col = col; @@ -112,9 +97,9 @@ static void tokenizer_add(Tokenizer *t, Token *token, LineNo line, LineNo col) { static Tokenizer tokenize_string(char *s) { /* NOTE: May modify string. Don't even try to pass it a literal.*/ int has_err = 0; Tokenizer t; - t.cap = 4096; /* TODO: test more tokens than this */ + t.cap = 256; t.ntokens = 0; - t.tokens = malloc(t.cap * sizeof(*t.tokens)); + t.tokens = err_malloc(t.cap * sizeof(*t.tokens)); LineNo line = 1; LineNo col = 1; @@ -190,11 +175,16 @@ static Tokenizer tokenize_string(char *s) { /* NOTE: May modify string. Don't ev s += (LineNo)strlen(keywords[kw]); continue; } + + /* check if it's a number */ if (isdigit(*s)) { - /* it's a numerical constant */ + /* it's a numerical literal */ int base = 10; - LiteralInt intval = 0; + LiteralReal decimal_pow10; + NumLiteral l; + l.kind = NUM_LITERAL_INT; + l.intval = 0; LineNo line_start = line, col_start = col; if (*s == '0') { s++; col++; @@ -218,11 +208,52 @@ static Tokenizer tokenize_string(char *s) { /* NOTE: May modify string. Don't ev } } } + while (1) { if (*s == '.') { - /* TODO */ + if (l.kind == NUM_LITERAL_REAL) { + err_print(line, col, "Double . in number."); + goto err; + } + if (base != 10) { + err_print(line, col, "Decimal point in non base 10 number."); + goto err; + } + l.kind = NUM_LITERAL_REAL; + decimal_pow10 = 0.1; + l.realval = (LiteralReal)l.intval; + s++, col++; + continue; } else if (*s == 'e') { - /* TODO */ + s++; col++; + if (l.kind == NUM_LITERAL_INT) { + l.kind = NUM_LITERAL_REAL; + l.realval = (LiteralReal)l.intval; + } + /* TODO: check if exceeding maximum exponent */ + int exponent = 0; + if (*s == '+') { + s++; col++; + } + + int negative_exponent = 0; + if (*s == '-') { + s++; col++; + negative_exponent = 1; + } + for (; isdigit(*s); s++, col++) { + exponent *= 10; + exponent += *s - '0'; + } + /* OPTIM: Slow for very large exponents (unlikely to happen) */ + for (int i = 0; i < exponent; i++) { + if (negative_exponent) + l.realval /= 10; + else + l.realval *= 10; + } + + break; } int digit = -1; if (base == 16) { @@ -236,23 +267,39 @@ static Tokenizer tokenize_string(char *s) { /* NOTE: May modify string. Don't ev digit = *s - '0'; } if (digit < 0 || digit >= base) { + if (isdigit(*s)) { + /* something like 0b011012 */ + err_print(line, col, "Digit %d cannot appear in a base %d number.", digit, base); + goto err; + } /* end of numerical literal */ break; } - /* TODO: check overflow; switch to uint */ - intval *= base; - intval += digit; + switch (l.kind) { + case NUM_LITERAL_INT: + if (l.intval > ULLONG_MAX / (LiteralInt)base) { + /* too big! */ + err_print(line, col, "Number too big to fit in a numerical literal."); + goto err; + } + l.intval *= (LiteralInt)base; + l.intval += (LiteralInt)digit; + break; + case NUM_LITERAL_REAL: + l.realval += decimal_pow10 * (LiteralReal)digit; + decimal_pow10 /= 10; + break; + } s++; col++; } Token token; token.kind = TOKEN_NUM_LITERAL; - token.num.kind = NUM_LITERAL_INT; - token.num.intval = intval; + token.num = l; tokenizer_add(&t, &token, line_start, col_start); continue; } - if (isident(*s)) { + if (isidentstart(*s)) { /* it's an identifier */ Identifier ident = ident_insert(&s); Token token; @@ -261,27 +308,26 @@ static Tokenizer tokenize_string(char *s) { /* NOTE: May modify string. Don't ev tokenizer_add(&t, &token, line, col); continue; } - int has_newline; char *end_of_line = strchr(s, '\n'); has_newline = end_of_line != NULL; if (has_newline) *end_of_line = 0; - err_print(line, col, "Unrecognized token:\n\there --> %s\n", s); + err_print(line, col, TEXT_IMPORTANT("Unrecognized token:") "\n\there --> %s\n", s); + if (has_newline) + *end_of_line = '\n'; + err: has_err = 1; - if (has_newline) { - /* increment line counter because of it */ - line++; - col = 1; - } else { - col += (LineNo)strlen(s); - } - s += strlen(s); + s = strchr(s, '\n'); + if (s == NULL) break; + s++; /* move past newline */ + col = 1; + line++; + } - /* TODO: Check ferror/errno */ if (has_err) { - fprintf(stderr, "Errors occured while preprocessing.\n"); + fprintf(stderr, TEXT_IMPORTANT("Errors occured while preprocessing.\n")); abort(); } t.token = t.tokens; diff --git a/util/colored_text.c b/util/colored_text.c new file mode 100644 index 0000000..836f357 --- /dev/null +++ b/util/colored_text.c @@ -0,0 +1,10 @@ + +#define USE_COLORED_TEXT 1 + +#if USE_COLORED_TEXT +#define TEXT_ERROR(x) "\x1b[91m" x "\x1b[0m" +#define TEXT_IMPORTANT(x) "\x1b[1m" x "\x1b[0m" +#else +#define TEXT_ERROR(x) x +#define TEXT_IMPORTANT(x) x +#endif @@ -3,7 +3,7 @@ typedef uint32_t LineNo; static void err_print(LineNo line, LineNo col, const char *fmt, ...) { /* TODO: Color */ va_list args; - fprintf(stderr, "Error at line %lu col %lu:\n", (unsigned long)line, (unsigned long)col); + fprintf(stderr, TEXT_ERROR("error:") " at line %lu col %lu:\n", (unsigned long)line, (unsigned long)col); va_start(args, fmt); vfprintf(stderr, fmt, args); va_end(args); |