From 390f1e368cfdc5011e9eb9af76d2fb44cd8dc0b2 Mon Sep 17 00:00:00 2001 From: Leo Tenenbaum Date: Sat, 7 Dec 2019 18:21:03 -0500 Subject: fixed something weird going on with the tokenizer that might be a bug in clang --- README.html | 10 ++++------ README.md | 1 + build.sh | 4 +++- cgen.c | 6 ++++-- decls_cgen.c | 3 +-- docs.sh | 6 ++++++ docs/00.html | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ docs/00.md | 25 +++++++++++++++++-------- docs/01.html | 18 ++++++++++++++++++ docs/01.md | 18 ++++++++++++++++++ main.c | 2 +- parse.c | 6 +++++- scope.c | 7 +++++++ test.toc | 19 +------------------ tokenizer.c | 36 +++++++++++++++++------------------ types.c | 3 +-- 16 files changed, 166 insertions(+), 59 deletions(-) create mode 100755 docs.sh create mode 100644 docs/00.html create mode 100644 docs/01.html create mode 100644 docs/01.md diff --git a/README.html b/README.html index d3476a5..fe1fd75 100644 --- a/README.html +++ b/README.html @@ -22,12 +22,10 @@ x : int; x = 5; // Declare x as an integer, then set it to 5.

toc is statically typed and has many of C's features, but -it is just as fast in theory.

+it is nearly as fast in theory.

-

See docs for more information.

+

See docs for more information (in progress).

-
- -

Help

+

tests has some test programs written in toc.

-

If you find a bug in toc, please report an issue on GitHub.

+

To compile the compiler on a Unix-y system, use

diff --git a/README.md b/README.md index 0447208..4cbe8a7 100644 --- a/README.md +++ b/README.md @@ -28,3 +28,4 @@ See `docs` for more information (in progress). `tests` has some test programs written in `toc`. +To compile the compiler on a Unix-y system, use diff --git a/build.sh b/build.sh index abce67c..da042bd 100755 --- a/build.sh +++ b/build.sh @@ -11,8 +11,10 @@ ADDITIONAL_FLAGS="$CFLAGS -Wno-unused-function" if [ "$CC" = "clang" ]; then WARNINGS='-Wall -Wextra -Wpedantic -Wshadow -Wconversion -Wimplicit-fallthrough' -else +elif [ "$CC" = "gcc" ]; then WARNINGS='-Wall -Wextra -Wpedantic -Wshadow -Wconversion -Wno-pointer-to-int-cast' +else + WARNINGS='' fi DEBUG_FLAGS="-O0 -g3 $WARNINGS -std=c11 -DTOC_DEBUG" diff --git a/cgen.c b/cgen.c index b40870f..22b9fa7 100644 --- a/cgen.c +++ b/cgen.c @@ -1747,8 +1747,10 @@ static bool cgen_decl(CGenerator *g, Declaration *d) { /* don't generate function pointer declaration for constant fns */ continue; } - if (!cgen_val_pre(g, *val, type, d->where)) - return false; + if (has_expr) { + if (!cgen_val_pre(g, *val, type, d->where)) + return false; + } if (g->block != NULL) cgen_write(g, "static "); if (!cgen_type_pre(g, type, d->where)) return false; diff --git a/decls_cgen.c b/decls_cgen.c index 3fe21bd..a3d35d2 100644 --- a/decls_cgen.c +++ b/decls_cgen.c @@ -10,8 +10,7 @@ static bool cgen_decls_decl(CGenerator *g, Declaration *d); static bool cgen_decls_fn_instances(CGenerator *g, Expression *e) { assert(e->kind == EXPR_FN); FnExpr *f = &e->fn; - FnType *type = &e->type.fn; - assert(type->constness); + assert(e->type.fn.constness); Instance **data = f->instances.data; for (U64 i = 0; i < f->instances.cap; i++) { if (f->instances.occupied[i]) { diff --git a/docs.sh b/docs.sh new file mode 100755 index 0000000..81cde33 --- /dev/null +++ b/docs.sh @@ -0,0 +1,6 @@ +#!/bin/bash +markdown README.md > README.html +for x in docs/*.md; do + echo $x + markdown $x > $(dirname $x)/$(basename $x .md).html +done diff --git 
a/docs/00.html b/docs/00.html new file mode 100644 index 0000000..cd0c352 --- /dev/null +++ b/docs/00.html @@ -0,0 +1,61 @@ +

Declarations

+ +

In toc, declarations have the following syntax: + +<name> :[:] [type] [= expression]; +

+ +

The square brackets ([]) indicate something optional.

+ +

All of the following statements +declare a new variable x which is an integer, and has a value of 0: + +x : int; +x : int = 0; +x := 0; + +Note that in the first of those statements, although no expression +is specified, it defaults to 0. This is not true in C, +and there will eventually probably be an option to +leave x uninitialized.

+ +

If you wanted x to be a floating-point number, you could use: + +x : float; +x : float = 0; +x := 0.0; +

+ +

Note that 0 can be used as both a float and an integer, but +when no type is specified, it defaults to an int, whereas 0.0 +defaults to a float.

+ +

Here are all of toc's builtin types and their ranges of values:

+ + + +

At the moment, it is not technically guaranteed that f32/float is actually 32-bit and that f64 is actually 64-bit; they are platform dependent. Perhaps someday there will be a version of toc which does not compile to C, where that could be guaranteed.

+ +

To make declarations constant, use :: instead of :. e.g.

+ +

+x ::= 5+3;
+y :: float = 5.123; +

+ +

Here, "constant" means constant at compile time, not read-only as it does in C. One interesting thing about toc is that normal functions can run at compile time, so pretty much any expression is a valid initializer for a constant, e.g. doing x ::= some_function(); runs some_function at compile time, not at run time.

diff --git a/docs/00.md b/docs/00.md index e4e8d41..4998684 100644 --- a/docs/00.md +++ b/docs/00.md @@ -1,8 +1,8 @@ -## Declarations in toc +## Declarations -Declarations have the following syntax: +In toc, declarations have the following syntax: ``` -<name> : [type] [= expression]; +<name> :[:] [type] [= expression]; ``` The square brackets (`[]`) indicate something optional. @@ -41,10 +41,19 @@ Here are all of toc's builtin types and their ranges of values: - `u16` - 16-bit unsigned integer, 0 to 65535 - `u32` - 32-bit unsigned integer, 0 to 4294967295 - `u64` - 64-bit unsigned integer, 0 to 18446744073709551615 -- `float` - A 32-bit floating-point number, -- `f32` -- `f64` -- `bool` -- `char` +- `float` - A 32-bit floating-point number, -3.40282347e+38 to 3.40282347e+38 +- `f32` - A 32-bit floating-point number (same as `float`, but more explicit about the size) +- `f64` - A 64-bit floating-point number, -1.7976931348623157e+308 to 1.7976931348623157e+308 +- `bool` - A boolean value, either `false` or `true`. +- `char` - A character. The specific values are technically platform-dependent, but usually there are 256 of them. At the moment, it is not technically guaranteed that `f32`/`float` is actually 32-bit and that `f64` is actually 64-bit; they are platform dependent. Perhaps someday there will be a version of toc which does not compile to C, where that could be guaranteed. + +To make declarations constant, use `::` instead of `:`. e.g. + +``` +x ::= 5+3; +y :: float = 5.123; +``` + +Here, "constant" means constant at compile time, not read-only as it does in C. One interesting thing about toc is that normal functions can run at compile time, so pretty much any expression is a valid initializer for a constant, e.g. doing `x ::= some_function();` runs `some_function` at compile time, not at run time. diff --git a/docs/01.html b/docs/01.html new file mode 100644 index 0000000..633295b --- /dev/null +++ b/docs/01.html @@ -0,0 +1,18 @@ +

A first program

+ +

The main function in toc corresponds to the main function in C. This function is called when your program is run. So, this is a valid toc program which does nothing:

+ +

+main ::= fn() { +}; +

+ +

It declares a constant, main, which is a function with an empty body. Note that the syntax for declaring functions is the same as the syntax for declaring constants (it isn't something like fn main() { ... }).

+ +

Assuming you have compiled the compiler (see README.md for instructions about that), you can compile it with

+ +

+toc <your filename> +

+ +

You will get a file called out.c, which you can then put through your C compiler to get an executable file which does nothing. Congratulations! You've written your first toc program.

diff --git a/docs/01.md b/docs/01.md new file mode 100644 index 0000000..816f3e6 --- /dev/null +++ b/docs/01.md @@ -0,0 +1,18 @@ +### A first program + +The `main` function in toc corresponds to the `main` function in C. This function is called when your program is run. So, this is a valid toc program which does nothing: + +``` +main ::= fn() { +}; +``` + +It declares a constant, `main`, which is a function with an empty body. Note that the syntax for declaring functions is the same as the syntax for declaring constants (it isn't something like `fn main() { ... }`). + +Assuming you have compiled the compiler (see `README.md` for instructions about that), you can compile it with + +``` +toc +``` + +You will get a file called `out.c`, which you can then put through your C compiler to get an executable file which does nothing. Congratulations! You've written your first toc program. diff --git a/main.c b/main.c index 1ca547b..da2303e 100644 --- a/main.c +++ b/main.c @@ -122,10 +122,10 @@ int main(int argc, char **argv) { evalr_create(&ev, &tr, &main_allocr); typer_create(&tr, &ev, &main_allocr); - if (!block_enter(NULL, f.stmts, SCOPE_CHECK_REDECL)) /* enter global scope */ return false; + /* fprint_parsed_file(stdout, &f); */ if (!types_file(&tr, &f)) { err_fprint(TEXT_IMPORTANT("Errors occured while determining types.\n")); return EXIT_FAILURE; diff --git a/parse.c b/parse.c index a1b5950..d80afd1 100644 --- a/parse.c +++ b/parse.c @@ -170,6 +170,10 @@ static Keyword builtin_type_to_kw(BuiltinType t) { return KW_COUNT; } +/* TODO: DELME */ +static void fprint_expr(FILE *out, Expression *expr); + + /* returns the number of characters written, not including the null character */ static size_t type_to_str_(Type *t, char *buffer, size_t bufsize) { /* if ((t->flags & TYPE_IS_RESOLVED) && t->was_expr) { */ @@ -610,7 +614,7 @@ static bool parse_type(Parser *p, Type *type) { default: /* TYPE_EXPR */ if (parse_expr(p, type->expr = parser_new_expr(p), - expr_find_end(p, -1 /* 
end as soon as possible */))) { + expr_find_end(p, (ExprEndFlags)-1 /* end as soon as possible */))) { type->kind = TYPE_EXPR; } else { tokr_err(t, "Unrecognized type."); diff --git a/scope.c b/scope.c index 0774cfa..5416883 100644 --- a/scope.c +++ b/scope.c @@ -113,4 +113,11 @@ static bool each_enter(Expression *e) { static void each_exit(Expression *e) { assert(e->kind == EXPR_EACH); + EachExpr *ea = &e->each; + if (ea->index) { + arr_remove_last(&ea->index->decls); + } + if (ea->value) { + arr_remove_last(&ea->value->decls); + } } diff --git a/test.toc b/test.toc index c5aa6b5..e17d3a2 100644 --- a/test.toc +++ b/test.toc @@ -1,18 +1 @@ -puti ::= fn(x: int) { - #C("printf(\"%ld\\n\", (long)x); -"); -}; -putf ::= fn(x: float) { - #C("printf(\"%f\\n\", (double)x); -"); -}; - -Point ::= struct { - x, y : int; -}; - -main ::= fn() { - p : Point; - p.x = 5; - puti(p[if 5 > 6 { "x" } else { "y" }]); -}; \ No newline at end of file +x : [1]int; \ No newline at end of file diff --git a/tokenizer.c b/tokenizer.c index 241800f..4a57fd1 100644 --- a/tokenizer.c +++ b/tokenizer.c @@ -288,11 +288,12 @@ static bool tokenize_string(Tokenizer *t, char *str) { /* it's a numeric literal */ int base = 10; Floating decimal_pow10 = 0; - NumLiteral n; - n.kind = NUM_LITERAL_INT; - n.intval = 0; Token *token = tokr_add(t); tokr_put_location(t, token); + NumLiteral *n = &token->num; + n->kind = NUM_LITERAL_INT; + n->intval = 0; + if (*t->s == '0') { tokr_nextchar(t); /* octal/hexadecimal/binary (or zero) */ @@ -321,7 +322,7 @@ static bool tokenize_string(Tokenizer *t, char *str) { /* .. (not a decimal point; end the number here) */ break; } - if (n.kind == NUM_LITERAL_FLOAT) { + if (n->kind == NUM_LITERAL_FLOAT) { tokenization_err(t, "Double . 
in number."); goto err; } @@ -329,16 +330,16 @@ static bool tokenize_string(Tokenizer *t, char *str) { tokenization_err(t, "Decimal point in non base 10 number."); goto err; } - n.kind = NUM_LITERAL_FLOAT; + n->kind = NUM_LITERAL_FLOAT; decimal_pow10 = 0.1; - n.floatval = (Floating)n.intval; + n->floatval = (Floating)n->intval; tokr_nextchar(t); continue; } else if (*t->s == 'e') { tokr_nextchar(t); - if (n.kind == NUM_LITERAL_INT) { - n.kind = NUM_LITERAL_FLOAT; - n.floatval = (Floating)n.intval; + if (n->kind == NUM_LITERAL_INT) { + n->kind = NUM_LITERAL_FLOAT; + n->floatval = (Floating)n->intval; } /* TODO: check if exceeding maximum exponent */ int exponent = 0; @@ -357,9 +358,9 @@ static bool tokenize_string(Tokenizer *t, char *str) { /* OPTIM: Slow for very large exponents (unlikely to happen) */ for (int i = 0; i < exponent; i++) { if (negative_exponent) - n.floatval /= 10; + n->floatval /= 10; else - n.floatval *= 10; + n->floatval *= 10; } break; @@ -384,19 +385,19 @@ static bool tokenize_string(Tokenizer *t, char *str) { /* end of numeric literal */ break; } - switch (n.kind) { + switch (n->kind) { case NUM_LITERAL_INT: - if ((UInteger)n.intval > (UInteger)UINTEGER_MAX / (UInteger)base || - (UInteger)n.intval * (UInteger)base > (UInteger)UINTEGER_MAX - (UInteger)digit) { + if ((UInteger)n->intval > (UInteger)UINTEGER_MAX / (UInteger)base || + (UInteger)n->intval * (UInteger)base > (UInteger)UINTEGER_MAX - (UInteger)digit) { /* too big! 
*/ tokenization_err(t, "Number too big to fit in a numeric literal."); goto err; } - n.intval *= (UInteger)base; - n.intval += (UInteger)digit; + n->intval *= (UInteger)base; + n->intval += (UInteger)digit; break; case NUM_LITERAL_FLOAT: - n.floatval += decimal_pow10 * (Floating)digit; + n->floatval += decimal_pow10 * (Floating)digit; decimal_pow10 /= 10; break; } @@ -404,7 +405,6 @@ static bool tokenize_string(Tokenizer *t, char *str) { } token->kind = TOKEN_LITERAL_NUM; - token->num = n; continue; } diff --git a/types.c b/types.c index 1af859e..ebe5b0d 100644 --- a/types.c +++ b/types.c @@ -452,7 +452,6 @@ static bool type_resolve(Typer *tr, Type *t, Location where) { Value val; Expression *n_expr = t->arr.n_expr; if (!types_expr(tr, n_expr)) return false; - if (n_expr->type.kind == TYPE_UNKNOWN) { err_print(n_expr->where, "Cannot determine type of array size at compile time."); return false; @@ -1134,7 +1133,7 @@ static bool types_expr(Typer *tr, Expression *e) { } } } - FnExpr *original_fn; + FnExpr *original_fn = NULL; Type table_index_type = {0}; Value table_index = {0}; FnExpr fn_copy; -- cgit v1.2.3