diff options
author | Leo Tenenbaum <pommicket@gmail.com> | 2019-12-07 18:21:03 -0500 |
---|---|---|
committer | Leo Tenenbaum <pommicket@gmail.com> | 2019-12-07 18:21:03 -0500 |
commit | 390f1e368cfdc5011e9eb9af76d2fb44cd8dc0b2 (patch) | |
tree | d299c8e4360a68038f575c16d8083275cb1046f0 | |
parent | 9c44be7b25d61450808e918c14b8dfff49a78a8a (diff) |
fixed something weird going on with the tokenizer that might be a bug in clang
-rw-r--r-- | README.html | 10 | ||||
-rw-r--r-- | README.md | 1 | ||||
-rwxr-xr-x | build.sh | 4 | ||||
-rw-r--r-- | cgen.c | 6 | ||||
-rw-r--r-- | decls_cgen.c | 3 | ||||
-rwxr-xr-x | docs.sh | 6 | ||||
-rw-r--r-- | docs/00.html | 61 | ||||
-rw-r--r-- | docs/00.md | 25 | ||||
-rw-r--r-- | docs/01.html | 18 | ||||
-rw-r--r-- | docs/01.md | 18 | ||||
-rw-r--r-- | main.c | 2 | ||||
-rw-r--r-- | parse.c | 6 | ||||
-rw-r--r-- | scope.c | 7 | ||||
-rw-r--r-- | test.toc | 19 | ||||
-rw-r--r-- | tokenizer.c | 36 | ||||
-rw-r--r-- | types.c | 3 |
16 files changed, 166 insertions, 59 deletions
diff --git a/README.html b/README.html index d3476a5..fe1fd75 100644 --- a/README.html +++ b/README.html @@ -22,12 +22,10 @@ x : int; x = 5; // Declare x as an integer, then set it to 5. </code></p> <p><code>toc</code> is statically typed and has many of C's features, but -it is just as fast in theory.</p> +it is nearly as fast in theory.</p> -<p>See <code>docs</code> for more information.</p> +<p>See <code>docs</code> for more information (in progress).</p> -<hr /> - -<h3>Help</h3> +<p><code>tests</code> has some test programs written in <code>toc</code>.</p> -<p>If you find a bug in <code>toc</code>, please <a href="https://github.com/pommicket/toc/issues">report an issue</a> on GitHub.</p> +<p>To compile the compiler on a Unix-y system, use</p> @@ -28,3 +28,4 @@ See `docs` for more information (in progress). `tests` has some test programs written in `toc`. +To compile the compiler on a Unix-y system, use @@ -11,8 +11,10 @@ ADDITIONAL_FLAGS="$CFLAGS -Wno-unused-function" if [ "$CC" = "clang" ]; then WARNINGS='-Wall -Wextra -Wpedantic -Wshadow -Wconversion -Wimplicit-fallthrough' -else +elif [ "$CC" = "gcc" ]; then WARNINGS='-Wall -Wextra -Wpedantic -Wshadow -Wconversion -Wno-pointer-to-int-cast' +else + WARNINGS='' fi DEBUG_FLAGS="-O0 -g3 $WARNINGS -std=c11 -DTOC_DEBUG" @@ -1747,8 +1747,10 @@ static bool cgen_decl(CGenerator *g, Declaration *d) { /* don't generate function pointer declaration for constant fns */ continue; } - if (!cgen_val_pre(g, *val, type, d->where)) - return false; + if (has_expr) { + if (!cgen_val_pre(g, *val, type, d->where)) + return false; + } if (g->block != NULL) cgen_write(g, "static "); if (!cgen_type_pre(g, type, d->where)) return false; diff --git a/decls_cgen.c b/decls_cgen.c index 3fe21bd..a3d35d2 100644 --- a/decls_cgen.c +++ b/decls_cgen.c @@ -10,8 +10,7 @@ static bool cgen_decls_decl(CGenerator *g, Declaration *d); static bool cgen_decls_fn_instances(CGenerator *g, Expression *e) { assert(e->kind == EXPR_FN); FnExpr *f = &e->fn; 
- FnType *type = &e->type.fn; - assert(type->constness); + assert(e->type.fn.constness); Instance **data = f->instances.data; for (U64 i = 0; i < f->instances.cap; i++) { if (f->instances.occupied[i]) { @@ -0,0 +1,6 @@ +#!/bin/bash +markdown README.md > README.html +for x in docs/*.md; do + echo $x + markdown $x > $(dirname $x)/$(basename $x .md).html +done diff --git a/docs/00.html b/docs/00.html new file mode 100644 index 0000000..cd0c352 --- /dev/null +++ b/docs/00.html @@ -0,0 +1,61 @@ +<h2>Declarations</h2> + +<p>In toc, declarations have the following syntax: +<code> +<name> :[:] [type] [= expression]; +</code></p> + +<p>The square brackets (<code>[]</code>) indicate something optional.</p> + +<p>All of the following statements +declare a new variable <code>x</code> which is an integer, and has a value of 0: +<code> +x : int; +x : int = 0; +x := 0; +</code> +Note that in the first of those statements, although no expression +is specified, it defaults to 0. This is not true in C, +and there will eventually probably be an option to +leave <code>x</code> uninitialized.</p> + +<p>If you wanted x to be a floating-point number, you could use: +<code> +x : float; +x : float = 0; +x := 0.0; +</code></p> + +<p>Note that <code>0</code> can be used as both a <code>float</code> and an <code>int</code>eger, but +when no type is specified, it defaults to an <code>int</code>, whereas <code>0.0</code> +defaults to a <code>float</code>.</p> + +<p>Here are all of toc's builtin types and their ranges of values:</p> + +<ul> +<li><code>int</code> - A 64-bit signed integer (always), -9223372036854775808 to 9223372036854775807</li> +<li><code>i8</code> - An 8-bit signed integer, -128 to 127</li> +<li><code>i16</code> - 16-bit signed integer, -32768 to 32767</li> +<li><code>i32</code> - 32-bit signed integer, -2147483648 to 2147483647</li> +<li><code>i64</code> - 64-bit signed integer (same as <code>int</code>, but more explicit about the size), -9223372036854775808 to 
9223372036854775807</li> +<li><code>u8</code> - An 8-bit unsigned integer, 0 to 255</li> +<li><code>u16</code> - 16-bit unsigned integer, 0 to 65535</li> +<li><code>u32</code> - 32-bit unsigned integer, 0 to 4294967295</li> +<li><code>u64</code> - 64-bit unsigned integer, 0 to 18446744073709551615</li> +<li><code>float</code> - A 32-bit floating-point number, -3.40282347e+38 to 3.40282347e+38</li> +<li><code>f32</code> - A 32-bit floating-point number (same as <code>float</code>, but more explicit about the size)</li> +<li><code>f64</code> - A 64-bit floating-point number, -1.7976931348623157e+308 to 1.7976931348623157e+308</li> +<li><code>bool</code> - A boolean value, either <code>false</code> or <code>true</code>.</li> +<li><code>char</code> - A character. The specific values are technically platform-dependent, but usually there are 256 of them.</li> +</ul> + +<p>At the moment, it is not technically guaranteed that <code>f32</code>/<code>float</code> is actually 32-bit and that <code>f64</code> is actually 64-bit; they are platform dependent. Perhaps someday there will be a version of toc which does not compile to C, where that could be guaranteed.</p> + +<p>To make declarations constant, use <code>::</code> instead of <code>:</code>. e.g.</p> + +<p><code> +x ::= 5+3; <br /> +y :: float = 5.123; +</code></p> + +<p>Here, "constant" means constant at compile time, not read-only as it does in C. One interesting thing about toc is that normal functions can run at compile time, so pretty much any expression is a valid initializer for a constant, e.g. doing <code>x ::= some_function();</code> runs <code>some_function</code> at compile time, not at run time.</p> @@ -1,8 +1,8 @@ -## Declarations in toc +## Declarations -Declarations have the following syntax: +In toc, declarations have the following syntax: ``` -<name> : [type] [= expression]; +<name> :[:] [type] [= expression]; ``` The square brackets (`[]`) indicate something optional. 
@@ -41,10 +41,19 @@ Here are all of toc's builtin types and their ranges of values: - `u16` - 16-bit unsigned integer, 0 to 65535 - `u32` - 32-bit unsigned integer, 0 to 4294967295 - `u64` - 64-bit unsigned integer, 0 to 18446744073709551615 -- `float` - A 32-bit floating-point number, -- `f32` -- `f64` -- `bool` -- `char` +- `float` - A 32-bit floating-point number, -3.40282347e+38 to 3.40282347e+38 +- `f32` - A 32-bit floating-point number (same as `float`, but more explicit about the size) +- `f64` - A 64-bit floating-point number, -1.7976931348623157e+308 to 1.7976931348623157e+308 +- `bool` - A boolean value, either `false` or `true`. +- `char` - A character. The specific values are technically platform-dependent, but usually there are 256 of them. At the moment, it is not technically guaranteed that `f32`/`float` is actually 32-bit and that `f64` is actually 64-bit; they are platform dependent. Perhaps someday there will be a version of toc which does not compile to C, where that could be guaranteed. + +To make declarations constant, use `::` instead of `:`. e.g. + +``` +x ::= 5+3; +y :: float = 5.123; +``` + +Here, "constant" means constant at compile time, not read-only as it does in C. One interesting thing about toc is that normal functions can run at compile time, so pretty much any expression is a valid initializer for a constant, e.g. doing `x ::= some_function();` runs `some_function` at compile time, not at run time. diff --git a/docs/01.html b/docs/01.html new file mode 100644 index 0000000..633295b --- /dev/null +++ b/docs/01.html @@ -0,0 +1,18 @@ +<h3>A first program</h3> + +<p>The <code>main</code> function in toc corresponds to the <code>main</code> function in C. This function is called when your program is run. So, this is a valid toc program which does nothing:</p> + +<p><code> +main ::= fn() { +}; +</code></p> + +<p>It declares a constant, <code>main</code>, which is a function with an empty body. 
Note that the syntax for declaring functions is the same as the syntax for declaring constants (it isn't something like <code>fn main() { ... }</code>).</p> + +<p>Assuming you have compiled the compiler (see <code>README.md</code> for instructions about that), you can compile it with</p> + +<p><code> +toc <your filename> +</code></p> + +<p>You will get a file called <code>out.c</code>, which you can then put through your C compiler to get an executable file which does nothing. Congratulations! You've written your first toc program.</p> diff --git a/docs/01.md b/docs/01.md new file mode 100644 index 0000000..816f3e6 --- /dev/null +++ b/docs/01.md @@ -0,0 +1,18 @@ +### A first program + +The `main` function in toc corresponds to the `main` function in C. This function is called when your program is run. So, this is a valid toc program which does nothing: + +``` +main ::= fn() { +}; +``` + +It declares a constant, `main`, which is a function with an empty body. Note that the syntax for declaring functions is the same as the syntax for declaring constants (it isn't something like `fn main() { ... }`). + +Assuming you have compiled the compiler (see `README.md` for instructions about that), you can compile it with + +``` +toc <your filename> +``` + +You will get a file called `out.c`, which you can then put through your C compiler to get an executable file which does nothing. Congratulations! You've written your first toc program. 
@@ -122,10 +122,10 @@ int main(int argc, char **argv) { evalr_create(&ev, &tr, &main_allocr); typer_create(&tr, &ev, &main_allocr); - if (!block_enter(NULL, f.stmts, SCOPE_CHECK_REDECL)) /* enter global scope */ return false; + /* fprint_parsed_file(stdout, &f); */ if (!types_file(&tr, &f)) { err_fprint(TEXT_IMPORTANT("Errors occured while determining types.\n")); return EXIT_FAILURE; @@ -170,6 +170,10 @@ static Keyword builtin_type_to_kw(BuiltinType t) { return KW_COUNT; } +/* TODO: DELME */ +static void fprint_expr(FILE *out, Expression *expr); + + /* returns the number of characters written, not including the null character */ static size_t type_to_str_(Type *t, char *buffer, size_t bufsize) { /* if ((t->flags & TYPE_IS_RESOLVED) && t->was_expr) { */ @@ -610,7 +614,7 @@ static bool parse_type(Parser *p, Type *type) { default: /* TYPE_EXPR */ if (parse_expr(p, type->expr = parser_new_expr(p), - expr_find_end(p, -1 /* end as soon as possible */))) { + expr_find_end(p, (ExprEndFlags)-1 /* end as soon as possible */))) { type->kind = TYPE_EXPR; } else { tokr_err(t, "Unrecognized type."); @@ -113,4 +113,11 @@ static bool each_enter(Expression *e) { static void each_exit(Expression *e) { assert(e->kind == EXPR_EACH); + EachExpr *ea = &e->each; + if (ea->index) { + arr_remove_last(&ea->index->decls); + } + if (ea->value) { + arr_remove_last(&ea->value->decls); + } } @@ -1,18 +1 @@ -puti ::= fn(x: int) { - #C("printf(\"%ld\\n\", (long)x); -"); -}; -putf ::= fn(x: float) { - #C("printf(\"%f\\n\", (double)x); -"); -}; - -Point ::= struct { - x, y : int; -}; - -main ::= fn() { - p : Point; - p.x = 5; - puti(p[if 5 > 6 { "x" } else { "y" }]); -};
\ No newline at end of file +x : [1]int;
\ No newline at end of file diff --git a/tokenizer.c b/tokenizer.c index 241800f..4a57fd1 100644 --- a/tokenizer.c +++ b/tokenizer.c @@ -288,11 +288,12 @@ static bool tokenize_string(Tokenizer *t, char *str) { /* it's a numeric literal */ int base = 10; Floating decimal_pow10 = 0; - NumLiteral n; - n.kind = NUM_LITERAL_INT; - n.intval = 0; Token *token = tokr_add(t); tokr_put_location(t, token); + NumLiteral *n = &token->num; + n->kind = NUM_LITERAL_INT; + n->intval = 0; + if (*t->s == '0') { tokr_nextchar(t); /* octal/hexadecimal/binary (or zero) */ @@ -321,7 +322,7 @@ static bool tokenize_string(Tokenizer *t, char *str) { /* .. (not a decimal point; end the number here) */ break; } - if (n.kind == NUM_LITERAL_FLOAT) { + if (n->kind == NUM_LITERAL_FLOAT) { tokenization_err(t, "Double . in number."); goto err; } @@ -329,16 +330,16 @@ static bool tokenize_string(Tokenizer *t, char *str) { tokenization_err(t, "Decimal point in non base 10 number."); goto err; } - n.kind = NUM_LITERAL_FLOAT; + n->kind = NUM_LITERAL_FLOAT; decimal_pow10 = 0.1; - n.floatval = (Floating)n.intval; + n->floatval = (Floating)n->intval; tokr_nextchar(t); continue; } else if (*t->s == 'e') { tokr_nextchar(t); - if (n.kind == NUM_LITERAL_INT) { - n.kind = NUM_LITERAL_FLOAT; - n.floatval = (Floating)n.intval; + if (n->kind == NUM_LITERAL_INT) { + n->kind = NUM_LITERAL_FLOAT; + n->floatval = (Floating)n->intval; } /* TODO: check if exceeding maximum exponent */ int exponent = 0; @@ -357,9 +358,9 @@ static bool tokenize_string(Tokenizer *t, char *str) { /* OPTIM: Slow for very large exponents (unlikely to happen) */ for (int i = 0; i < exponent; i++) { if (negative_exponent) - n.floatval /= 10; + n->floatval /= 10; else - n.floatval *= 10; + n->floatval *= 10; } break; @@ -384,19 +385,19 @@ static bool tokenize_string(Tokenizer *t, char *str) { /* end of numeric literal */ break; } - switch (n.kind) { + switch (n->kind) { case NUM_LITERAL_INT: - if ((UInteger)n.intval > (UInteger)UINTEGER_MAX / 
(UInteger)base || - (UInteger)n.intval * (UInteger)base > (UInteger)UINTEGER_MAX - (UInteger)digit) { + if ((UInteger)n->intval > (UInteger)UINTEGER_MAX / (UInteger)base || + (UInteger)n->intval * (UInteger)base > (UInteger)UINTEGER_MAX - (UInteger)digit) { /* too big! */ tokenization_err(t, "Number too big to fit in a numeric literal."); goto err; } - n.intval *= (UInteger)base; - n.intval += (UInteger)digit; + n->intval *= (UInteger)base; + n->intval += (UInteger)digit; break; case NUM_LITERAL_FLOAT: - n.floatval += decimal_pow10 * (Floating)digit; + n->floatval += decimal_pow10 * (Floating)digit; decimal_pow10 /= 10; break; } @@ -404,7 +405,6 @@ static bool tokenize_string(Tokenizer *t, char *str) { } token->kind = TOKEN_LITERAL_NUM; - token->num = n; continue; } @@ -452,7 +452,6 @@ static bool type_resolve(Typer *tr, Type *t, Location where) { Value val; Expression *n_expr = t->arr.n_expr; if (!types_expr(tr, n_expr)) return false; - if (n_expr->type.kind == TYPE_UNKNOWN) { err_print(n_expr->where, "Cannot determine type of array size at compile time."); return false; @@ -1134,7 +1133,7 @@ static bool types_expr(Typer *tr, Expression *e) { } } } - FnExpr *original_fn; + FnExpr *original_fn = NULL; Type table_index_type = {0}; Value table_index = {0}; FnExpr fn_copy; |