summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLeo Tenenbaum <pommicket@gmail.com>2019-12-07 18:21:03 -0500
committerLeo Tenenbaum <pommicket@gmail.com>2019-12-07 18:21:03 -0500
commit390f1e368cfdc5011e9eb9af76d2fb44cd8dc0b2 (patch)
treed299c8e4360a68038f575c16d8083275cb1046f0
parent9c44be7b25d61450808e918c14b8dfff49a78a8a (diff)
fixed something weird going on with the tokenizer that might be a bug in clang
-rw-r--r--README.html10
-rw-r--r--README.md1
-rwxr-xr-xbuild.sh4
-rw-r--r--cgen.c6
-rw-r--r--decls_cgen.c3
-rwxr-xr-xdocs.sh6
-rw-r--r--docs/00.html61
-rw-r--r--docs/00.md25
-rw-r--r--docs/01.html18
-rw-r--r--docs/01.md18
-rw-r--r--main.c2
-rw-r--r--parse.c6
-rw-r--r--scope.c7
-rw-r--r--test.toc19
-rw-r--r--tokenizer.c36
-rw-r--r--types.c3
16 files changed, 166 insertions, 59 deletions
diff --git a/README.html b/README.html
index d3476a5..fe1fd75 100644
--- a/README.html
+++ b/README.html
@@ -22,12 +22,10 @@ x : int; x = 5; // Declare x as an integer, then set it to 5.
</code></p>
<p><code>toc</code> is statically typed and has many of C's features, but
-it is just as fast in theory.</p>
+it is nearly as fast in theory.</p>
-<p>See <code>docs</code> for more information.</p>
+<p>See <code>docs</code> for more information (in progress).</p>
-<hr />
-
-<h3>Help</h3>
+<p><code>tests</code> has some test programs written in <code>toc</code>.</p>
-<p>If you find a bug in <code>toc</code>, please <a href="https://github.com/pommicket/toc/issues">report an issue</a> on GitHub.</p>
+<p>To compile the compiler on a Unix-y system, use</p>
diff --git a/README.md b/README.md
index 0447208..4cbe8a7 100644
--- a/README.md
+++ b/README.md
@@ -28,3 +28,4 @@ See `docs` for more information (in progress).
`tests` has some test programs written in `toc`.
+To compile the compiler on a Unix-y system, use
diff --git a/build.sh b/build.sh
index abce67c..da042bd 100755
--- a/build.sh
+++ b/build.sh
@@ -11,8 +11,10 @@ ADDITIONAL_FLAGS="$CFLAGS -Wno-unused-function"
if [ "$CC" = "clang" ]; then
WARNINGS='-Wall -Wextra -Wpedantic -Wshadow -Wconversion -Wimplicit-fallthrough'
-else
+elif [ "$CC" = "gcc" ]; then
WARNINGS='-Wall -Wextra -Wpedantic -Wshadow -Wconversion -Wno-pointer-to-int-cast'
+else
+ WARNINGS=''
fi
DEBUG_FLAGS="-O0 -g3 $WARNINGS -std=c11 -DTOC_DEBUG"
diff --git a/cgen.c b/cgen.c
index b40870f..22b9fa7 100644
--- a/cgen.c
+++ b/cgen.c
@@ -1747,8 +1747,10 @@ static bool cgen_decl(CGenerator *g, Declaration *d) {
/* don't generate function pointer declaration for constant fns */
continue;
}
- if (!cgen_val_pre(g, *val, type, d->where))
- return false;
+ if (has_expr) {
+ if (!cgen_val_pre(g, *val, type, d->where))
+ return false;
+ }
if (g->block != NULL)
cgen_write(g, "static ");
if (!cgen_type_pre(g, type, d->where)) return false;
diff --git a/decls_cgen.c b/decls_cgen.c
index 3fe21bd..a3d35d2 100644
--- a/decls_cgen.c
+++ b/decls_cgen.c
@@ -10,8 +10,7 @@ static bool cgen_decls_decl(CGenerator *g, Declaration *d);
static bool cgen_decls_fn_instances(CGenerator *g, Expression *e) {
assert(e->kind == EXPR_FN);
FnExpr *f = &e->fn;
- FnType *type = &e->type.fn;
- assert(type->constness);
+ assert(e->type.fn.constness);
Instance **data = f->instances.data;
for (U64 i = 0; i < f->instances.cap; i++) {
if (f->instances.occupied[i]) {
diff --git a/docs.sh b/docs.sh
new file mode 100755
index 0000000..81cde33
--- /dev/null
+++ b/docs.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+markdown README.md > README.html
+for x in docs/*.md; do
+ echo $x
+ markdown $x > $(dirname $x)/$(basename $x .md).html
+done
diff --git a/docs/00.html b/docs/00.html
new file mode 100644
index 0000000..cd0c352
--- /dev/null
+++ b/docs/00.html
@@ -0,0 +1,61 @@
+<h2>Declarations</h2>
+
+<p>In toc, declarations have the following syntax:
+<code>
+&lt;name&gt; :[:] [type] [= expression];
+</code></p>
+
+<p>The square brackets (<code>[]</code>) indicate something optional.</p>
+
+<p>All of the following statements
+declare a new variable <code>x</code> which is an integer, and has a value of 0:
+<code>
+x : int;
+x : int = 0;
+x := 0;
+</code>
+Note that in the first of those statements, although no expression
+is specified, it defaults to 0. This is not true in C,
+and there will eventually probably be an option to
+leave <code>x</code> uninitialized.</p>
+
+<p>If you wanted x to be a floating-point number, you could use:
+<code>
+x : float;
+x : float = 0;
+x := 0.0;
+</code></p>
+
+<p>Note that <code>0</code> can be used as both a <code>float</code> and an <code>int</code>eger, but
+when no type is specified, it defaults to an <code>int</code>, whereas <code>0.0</code>
+defaults to a <code>float</code>.</p>
+
+<p>Here are all of toc's builtin types and their ranges of values:</p>
+
+<ul>
+<li><code>int</code> - A 64-bit signed integer (always), -9223372036854775808 to 9223372036854775807</li>
+<li><code>i8</code> - An 8-bit signed integer, -128 to 127</li>
+<li><code>i16</code> - 16-bit signed integer, -32768 to 32767</li>
+<li><code>i32</code> - 32-bit signed integer, -2147483648 to 2147483647</li>
+<li><code>i64</code> - 64-bit signed integer (same as <code>int</code>, but more explicit about the size), -9223372036854775808 to 9223372036854775807</li>
+<li><code>u8</code> - An 8-bit unsigned integer, 0 to 255</li>
+<li><code>u16</code> - 16-bit unsigned integer, 0 to 65535</li>
+<li><code>u32</code> - 32-bit unsigned integer, 0 to 4294967295</li>
+<li><code>u64</code> - 64-bit unsigned integer, 0 to 18446744073709551615</li>
+<li><code>float</code> - A 32-bit floating-point number, -3.40282347e+38 to 3.40282347e+38</li>
+<li><code>f32</code> - A 32-bit floating-point number (same as <code>float</code>, but more explicit about the size)</li>
+<li><code>f64</code> - A 64-bit floating-point number, -1.7976931348623157e+308 to 1.7976931348623157e+308</li>
+<li><code>bool</code> - A boolean value, either <code>false</code> or <code>true</code>.</li>
+<li><code>char</code> - A character. The specific values are technically platform-dependent, but usually there are 256 of them.</li>
+</ul>
+
+<p>At the moment, it is not technically guaranteed that <code>f32</code>/<code>float</code> is actually 32-bit and that <code>f64</code> is actually 64-bit; they are platform dependent. Perhaps someday there will be a version of toc which does not compile to C, where that could be guaranteed.</p>
+
+<p>To make declarations constant, use <code>::</code> instead of <code>:</code>. e.g.</p>
+
+<p><code>
+x ::= 5+3; <br />
+y :: float = 5.123;
+</code></p>
+
+<p>Here, "constant" means constant at compile time, not read-only as it does in C. One interesting thing about toc is that normal functions can run at compile time, so pretty much any expression is a valid initializer for a constant, e.g. doing <code>x ::= some_function();</code> runs <code>some_function</code> at compile time, not at run time.</p>
diff --git a/docs/00.md b/docs/00.md
index e4e8d41..4998684 100644
--- a/docs/00.md
+++ b/docs/00.md
@@ -1,8 +1,8 @@
-## Declarations in toc
+## Declarations
-Declarations have the following syntax:
+In toc, declarations have the following syntax:
```
-<name> : [type] [= expression];
+<name> :[:] [type] [= expression];
```
The square brackets (`[]`) indicate something optional.
@@ -41,10 +41,19 @@ Here are all of toc's builtin types and their ranges of values:
- `u16` - 16-bit unsigned integer, 0 to 65535
- `u32` - 32-bit unsigned integer, 0 to 4294967295
- `u64` - 64-bit unsigned integer, 0 to 18446744073709551615
-- `float` - A 32-bit floating-point number,
-- `f32`
-- `f64`
-- `bool`
-- `char`
+- `float` - A 32-bit floating-point number, -3.40282347e+38 to 3.40282347e+38
+- `f32` - A 32-bit floating-point number (same as `float`, but more explicit about the size)
+- `f64` - A 64-bit floating-point number, -1.7976931348623157e+308 to 1.7976931348623157e+308
+- `bool` - A boolean value, either `false` or `true`.
+- `char` - A character. The specific values are technically platform-dependent, but usually there are 256 of them.
At the moment, it is not technically guaranteed that `f32`/`float` is actually 32-bit and that `f64` is actually 64-bit; they are platform dependent. Perhaps someday there will be a version of toc which does not compile to C, where that could be guaranteed.
+
+To make declarations constant, use `::` instead of `:`. e.g.
+
+```
+x ::= 5+3;
+y :: float = 5.123;
+```
+
+Here, "constant" means constant at compile time, not read-only as it does in C. One interesting thing about toc is that normal functions can run at compile time, so pretty much any expression is a valid initializer for a constant, e.g. doing `x ::= some_function();` runs `some_function` at compile time, not at run time.
diff --git a/docs/01.html b/docs/01.html
new file mode 100644
index 0000000..633295b
--- /dev/null
+++ b/docs/01.html
@@ -0,0 +1,18 @@
+<h3>A first program</h3>
+
+<p>The <code>main</code> function in toc corresponds to the <code>main</code> function in C. This function is called when your program is run. So, this is a valid toc program which does nothing:</p>
+
+<p><code>
+main ::= fn() {
+};
+</code></p>
+
+<p>It declares a constant, <code>main</code>, which is a function with an empty body. Note that the syntax for declaring functions is the same as the syntax for declaring constants (it isn't something like <code>fn main() { ... }</code>).</p>
+
+<p>Assuming you have compiled the compiler (see <code>README.md</code> for instructions about that), you can compile it with</p>
+
+<p><code>
+toc &lt;your filename&gt;
+</code></p>
+
+<p>You will get a file called <code>out.c</code>, which you can then put through your C compiler to get an executable file which does nothing. Congratulations! You've written your first toc program.</p>
diff --git a/docs/01.md b/docs/01.md
new file mode 100644
index 0000000..816f3e6
--- /dev/null
+++ b/docs/01.md
@@ -0,0 +1,18 @@
+### A first program
+
+The `main` function in toc corresponds to the `main` function in C. This function is called when your program is run. So, this is a valid toc program which does nothing:
+
+```
+main ::= fn() {
+};
+```
+
+It declares a constant, `main`, which is a function with an empty body. Note that the syntax for declaring functions is the same as the syntax for declaring constants (it isn't something like `fn main() { ... }`).
+
+Assuming you have compiled the compiler (see `README.md` for instructions about that), you can compile it with
+
+```
+toc <your filename>
+```
+
+You will get a file called `out.c`, which you can then put through your C compiler to get an executable file which does nothing. Congratulations! You've written your first toc program.
diff --git a/main.c b/main.c
index 1ca547b..da2303e 100644
--- a/main.c
+++ b/main.c
@@ -122,10 +122,10 @@ int main(int argc, char **argv) {
evalr_create(&ev, &tr, &main_allocr);
typer_create(&tr, &ev, &main_allocr);
-
if (!block_enter(NULL, f.stmts, SCOPE_CHECK_REDECL)) /* enter global scope */
return false;
+ /* fprint_parsed_file(stdout, &f); */
if (!types_file(&tr, &f)) {
err_fprint(TEXT_IMPORTANT("Errors occured while determining types.\n"));
return EXIT_FAILURE;
diff --git a/parse.c b/parse.c
index a1b5950..d80afd1 100644
--- a/parse.c
+++ b/parse.c
@@ -170,6 +170,10 @@ static Keyword builtin_type_to_kw(BuiltinType t) {
return KW_COUNT;
}
+/* TODO: DELME */
+static void fprint_expr(FILE *out, Expression *expr);
+
+
/* returns the number of characters written, not including the null character */
static size_t type_to_str_(Type *t, char *buffer, size_t bufsize) {
/* if ((t->flags & TYPE_IS_RESOLVED) && t->was_expr) { */
@@ -610,7 +614,7 @@ static bool parse_type(Parser *p, Type *type) {
default:
/* TYPE_EXPR */
if (parse_expr(p, type->expr = parser_new_expr(p),
- expr_find_end(p, -1 /* end as soon as possible */))) {
+ expr_find_end(p, (ExprEndFlags)-1 /* end as soon as possible */))) {
type->kind = TYPE_EXPR;
} else {
tokr_err(t, "Unrecognized type.");
diff --git a/scope.c b/scope.c
index 0774cfa..5416883 100644
--- a/scope.c
+++ b/scope.c
@@ -113,4 +113,11 @@ static bool each_enter(Expression *e) {
static void each_exit(Expression *e) {
assert(e->kind == EXPR_EACH);
+ EachExpr *ea = &e->each;
+ if (ea->index) {
+ arr_remove_last(&ea->index->decls);
+ }
+ if (ea->value) {
+ arr_remove_last(&ea->value->decls);
+ }
}
diff --git a/test.toc b/test.toc
index c5aa6b5..e17d3a2 100644
--- a/test.toc
+++ b/test.toc
@@ -1,18 +1 @@
-puti ::= fn(x: int) {
- #C("printf(\"%ld\\n\", (long)x);
-");
-};
-putf ::= fn(x: float) {
- #C("printf(\"%f\\n\", (double)x);
-");
-};
-
-Point ::= struct {
- x, y : int;
-};
-
-main ::= fn() {
- p : Point;
- p.x = 5;
- puti(p[if 5 > 6 { "x" } else { "y" }]);
-}; \ No newline at end of file
+x : [1]int; \ No newline at end of file
diff --git a/tokenizer.c b/tokenizer.c
index 241800f..4a57fd1 100644
--- a/tokenizer.c
+++ b/tokenizer.c
@@ -288,11 +288,12 @@ static bool tokenize_string(Tokenizer *t, char *str) {
/* it's a numeric literal */
int base = 10;
Floating decimal_pow10 = 0;
- NumLiteral n;
- n.kind = NUM_LITERAL_INT;
- n.intval = 0;
Token *token = tokr_add(t);
tokr_put_location(t, token);
+ NumLiteral *n = &token->num;
+ n->kind = NUM_LITERAL_INT;
+ n->intval = 0;
+
if (*t->s == '0') {
tokr_nextchar(t);
/* octal/hexadecimal/binary (or zero) */
@@ -321,7 +322,7 @@ static bool tokenize_string(Tokenizer *t, char *str) {
/* .. (not a decimal point; end the number here) */
break;
}
- if (n.kind == NUM_LITERAL_FLOAT) {
+ if (n->kind == NUM_LITERAL_FLOAT) {
tokenization_err(t, "Double . in number.");
goto err;
}
@@ -329,16 +330,16 @@ static bool tokenize_string(Tokenizer *t, char *str) {
tokenization_err(t, "Decimal point in non base 10 number.");
goto err;
}
- n.kind = NUM_LITERAL_FLOAT;
+ n->kind = NUM_LITERAL_FLOAT;
decimal_pow10 = 0.1;
- n.floatval = (Floating)n.intval;
+ n->floatval = (Floating)n->intval;
tokr_nextchar(t);
continue;
} else if (*t->s == 'e') {
tokr_nextchar(t);
- if (n.kind == NUM_LITERAL_INT) {
- n.kind = NUM_LITERAL_FLOAT;
- n.floatval = (Floating)n.intval;
+ if (n->kind == NUM_LITERAL_INT) {
+ n->kind = NUM_LITERAL_FLOAT;
+ n->floatval = (Floating)n->intval;
}
/* TODO: check if exceeding maximum exponent */
int exponent = 0;
@@ -357,9 +358,9 @@ static bool tokenize_string(Tokenizer *t, char *str) {
/* OPTIM: Slow for very large exponents (unlikely to happen) */
for (int i = 0; i < exponent; i++) {
if (negative_exponent)
- n.floatval /= 10;
+ n->floatval /= 10;
else
- n.floatval *= 10;
+ n->floatval *= 10;
}
break;
@@ -384,19 +385,19 @@ static bool tokenize_string(Tokenizer *t, char *str) {
/* end of numeric literal */
break;
}
- switch (n.kind) {
+ switch (n->kind) {
case NUM_LITERAL_INT:
- if ((UInteger)n.intval > (UInteger)UINTEGER_MAX / (UInteger)base ||
- (UInteger)n.intval * (UInteger)base > (UInteger)UINTEGER_MAX - (UInteger)digit) {
+ if ((UInteger)n->intval > (UInteger)UINTEGER_MAX / (UInteger)base ||
+ (UInteger)n->intval * (UInteger)base > (UInteger)UINTEGER_MAX - (UInteger)digit) {
/* too big! */
tokenization_err(t, "Number too big to fit in a numeric literal.");
goto err;
}
- n.intval *= (UInteger)base;
- n.intval += (UInteger)digit;
+ n->intval *= (UInteger)base;
+ n->intval += (UInteger)digit;
break;
case NUM_LITERAL_FLOAT:
- n.floatval += decimal_pow10 * (Floating)digit;
+ n->floatval += decimal_pow10 * (Floating)digit;
decimal_pow10 /= 10;
break;
}
@@ -404,7 +405,6 @@ static bool tokenize_string(Tokenizer *t, char *str) {
}
token->kind = TOKEN_LITERAL_NUM;
- token->num = n;
continue;
}
diff --git a/types.c b/types.c
index 1af859e..ebe5b0d 100644
--- a/types.c
+++ b/types.c
@@ -452,7 +452,6 @@ static bool type_resolve(Typer *tr, Type *t, Location where) {
Value val;
Expression *n_expr = t->arr.n_expr;
if (!types_expr(tr, n_expr)) return false;
-
if (n_expr->type.kind == TYPE_UNKNOWN) {
err_print(n_expr->where, "Cannot determine type of array size at compile time.");
return false;
@@ -1134,7 +1133,7 @@ static bool types_expr(Typer *tr, Expression *e) {
}
}
}
- FnExpr *original_fn;
+ FnExpr *original_fn = NULL;
Type table_index_type = {0};
Value table_index = {0};
FnExpr fn_copy;