From 390f1e368cfdc5011e9eb9af76d2fb44cd8dc0b2 Mon Sep 17 00:00:00 2001
From: Leo Tenenbaum
Date: Sat, 7 Dec 2019 18:21:03 -0500
Subject: fixed something weird going on with the tokenizer that might be a bug
in clang
---
README.html | 10 ++++------
README.md | 1 +
build.sh | 4 +++-
cgen.c | 6 ++++--
decls_cgen.c | 3 +--
docs.sh | 6 ++++++
docs/00.html | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
docs/00.md | 25 +++++++++++++++++--------
docs/01.html | 18 ++++++++++++++++++
docs/01.md | 18 ++++++++++++++++++
main.c | 2 +-
parse.c | 6 +++++-
scope.c | 7 +++++++
test.toc | 19 +------------------
tokenizer.c | 36 +++++++++++++++++------------------
types.c | 3 +--
16 files changed, 166 insertions(+), 59 deletions(-)
create mode 100755 docs.sh
create mode 100644 docs/00.html
create mode 100644 docs/01.html
create mode 100644 docs/01.md
diff --git a/README.html b/README.html
index d3476a5..fe1fd75 100644
--- a/README.html
+++ b/README.html
@@ -22,12 +22,10 @@ x : int; x = 5; // Declare x as an integer, then set it to 5.
toc
is statically typed and has many of C's features, but
-it is just as fast in theory.
+it is nearly as fast in theory.
-See docs
for more information.
+See docs
for more information (in progress).
-
-
-Help
+tests
has some test programs written in toc
.
-If you find a bug in toc
, please report an issue on GitHub.
+To compile the compiler on a Unix-y system, use
diff --git a/README.md b/README.md
index 0447208..4cbe8a7 100644
--- a/README.md
+++ b/README.md
@@ -28,3 +28,4 @@ See `docs` for more information (in progress).
`tests` has some test programs written in `toc`.
+To compile the compiler on a Unix-y system, use
diff --git a/build.sh b/build.sh
index abce67c..da042bd 100755
--- a/build.sh
+++ b/build.sh
@@ -11,8 +11,10 @@ ADDITIONAL_FLAGS="$CFLAGS -Wno-unused-function"
if [ "$CC" = "clang" ]; then
WARNINGS='-Wall -Wextra -Wpedantic -Wshadow -Wconversion -Wimplicit-fallthrough'
-else
+elif [ "$CC" = "gcc" ]; then
WARNINGS='-Wall -Wextra -Wpedantic -Wshadow -Wconversion -Wno-pointer-to-int-cast'
+else
+ WARNINGS=''
fi
DEBUG_FLAGS="-O0 -g3 $WARNINGS -std=c11 -DTOC_DEBUG"
diff --git a/cgen.c b/cgen.c
index b40870f..22b9fa7 100644
--- a/cgen.c
+++ b/cgen.c
@@ -1747,8 +1747,10 @@ static bool cgen_decl(CGenerator *g, Declaration *d) {
/* don't generate function pointer declaration for constant fns */
continue;
}
- if (!cgen_val_pre(g, *val, type, d->where))
- return false;
+ if (has_expr) {
+ if (!cgen_val_pre(g, *val, type, d->where))
+ return false;
+ }
if (g->block != NULL)
cgen_write(g, "static ");
if (!cgen_type_pre(g, type, d->where)) return false;
diff --git a/decls_cgen.c b/decls_cgen.c
index 3fe21bd..a3d35d2 100644
--- a/decls_cgen.c
+++ b/decls_cgen.c
@@ -10,8 +10,7 @@ static bool cgen_decls_decl(CGenerator *g, Declaration *d);
static bool cgen_decls_fn_instances(CGenerator *g, Expression *e) {
assert(e->kind == EXPR_FN);
FnExpr *f = &e->fn;
- FnType *type = &e->type.fn;
- assert(type->constness);
+ assert(e->type.fn.constness);
Instance **data = f->instances.data;
for (U64 i = 0; i < f->instances.cap; i++) {
if (f->instances.occupied[i]) {
diff --git a/docs.sh b/docs.sh
new file mode 100755
index 0000000..81cde33
--- /dev/null
+++ b/docs.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+markdown README.md > README.html # regenerate the top-level README page
+for x in docs/*.md; do # one .html is written next to each docs/*.md
+ echo "$x"
+ markdown "$x" > "$(dirname "$x")/$(basename "$x" .md).html" # quoted so paths with spaces are not word-split
+done
diff --git a/docs/00.html b/docs/00.html
new file mode 100644
index 0000000..cd0c352
--- /dev/null
+++ b/docs/00.html
@@ -0,0 +1,61 @@
+Declarations
+
+In toc, declarations have the following syntax:
+
+<name> :[:] [type] [= expression];
+
+
+The square brackets ([]
) indicate something optional.
+
+All of the following statements
+declare a new variable x
which is an integer, and has a value of 0:
+
+x : int;
+x : int = 0;
+x := 0;
+
+Note that in the first of those statements, although no expression
+is specified, it defaults to 0. This is not true in C,
+and there will eventually probably be an option to
+leave x
uninitialized.
+
+If you wanted x to be a floating-point number, you could use:
+
+x : float;
+x : float = 0;
+x := 0.0;
+
+
+Note that 0
can be used as both a float
and an int
eger, but
+when no type is specified, it defaults to an int
, whereas 0.0
+defaults to a float
.
+
+Here are all of toc's builtin types and their ranges of values:
+
+
+int
- A 64-bit signed integer (always), -9223372036854775808 to 9223372036854775807
+i8
- An 8-bit signed integer, -128 to 127
+i16
- 16-bit signed integer, -32768 to 32767
+i32
- 32-bit signed integer, -2147483648 to 2147483647
+i64
- 64-bit signed integer (same as int
, but more explicit about the size), -9223372036854775808 to 9223372036854775807
+u8
- An 8-bit unsigned integer, 0 to 255
+u16
- 16-bit unsigned integer, 0 to 65535
+u32
- 32-bit unsigned integer, 0 to 4294967295
+u64
- 64-bit unsigned integer, 0 to 18446744073709551615
+float
- A 32-bit floating-point number, -3.40282347e+38 to 3.40282347e+38
+f32
- A 32-bit floating-point number (same as float
, but more explicit about the size)
+f64
- A 64-bit floating-point number, -1.7976931348623157e+308 to 1.7976931348623157e+308
+bool
- A boolean value, either false
or true
.
+char
- A character. The specific values are technically platform-dependent, but usually there are 256 of them.
+
+
+At the moment, it is not technically guaranteed that f32
/float
is actually 32-bit and that f64
is actually 64-bit; they are platform dependent. Perhaps someday there will be a version of toc which does not compile to C, where that could be guaranteed.
+
+To make declarations constant, use ::
instead of :
. e.g.
+
+
+x ::= 5+3;
+y :: float = 5.123;
+
+
+Here, "constant" means constant at compile time, not read-only as it does in C. One interesting thing about toc is that normal functions can run at compile time, so pretty much any expression is a valid initializer for a constant, e.g. doing x ::= some_function();
runs some_function
at compile time, not at run time.
diff --git a/docs/00.md b/docs/00.md
index e4e8d41..4998684 100644
--- a/docs/00.md
+++ b/docs/00.md
@@ -1,8 +1,8 @@
-## Declarations in toc
+## Declarations
-Declarations have the following syntax:
+In toc, declarations have the following syntax:
```
- : [type] [= expression];
+ :[:] [type] [= expression];
```
The square brackets (`[]`) indicate something optional.
@@ -41,10 +41,19 @@ Here are all of toc's builtin types and their ranges of values:
- `u16` - 16-bit unsigned integer, 0 to 65535
- `u32` - 32-bit unsigned integer, 0 to 4294967295
- `u64` - 64-bit unsigned integer, 0 to 18446744073709551615
-- `float` - A 32-bit floating-point number,
-- `f32`
-- `f64`
-- `bool`
-- `char`
+- `float` - A 32-bit floating-point number, -3.40282347e+38 to 3.40282347e+38
+- `f32` - A 32-bit floating-point number (same as `float`, but more explicit about the size)
+- `f64` - A 64-bit floating-point number, -1.7976931348623157e+308 to 1.7976931348623157e+308
+- `bool` - A boolean value, either `false` or `true`.
+- `char` - A character. The specific values are technically platform-dependent, but usually there are 256 of them.
At the moment, it is not technically guaranteed that `f32`/`float` is actually 32-bit and that `f64` is actually 64-bit; they are platform dependent. Perhaps someday there will be a version of toc which does not compile to C, where that could be guaranteed.
+
+To make declarations constant, use `::` instead of `:`. e.g.
+
+```
+x ::= 5+3;
+y :: float = 5.123;
+```
+
+Here, "constant" means constant at compile time, not read-only as it does in C. One interesting thing about toc is that normal functions can run at compile time, so pretty much any expression is a valid initializer for a constant, e.g. doing `x ::= some_function();` runs `some_function` at compile time, not at run time.
diff --git a/docs/01.html b/docs/01.html
new file mode 100644
index 0000000..633295b
--- /dev/null
+++ b/docs/01.html
@@ -0,0 +1,18 @@
+A first program
+
+The main
function in toc corresponds to the main
function in C. This function is called when your program is run. So, this is a valid toc program which does nothing:
+
+
+main ::= fn() {
+};
+
+
+It declares a constant, main
, which is a function with an empty body. Note that the syntax for declaring functions is the same as the syntax for declaring constants (it isn't something like fn main() { ... }
).
+
+Assuming you have compiled the compiler (see README.md
for instructions about that), you can compile it with
+
+
+toc <your filename>
+
+
+You will get a file called out.c
, which you can then put through your C compiler to get an executable file which does nothing. Congratulations! You've written your first toc program.
diff --git a/docs/01.md b/docs/01.md
new file mode 100644
index 0000000..816f3e6
--- /dev/null
+++ b/docs/01.md
@@ -0,0 +1,18 @@
+### A first program
+
+The `main` function in toc corresponds to the `main` function in C. This function is called when your program is run. So, this is a valid toc program which does nothing:
+
+```
+main ::= fn() {
+};
+```
+
+It declares a constant, `main`, which is a function with an empty body. Note that the syntax for declaring functions is the same as the syntax for declaring constants (it isn't something like `fn main() { ... }`).
+
+Assuming you have compiled the compiler (see `README.md` for instructions about that), you can compile it with
+
+```
+toc
+```
+
+You will get a file called `out.c`, which you can then put through your C compiler to get an executable file which does nothing. Congratulations! You've written your first toc program.
diff --git a/main.c b/main.c
index 1ca547b..da2303e 100644
--- a/main.c
+++ b/main.c
@@ -122,10 +122,10 @@ int main(int argc, char **argv) {
evalr_create(&ev, &tr, &main_allocr);
typer_create(&tr, &ev, &main_allocr);
-
if (!block_enter(NULL, f.stmts, SCOPE_CHECK_REDECL)) /* enter global scope */
return false;
+ /* fprint_parsed_file(stdout, &f); */
if (!types_file(&tr, &f)) {
err_fprint(TEXT_IMPORTANT("Errors occured while determining types.\n"));
return EXIT_FAILURE;
diff --git a/parse.c b/parse.c
index a1b5950..d80afd1 100644
--- a/parse.c
+++ b/parse.c
@@ -170,6 +170,10 @@ static Keyword builtin_type_to_kw(BuiltinType t) {
return KW_COUNT;
}
+/* TODO: DELME */
+static void fprint_expr(FILE *out, Expression *expr);
+
+
/* returns the number of characters written, not including the null character */
static size_t type_to_str_(Type *t, char *buffer, size_t bufsize) {
/* if ((t->flags & TYPE_IS_RESOLVED) && t->was_expr) { */
@@ -610,7 +614,7 @@ static bool parse_type(Parser *p, Type *type) {
default:
/* TYPE_EXPR */
if (parse_expr(p, type->expr = parser_new_expr(p),
- expr_find_end(p, -1 /* end as soon as possible */))) {
+ expr_find_end(p, (ExprEndFlags)-1 /* end as soon as possible */))) {
type->kind = TYPE_EXPR;
} else {
tokr_err(t, "Unrecognized type.");
diff --git a/scope.c b/scope.c
index 0774cfa..5416883 100644
--- a/scope.c
+++ b/scope.c
@@ -113,4 +113,11 @@ static bool each_enter(Expression *e) {
static void each_exit(Expression *e) {
assert(e->kind == EXPR_EACH);
+ EachExpr *ea = &e->each;
+ if (ea->index) {
+ arr_remove_last(&ea->index->decls);
+ }
+ if (ea->value) {
+ arr_remove_last(&ea->value->decls);
+ }
}
diff --git a/test.toc b/test.toc
index c5aa6b5..e17d3a2 100644
--- a/test.toc
+++ b/test.toc
@@ -1,18 +1 @@
-puti ::= fn(x: int) {
- #C("printf(\"%ld\\n\", (long)x);
-");
-};
-putf ::= fn(x: float) {
- #C("printf(\"%f\\n\", (double)x);
-");
-};
-
-Point ::= struct {
- x, y : int;
-};
-
-main ::= fn() {
- p : Point;
- p.x = 5;
- puti(p[if 5 > 6 { "x" } else { "y" }]);
-};
\ No newline at end of file
+x : [1]int;
\ No newline at end of file
diff --git a/tokenizer.c b/tokenizer.c
index 241800f..4a57fd1 100644
--- a/tokenizer.c
+++ b/tokenizer.c
@@ -288,11 +288,12 @@ static bool tokenize_string(Tokenizer *t, char *str) {
/* it's a numeric literal */
int base = 10;
Floating decimal_pow10 = 0;
- NumLiteral n;
- n.kind = NUM_LITERAL_INT;
- n.intval = 0;
Token *token = tokr_add(t);
tokr_put_location(t, token);
+ NumLiteral *n = &token->num;
+ n->kind = NUM_LITERAL_INT;
+ n->intval = 0;
+
if (*t->s == '0') {
tokr_nextchar(t);
/* octal/hexadecimal/binary (or zero) */
@@ -321,7 +322,7 @@ static bool tokenize_string(Tokenizer *t, char *str) {
/* .. (not a decimal point; end the number here) */
break;
}
- if (n.kind == NUM_LITERAL_FLOAT) {
+ if (n->kind == NUM_LITERAL_FLOAT) {
tokenization_err(t, "Double . in number.");
goto err;
}
@@ -329,16 +330,16 @@ static bool tokenize_string(Tokenizer *t, char *str) {
tokenization_err(t, "Decimal point in non base 10 number.");
goto err;
}
- n.kind = NUM_LITERAL_FLOAT;
+ n->kind = NUM_LITERAL_FLOAT;
decimal_pow10 = 0.1;
- n.floatval = (Floating)n.intval;
+ n->floatval = (Floating)n->intval;
tokr_nextchar(t);
continue;
} else if (*t->s == 'e') {
tokr_nextchar(t);
- if (n.kind == NUM_LITERAL_INT) {
- n.kind = NUM_LITERAL_FLOAT;
- n.floatval = (Floating)n.intval;
+ if (n->kind == NUM_LITERAL_INT) {
+ n->kind = NUM_LITERAL_FLOAT;
+ n->floatval = (Floating)n->intval;
}
/* TODO: check if exceeding maximum exponent */
int exponent = 0;
@@ -357,9 +358,9 @@ static bool tokenize_string(Tokenizer *t, char *str) {
/* OPTIM: Slow for very large exponents (unlikely to happen) */
for (int i = 0; i < exponent; i++) {
if (negative_exponent)
- n.floatval /= 10;
+ n->floatval /= 10;
else
- n.floatval *= 10;
+ n->floatval *= 10;
}
break;
@@ -384,19 +385,19 @@ static bool tokenize_string(Tokenizer *t, char *str) {
/* end of numeric literal */
break;
}
- switch (n.kind) {
+ switch (n->kind) {
case NUM_LITERAL_INT:
- if ((UInteger)n.intval > (UInteger)UINTEGER_MAX / (UInteger)base ||
- (UInteger)n.intval * (UInteger)base > (UInteger)UINTEGER_MAX - (UInteger)digit) {
+ if ((UInteger)n->intval > (UInteger)UINTEGER_MAX / (UInteger)base ||
+ (UInteger)n->intval * (UInteger)base > (UInteger)UINTEGER_MAX - (UInteger)digit) {
/* too big! */
tokenization_err(t, "Number too big to fit in a numeric literal.");
goto err;
}
- n.intval *= (UInteger)base;
- n.intval += (UInteger)digit;
+ n->intval *= (UInteger)base;
+ n->intval += (UInteger)digit;
break;
case NUM_LITERAL_FLOAT:
- n.floatval += decimal_pow10 * (Floating)digit;
+ n->floatval += decimal_pow10 * (Floating)digit;
decimal_pow10 /= 10;
break;
}
@@ -404,7 +405,6 @@ static bool tokenize_string(Tokenizer *t, char *str) {
}
token->kind = TOKEN_LITERAL_NUM;
- token->num = n;
continue;
}
diff --git a/types.c b/types.c
index 1af859e..ebe5b0d 100644
--- a/types.c
+++ b/types.c
@@ -452,7 +452,6 @@ static bool type_resolve(Typer *tr, Type *t, Location where) {
Value val;
Expression *n_expr = t->arr.n_expr;
if (!types_expr(tr, n_expr)) return false;
-
if (n_expr->type.kind == TYPE_UNKNOWN) {
err_print(n_expr->where, "Cannot determine type of array size at compile time.");
return false;
@@ -1134,7 +1133,7 @@ static bool types_expr(Typer *tr, Expression *e) {
}
}
}
- FnExpr *original_fn;
+ FnExpr *original_fn = NULL;
Type table_index_type = {0};
Value table_index = {0};
FnExpr fn_copy;
--
cgit v1.2.3