summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xbuild.sh3
-rw-r--r--main.c8
-rw-r--r--parse.c351
-rw-r--r--test.toc2
-rw-r--r--tokenizer.c171
-rw-r--r--util/arr.c67
-rw-r--r--util/err.c17
7 files changed, 425 insertions, 194 deletions
diff --git a/build.sh b/build.sh
index abdd474..8ae0039 100755
--- a/build.sh
+++ b/build.sh
@@ -1,3 +1,2 @@
#!/bin/bash
-
-gcc -o toc main.c -g -o toc -Wall -Wextra -Wpedantic -Wconversion -Wno-unused-function -std=c11 || exit 1
+gcc -o toc main.c -g -o toc -Wall -Wextra -Wpedantic -Wconversion -std=c11 || exit 1
diff --git a/main.c b/main.c
index 7fefa70..6aa66a3 100644
--- a/main.c
+++ b/main.c
@@ -9,6 +9,7 @@
#include <stdbool.h>
#include "util/err.c"
#include "util/arr.c"
+#include "util/blockarr.c"
#include "identifiers.c"
#include "tokenizer.c"
#include "parse.c"
@@ -50,15 +51,16 @@ int main(int argc, char **argv) {
return EXIT_FAILURE;
}
- arr_foreach(t.tokens, Token, token) {
+ arr_foreach(&t.tokens, Token, token) {
if (token != t.tokens.data)
printf(" ");
token_fprint(stdout, token);
}
printf("\n");
-
+ Parser p;
+ parser_from_tokenizer(&p, &t);
ParsedFile f;
- if (!parse_file(&f, &t)) {
+ if (!file_parse(&f, &p)) {
err_fprint(TEXT_IMPORTANT("Errors occured while parsing.\n"));
return EXIT_FAILURE;
}
diff --git a/parse.c b/parse.c
index 9ab9ee1..b6ae14f 100644
--- a/parse.c
+++ b/parse.c
@@ -20,6 +20,7 @@ typedef enum {
typedef struct {
+ Location where;
TypeKind kind;
union {
BuiltinType builtin;
@@ -27,52 +28,84 @@ typedef struct {
} Type;
typedef enum {
- EXPR_INT_CONST,
- EXPR_FLOAT_CONST
+ EXPR_INT_LITERAL,
+ EXPR_FLOAT_LITERAL,
+ EXPR_BINARY_OP,
+ EXPR_UNARY_OP
} ExprKind;
-typedef struct {
+typedef enum {
+ UNARY_MINUS
+} UnaryOp;
+
+typedef enum {
+ BINARY_PLUS,
+ BINARY_MINUS
+} BinaryOp;
+
+#define EXPR_FLAG_FLEXIBLE 0x01
+
+typedef struct Expression {
+ Location where;
ExprKind kind;
Type type;
- bool is_flexible_num:1; /* expressions like 5 or 7*8+3 can be any numerical type */
+ uint16_t flags;
union {
- FloatConst floatc;
- IntConst intc;
+ FloatLiteral floatl;
+ IntLiteral intl;
+ struct {
+ UnaryOp op;
+ struct Expression *of;
+ } unary;
+ struct {
+ BinaryOp op;
+ struct Expression *lhs;
+ struct Expression *rhs;
+ } binary;
};
} Expression;
+#define DECL_FLAG_INFER_TYPE 0x01
+#define DECL_FLAG_CONST 0x02
+#define DECL_FLAG_HAS_EXPR 0x04
typedef struct {
Location where;
Identifier var;
Type type;
Expression expr;
- bool infer_type:1;
- bool is_const:1;
- bool has_expr:1;
+ uint16_t flags;
} Declaration;
/* OPTIM: Instead of using dynamic arrays, do two passes. */
-arr_declaration(Declarations, Declaration, decls_)
-
typedef enum {
STMT_DECLS
} StatementKind;
typedef struct {
- StatementKind kind;
Location where;
+ StatementKind kind;
union {
- Declarations decls;
+ Array decls;
};
} Statement;
-arr_declaration(Statements, Statement, stmts_)
-
typedef struct {
- Statements stmts;
+ Array stmts;
} ParsedFile;
+typedef struct {
+ Tokenizer *tokr;
+ BlockArr exprs; /* a dynamic array of expressions, so that we don't need to call malloc every time we make an expression */
+} Parser;
+
+/*
+allocate a new expression.
+Hands back whatever block_arr_add returns for the parser's expression pool
+(p->exprs); nothing is initialized here, so the caller must fill in the
+Expression before reading it.
+IMPORTANT: This invalidates all other parser-allocated Expression pointers.
+NOTE(review): expr_parse keeps the lhs/rhs/of pointers it got from this function
+while it recurses (and so allocates more expressions). Either the warning above
+is overly cautious or those callers are unsafe - confirm against block_arr_add
+in util/blockarr.c.
+ */
+static Expression *parser_new_expr(Parser *p) {
+ return block_arr_add(&p->exprs);
+}
/* returns BUILTIN_TYPE_COUNT on failure */
static BuiltinType kw_to_builtin_type(Keyword kw) {
@@ -93,7 +126,29 @@ static BuiltinType kw_to_builtin_type(Keyword kw) {
}
}
-static bool parse_type(Type *type, Tokenizer *t) {
+/*
+map a builtin type to the keyword which names it (e.g. BUILTIN_I8 -> KW_I8).
+BUILTIN_TYPE_COUNT, or any value which is not a real builtin type, trips the
+assertion; if assertions are compiled out, KW_COUNT is returned instead.
+ */
+static Keyword builtin_type_to_kw(BuiltinType t) {
+	Keyword kw = KW_COUNT; /* stays KW_COUNT unless t is a real builtin type */
+	switch (t) {
+	case BUILTIN_INT:   kw = KW_INT;   break;
+	case BUILTIN_I8:    kw = KW_I8;    break;
+	case BUILTIN_I16:   kw = KW_I16;   break;
+	case BUILTIN_I32:   kw = KW_I32;   break;
+	case BUILTIN_I64:   kw = KW_I64;   break;
+	case BUILTIN_U8:    kw = KW_U8;    break;
+	case BUILTIN_U16:   kw = KW_U16;   break;
+	case BUILTIN_U32:   kw = KW_U32;   break;
+	case BUILTIN_U64:   kw = KW_U64;   break;
+	case BUILTIN_FLOAT: kw = KW_FLOAT; break;
+	case BUILTIN_F32:   kw = KW_F32;   break;
+	case BUILTIN_F64:   kw = KW_F64;   break;
+	case BUILTIN_TYPE_COUNT: break;
+	}
+	if (kw == KW_COUNT) {
+		/* no keyword corresponds to t */
+		assert(0);
+	}
+	return kw;
+}
+
+static bool type_parse(Type *type, Parser *p) {
+ Tokenizer *t = p->tokr;
+ type->where = t->token->where;
switch (t->token->kind) {
case TOKEN_KW:
type->kind = TYPE_BUILTIN;
@@ -112,27 +167,45 @@ static bool parse_type(Type *type, Tokenizer *t) {
return false;
}
-static bool parse_expr(Expression *e, Tokenizer *t, Token *end) {
+/* returned by op_precedence for keywords which are not operators */
+#define NOT_AN_OP (-1)
+/*
+precedence of the operator keyword op, or NOT_AN_OP if op is not an operator.
+expr_parse splits an expression at its lowest-precedence operator, so an
+operator with a smaller value here ends up nearer the root of the expression.
+NOTE(review): + and - are given different precedences (10 and 20); confirm that
+this is intended rather than both sharing one level.
+ */
+static int op_precedence(Keyword op) {
+	switch (op) {
+	case KW_PLUS:
+		return 10;
+	case KW_MINUS:
+		return 20;
+	default:
+		return NOT_AN_OP;
+	}
+}
+
+static bool expr_parse(Expression *e, Parser *p, Token *end) {
+ Tokenizer *t = p->tokr;
if (end == NULL) return false;
- memset(e, 0, sizeof *e);
+ e->flags = 0;
+ e->where = t->token->where;
+ if (end <= t->token) {
+ tokr_err(t, "Empty expression.");
+ return false;
+ }
if (end - t->token == 1) {
/* 1-token expression */
switch (t->token->kind) {
- case TOKEN_NUM_CONST: {
- NumConst *num = &t->token->num;
+ case TOKEN_NUM_LITERAL: {
+ NumLiteral *num = &t->token->num;
switch (num->kind) {
- case NUM_CONST_FLOAT:
- e->kind = EXPR_FLOAT_CONST;
+ case NUM_LITERAL_FLOAT:
+ e->kind = EXPR_FLOAT_LITERAL;
e->type.kind = TYPE_BUILTIN;
e->type.builtin = BUILTIN_FLOAT;
- e->floatc = num->floatval;
+ e->floatl = num->floatval;
break;
- case NUM_CONST_INT:
- e->kind = EXPR_INT_CONST;
- e->is_flexible_num = true;
+ case NUM_LITERAL_INT:
+ e->kind = EXPR_INT_LITERAL;
+ e->flags |= EXPR_FLAG_FLEXIBLE;
e->type.kind = TYPE_BUILTIN;
e->type.builtin = BUILTIN_INT; /* TODO: if it's too big, use a u64 instead. */
- e->floatc = num->intval;
+ e->intl = num->intval;
break;
}
} break;
@@ -143,9 +216,104 @@ static bool parse_expr(Expression *e, Tokenizer *t, Token *end) {
t->token = end;
return true;
}
- /* TODO */
- tokr_err(t, "multi-token exprs not supported yet.");
- return false;
+ /* Find the lowest-precedence operator not in parentheses */
+ int paren_level = 0;
+ int lowest_precedence = NOT_AN_OP;
+ Token *lowest_precedence_op;
+ for (Token *token = t->token; token < end; token++) {
+ if (token->kind == TOKEN_KW) {
+ switch (token->kw) {
+ case KW_LPAREN:
+ paren_level++;
+ break;
+ case KW_RPAREN:
+ paren_level--;
+ if (paren_level < 0) {
+ t->token = token;
+ tokr_err(t, "Excessive closing parenthesis.");
+ }
+ break;
+ default: { /* OPTIM: use individual cases for each op */
+ int precedence = op_precedence(token->kw);
+ if (precedence == NOT_AN_OP) break; /* nvm it's not an operator */
+ if (lowest_precedence == NOT_AN_OP || precedence <= lowest_precedence) {
+ lowest_precedence = precedence;
+ lowest_precedence_op = token;
+ }
+ } break;
+ }
+ }
+ }
+ if (lowest_precedence == NOT_AN_OP) {
+ /* function calls, array accesses, etc. OR
+ something like (5+3)*/
+ tokr_err(t, "Not implemented yet.");
+ return false;
+ }
+
+ /* This is a unary op not a binary one. */
+ while (lowest_precedence_op != t->token
+ && lowest_precedence_op[-1].kind == TOKEN_KW
+ && op_precedence(lowest_precedence_op[-1].kw) != NOT_AN_OP) {
+ lowest_precedence_op--;
+ }
+
+ /* Unary */
+ if (lowest_precedence_op == t->token) {
+ UnaryOp op;
+ bool is_unary;
+ switch (lowest_precedence_op->kw) {
+ case KW_PLUS:
+ /* unary + is ignored entirely */
+ t->token++;
+ /* re-parse this expression without + */
+ return expr_parse(e, p, end);
+ case KW_MINUS:
+ is_unary = true;
+ op = UNARY_MINUS;
+ break;
+ default:
+ is_unary = false;
+ break;
+ }
+ if (!is_unary) {
+ tokr_err(t, "%s is not a unary operator.", keywords[lowest_precedence_op->kw]);
+ return false;
+ }
+ e->unary.op = op;
+ e->kind = EXPR_UNARY_OP;
+ t->token++;
+ Expression *of = parser_new_expr(p);
+ e->unary.of = of;
+ return expr_parse(of, p, end);
+ }
+
+
+ BinaryOp op;
+ switch (lowest_precedence_op->kw) {
+ case KW_PLUS:
+ op = BINARY_PLUS;
+ break;
+ case KW_MINUS:
+ op = BINARY_MINUS;
+ break;
+ default: assert(0); break;
+ }
+ e->binary.op = op;
+ e->kind = EXPR_BINARY_OP;
+
+ Expression *lhs = parser_new_expr(p);
+ e->binary.lhs = lhs;
+ if (!expr_parse(lhs, p, lowest_precedence_op))
+ return false;
+
+ Expression *rhs = parser_new_expr(p);
+ t->token = lowest_precedence_op + 1;
+ e->binary.rhs = rhs;
+ if (!expr_parse(rhs, p, end))
+ return false;
+
+ return true;
}
/*
@@ -156,7 +324,8 @@ typedef enum {
EXPR_END_RPAREN_OR_COMMA,
EXPR_END_SEMICOLON
} ExprEndKind;
-static Token *expr_find_end(Tokenizer *t, ExprEndKind ends_with) {
+static Token *expr_find_end(Parser *p, ExprEndKind ends_with) {
+ Tokenizer *t = p->tokr;
long bracket_level = 0;
Token *token = t->token;
while (1) {
@@ -187,7 +356,7 @@ static Token *expr_find_end(Tokenizer *t, ExprEndKind ends_with) {
return NULL;
case EXPR_END_RPAREN_OR_COMMA:
if (bracket_level > 0) {
- tokr_err(t, "Mismatched parentheses."); /* FEATURE: Find out where this is */
+ tokr_err(t, "Opening parenthesis was never closed."); /* FEATURE: Find out where this is */
return NULL;
} else {
tokr_err(t, "Could not find ')' or ',' at end of expression.");
@@ -200,17 +369,19 @@ static Token *expr_find_end(Tokenizer *t, ExprEndKind ends_with) {
}
}
-static bool parse_decls(Declarations *ds, Tokenizer *t) {
- decls_create(ds);
+static bool decls_parse(Array *ds, Parser *p) {
+ Tokenizer *t = p->tokr;
+ arr_create(ds, sizeof(Declaration));
while (1) {
- Declaration decl = {0};
+ Declaration *decl = arr_add(ds);
if (t->token->kind != TOKEN_IDENT) {
tokr_err(t, "Cannot declare non-identifier.");
return false;
}
- decl.where = t->token->where;
- decl.var = t->token->ident;
+ decl->where = t->token->where;
+ decl->var = t->token->ident;
+ decl->flags = 0;
t->token++;
if (!token_is_kw(t->token, KW_COLON)) {
@@ -222,33 +393,31 @@ static bool parse_decls(Declarations *ds, Tokenizer *t) {
if (!token_is_kw(t->token, KW_MINUS)
&& !token_is_kw(t->token, KW_EQ)
&& !token_is_kw(t->token, KW_SEMICOLON)) {
- if (!parse_type(&decl.type, t))
+ if (!type_parse(&decl->type, p))
return false;
} else {
- decl.infer_type = true;
+ decl->flags |= DECL_FLAG_INFER_TYPE;
}
if (token_is_kw(t->token, KW_SEMICOLON)) {
- if (decl.infer_type) {
+ if (decl->flags & DECL_FLAG_INFER_TYPE) {
tokr_err(t, "Cannot infer type without expression.");
return false;
}
} else if (token_is_kw(t->token, KW_EQ)) {
t->token++;
- if (!parse_expr(&decl.expr, t, expr_find_end(t, EXPR_END_SEMICOLON)))
+ if (!expr_parse(&decl->expr, p, expr_find_end(p, EXPR_END_SEMICOLON)))
return false;
- decl.has_expr = true;
+ decl->flags |= DECL_FLAG_HAS_EXPR;
} else if (token_is_kw(t->token, KW_MINUS)) {
t->token++;
- if (!parse_expr(&decl.expr, t, expr_find_end(t, EXPR_END_SEMICOLON)))
+ if (!expr_parse(&decl->expr, p, expr_find_end(p, EXPR_END_SEMICOLON)))
return false;
- decl.has_expr = true;
- decl.is_const = true;
+ decl->flags |= DECL_FLAG_HAS_EXPR | DECL_FLAG_CONST;
} else {
tokr_err(t, "Expected ';', '=', or '-' in delaration.");
return false;
}
- decls_add(ds, &decl);
if (token_is_kw(t->token, KW_SEMICOLON)) {
t->token++;
break;
@@ -262,49 +431,103 @@ static bool parse_decls(Declarations *ds, Tokenizer *t) {
return true;
}
-static bool parse_stmt(Statement *s, Tokenizer *t) {
+static bool stmt_parse(Statement *s, Parser *p) {
+ Tokenizer *t = p->tokr;
+ s->where = t->token->where;
if (token_is_kw(t->token + 1, KW_COLON)) {
- return parse_decls(&s->decls, t);
+ s->kind = STMT_DECLS;
+ return decls_parse(&s->decls, p);
} else {
tokr_err(t, "Unreocgnized statement.");
return false;
}
}
-static bool parse_file(ParsedFile *f, Tokenizer *t) {
- stmts_create(&f->stmts);
+/*
+set up p so that it parses the tokens of t.
+p keeps a pointer to t (the tokenizer is not copied), and p's pool of
+expressions (p->exprs) is created here via block_arr_create.
+ */
+static void parser_from_tokenizer(Parser *p, Tokenizer *t) {
+ p->tokr = t;
+ block_arr_create(&p->exprs, 10, sizeof(Expression)); /* block size = 1024 (presumably the 10 is log2 of the block size - confirm in util/blockarr.c) */
+}
+
+static bool file_parse(ParsedFile *f, Parser *p) {
+ Tokenizer *t = p->tokr;
+ arr_create(&f->stmts, sizeof(Statement));
bool ret = true;
while (t->token->kind != TOKEN_EOF) {
- Statement stmt = {0};
- if (!parse_stmt(&stmt, t))
+ Statement *stmt = arr_add(&f->stmts);
+ if (!stmt_parse(stmt, p))
ret = false;
- stmts_add(&f->stmts, &stmt);
}
return ret;
}
+#define PARSE_PRINT_LOCATION(l) //fprintf(out, "[l%lu]", (unsigned long)(l).line);
+
static void expr_fprint(FILE *out, Expression *e) {
- /* TODO */
-/* switch (e->kind) { */
-/* case : */
-/* } */
+ PARSE_PRINT_LOCATION(e->where);
+ switch (e->kind) {
+ case EXPR_INT_LITERAL:
+ fprintf(out, "%lld", (long long)e->intl);
+ break;
+ case EXPR_FLOAT_LITERAL:
+ fprintf(out, "%f", (double)e->floatl);
+ break;
+ case EXPR_BINARY_OP:
+ switch (e->binary.op) {
+ case BINARY_PLUS:
+ fprintf(out, "add");
+ break;
+ case BINARY_MINUS:
+ fprintf(out, "subtract");
+ break;
+ }
+ fprintf(out, "(");
+ expr_fprint(out, e->binary.lhs);
+ fprintf(out, ",");
+ expr_fprint(out, e->binary.rhs);
+ fprintf(out, ")");
+ break;
+ case EXPR_UNARY_OP:
+ switch (e->unary.op) {
+ case UNARY_MINUS:
+ fprintf(out, "negate");
+ break;
+ }
+ fprintf(out, "(");
+ expr_fprint(out, e->unary.of);
+ fprintf(out, ")");
+ }
+}
+
+static void type_fprint(FILE *out, Type *t) {
+ PARSE_PRINT_LOCATION(t->where);
+ switch (t->kind) {
+ case TYPE_BUILTIN:
+ fprintf(out, "%s", keywords[builtin_type_to_kw(t->builtin)]);
+ break;
+ }
}
static void decl_fprint(FILE *out, Declaration *d) {
- fprintf(out, "l%lu:", (unsigned long)d->where.line);
+ PARSE_PRINT_LOCATION(d->where);
ident_fprint(out, d->var);
- if (d->is_const) {
+ if (d->flags & DECL_FLAG_CONST) {
fprintf(out, "[const]");
}
- if (d->has_expr) {
+ fprintf(out, ":");
+ if (!(d->flags & DECL_FLAG_INFER_TYPE)) {
+ type_fprint(out, &d->type);
+ }
+ if (d->flags & DECL_FLAG_HAS_EXPR) {
fprintf(out, "=");
+ expr_fprint(out, &d->expr);
}
}
static void stmt_fprint(FILE *out, Statement *s) {
+ PARSE_PRINT_LOCATION(s->where);
switch (s->kind) {
case STMT_DECLS:
- arr_foreach(s->decls, Declaration, decl) {
+ arr_foreach(&s->decls, Declaration, decl) {
if (decl != s->decls.data) {
fprintf(out, ", ");
}
@@ -316,7 +539,9 @@ static void stmt_fprint(FILE *out, Statement *s) {
}
static void parsed_file_fprint(FILE *out, ParsedFile *f) {
- arr_foreach(f->stmts, Statement, stmt) {
+ arr_foreach(&f->stmts, Statement, stmt) {
stmt_fprint(out, stmt);
}
}
+
+/* TODO: Freeing parser */
diff --git a/test.toc b/test.toc
index a178ceb..89dce12 100644
--- a/test.toc
+++ b/test.toc
@@ -1,3 +1,3 @@
foo:i8=3;
-foo:i8=3;
+foo:i8=3-+++---+++---3;
a:float-4; \ No newline at end of file
diff --git a/tokenizer.c b/tokenizer.c
index e65b2e3..587d080 100644
--- a/tokenizer.c
+++ b/tokenizer.c
@@ -1,9 +1,9 @@
typedef enum {
TOKEN_KW,
TOKEN_IDENT,
- TOKEN_NUM_CONST,
- TOKEN_CHAR_CONST,
- TOKEN_STR_CONST,
+ TOKEN_NUM_LITERAL,
+ TOKEN_CHAR_LITERAL,
+ TOKEN_STR_LITERAL,
TOKEN_EOF
} TokenKind;
@@ -21,6 +21,7 @@ typedef enum {
KW_LT,
KW_LE,
KW_MINUS,
+ KW_PLUS,
KW_INT,
KW_I8,
KW_I16,
@@ -37,7 +38,7 @@ typedef enum {
} Keyword;
static const char *keywords[KW_COUNT] =
- {";", "=", ":", ",", "fn", "(", ")", "{", "}", "==", "<", "<=", "-",
+ {";", "=", ":", ",", "fn", "(", ")", "{", "}", "==", "<", "<=", "-", "+",
"int", "i8", "i16", "i32", "i64", "u8", "u16", "u32", "u64", "float", "f32",
"f64"};
@@ -54,29 +55,26 @@ static Keyword tokenize_keyword(char **s) {
return KW_COUNT;
}
-#define TOKR_USE_LLONG 1
-
-typedef unsigned long long IntConst;
-
-typedef long double FloatConst; /* OPTIM: Switch to double */
+typedef unsigned long long IntLiteral;
+typedef long double FloatLiteral; /* OPTIM: Switch to double */
typedef enum {
- NUM_CONST_INT,
- NUM_CONST_FLOAT
-} NumConstKind;
+ NUM_LITERAL_INT,
+ NUM_LITERAL_FLOAT
+} NumLiteralKind;
typedef struct {
- NumConstKind kind;
+ NumLiteralKind kind;
union {
- IntConst intval;
- FloatConst floatval;
+ IntLiteral intval;
+ FloatLiteral floatval;
};
-} NumConst;
+} NumLiteral;
typedef struct {
char *str;
size_t len;
-} StrConst;
+} StrLiteral;
typedef struct {
LineNo line;
@@ -90,16 +88,14 @@ typedef struct {
union {
Keyword kw;
Identifier ident;
- NumConst num;
+ NumLiteral num;
char chr;
- StrConst str;
+ StrLiteral str;
};
} Token;
-arr_declaration(Tokens, Token, tokens_)
-
typedef struct {
- Tokens tokens;
+ Array tokens;
char *s; /* string being parsed */
LineNo line;
Token *token; /* token currently being processed */
@@ -121,21 +117,21 @@ static void token_fprint(FILE *out, Token *t) {
fprintf(out, "identifier: %ld:", t->ident->id);
ident_fprint(out, t->ident);
break;
- case TOKEN_NUM_CONST:
+ case TOKEN_NUM_LITERAL:
fprintf(out, "number: ");
switch (t->num.kind) {
- case NUM_CONST_INT:
+ case NUM_LITERAL_INT:
fprintf(out, "%llu", t->num.intval);
break;
- case NUM_CONST_FLOAT:
+ case NUM_LITERAL_FLOAT:
fprintf(out, "%g", (double)t->num.floatval);
break;
}
break;
- case TOKEN_CHAR_CONST:
+ case TOKEN_CHAR_LITERAL:
fprintf(out, "char: '%c' (%d)", t->chr, t->chr);
break;
- case TOKEN_STR_CONST:
+ case TOKEN_STR_LITERAL:
fprintf(out, "str: \"%s\"", t->str.str);
break;
case TOKEN_EOF:
@@ -144,12 +140,11 @@ static void token_fprint(FILE *out, Token *t) {
}
}
-static void tokr_add(Tokenizer *t, Token *token) {
- if (!token->where.line)
- token->where.line = t->line;
- if (!token->where.code)
- token->where.code = t->s;
- tokens_add(&t->tokens, token);
+static Token *tokr_add(Tokenizer *t) {
+ Token *token = arr_add(&t->tokens);
+ token->where.line = t->line;
+ token->where.code = t->s;
+ return token;
}
static void tokr_nextchar(Tokenizer *t) {
@@ -224,8 +219,8 @@ static void tokr_get_location(Tokenizer *tokr, Token *t) {
static bool tokenize_string(Tokenizer *tokr, char *str) {
int has_err = 0;
Tokenizer t;
- tokens_create(&t.tokens);
- tokens_reserve(&t.tokens, 256);
+ arr_create(&t.tokens, sizeof(Token));
+ arr_reserve(&t.tokens, 256);
t.s = str;
t.line = 1;
@@ -274,14 +269,13 @@ static bool tokenize_string(Tokenizer *tokr, char *str) {
if (is_comment) continue;
}
{
- Token token = {0};
- tokr_put_location(&t, &token);
Keyword kw = tokenize_keyword(&t.s);
if (kw != KW_COUNT) {
/* it's a keyword */
- token.kind = TOKEN_KW;
- token.kw = kw;
- tokr_add(&t, &token);
+ Token *token = tokr_add(&t);
+ tokr_put_location(&t, token);
+ token->kind = TOKEN_KW;
+ token->kw = kw;
continue;
}
}
@@ -289,14 +283,14 @@ static bool tokenize_string(Tokenizer *tokr, char *str) {
/* check if it's a number */
if (isdigit(*t.s)) {
- /* it's a numeric constant */
+ /* it's a numeric literal */
int base = 10;
- FloatConst decimal_pow10;
- NumConst n;
- n.kind = NUM_CONST_INT;
+ FloatLiteral decimal_pow10;
+ NumLiteral n;
+ n.kind = NUM_LITERAL_INT;
n.intval = 0;
- Token token = {0};
- tokr_put_location(&t, &token);
+ Token *token = tokr_add(&t);
+ tokr_put_location(&t, token);
if (*t.s == '0') {
tokr_nextchar(&t);
/* octal/hexadecimal/binary (or zero) */
@@ -322,7 +316,7 @@ static bool tokenize_string(Tokenizer *tokr, char *str) {
while (1) {
if (*t.s == '.') {
- if (n.kind == NUM_CONST_FLOAT) {
+ if (n.kind == NUM_LITERAL_FLOAT) {
tokenization_err(&t, "Double . in number.");
goto err;
}
@@ -330,16 +324,16 @@ static bool tokenize_string(Tokenizer *tokr, char *str) {
tokenization_err(&t, "Decimal point in non base 10 number.");
goto err;
}
- n.kind = NUM_CONST_FLOAT;
+ n.kind = NUM_LITERAL_FLOAT;
decimal_pow10 = 0.1;
- n.floatval = (FloatConst)n.intval;
+ n.floatval = (FloatLiteral)n.intval;
tokr_nextchar(&t);
continue;
} else if (*t.s == 'e') {
tokr_nextchar(&t);
- if (n.kind == NUM_CONST_INT) {
- n.kind = NUM_CONST_FLOAT;
- n.floatval = (FloatConst)n.intval;
+ if (n.kind == NUM_LITERAL_INT) {
+ n.kind = NUM_LITERAL_FLOAT;
+ n.floatval = (FloatLiteral)n.intval;
}
/* TODO: check if exceeding maximum exponent */
int exponent = 0;
@@ -382,38 +376,37 @@ static bool tokenize_string(Tokenizer *tokr, char *str) {
tokenization_err(&t, "Digit %d cannot appear in a base %d number.", digit, base);
goto err;
}
- /* end of numeric constant */
+ /* end of numeric literal */
break;
}
switch (n.kind) {
- case NUM_CONST_INT:
- if (n.intval > ULLONG_MAX / (IntConst)base ||
- n.intval * (IntConst)base > ULLONG_MAX - (IntConst)digit) {
+ case NUM_LITERAL_INT:
+ if (n.intval > ULLONG_MAX / (IntLiteral)base ||
+ n.intval * (IntLiteral)base > ULLONG_MAX - (IntLiteral)digit) {
/* too big! */
- tokenization_err(&t, "Number too big to fit in a numeric constant.");
+ tokenization_err(&t, "Number too big to fit in a numeric literal.");
goto err;
}
- n.intval *= (IntConst)base;
- n.intval += (IntConst)digit;
+ n.intval *= (IntLiteral)base;
+ n.intval += (IntLiteral)digit;
break;
- case NUM_CONST_FLOAT:
- n.floatval += decimal_pow10 * (FloatConst)digit;
+ case NUM_LITERAL_FLOAT:
+ n.floatval += decimal_pow10 * (FloatLiteral)digit;
decimal_pow10 /= 10;
break;
}
tokr_nextchar(&t);
}
- token.kind = TOKEN_NUM_CONST;
- token.num = n;
- tokr_add(&t, &token);
+ token->kind = TOKEN_NUM_LITERAL;
+ token->num = n;
continue;
}
if (*t.s == '\'') {
- /* it's a character constant! */
+ /* it's a character literal! */
tokr_nextchar(&t);
- Token token = {0};
- tokr_put_location(&t, &token);
+ Token *token = tokr_add(&t);
+ tokr_put_location(&t, token);
char c;
if (*t.s == '\\') {
/* escape sequence */
@@ -428,20 +421,19 @@ static bool tokenize_string(Tokenizer *tokr, char *str) {
tokr_nextchar(&t);
}
if (*t.s != '\'') {
- tokenization_err(&t, "End of character constant expected.");
+ tokenization_err(&t, "End of character literal expected.");
goto err;
}
tokr_nextchar(&t);
- token.kind = TOKEN_CHAR_CONST;
- token.chr = c;
- tokr_add(&t, &token);
+ token->kind = TOKEN_CHAR_LITERAL;
+ token->chr = c;
continue;
}
if (*t.s == '"') {
- /* it's a string constant! */
- Token token;
- tokr_put_location(&t, &token);
+ /* it's a string literal! */
+ Token *token = tokr_add(&t);
+ tokr_put_location(&t, token);
tokr_nextchar(&t);
size_t len = 0;
size_t backslashes = 0;
@@ -450,7 +442,7 @@ static bool tokenize_string(Tokenizer *tokr, char *str) {
backslashes++;
} else if (*t.s == 0) {
/* return t to opening " so that we go to the next line */
- tokr_get_location(&t, &token);
+ tokr_get_location(&t, token);
tokenization_err(&t, "No matching \" found.");
goto err;
} else {
@@ -461,7 +453,7 @@ static bool tokenize_string(Tokenizer *tokr, char *str) {
}
char *str = malloc(len + 1);
char *strptr = str;
- tokr_get_location(&t, &token);
+ tokr_get_location(&t, token);
tokr_nextchar(&t); /* past opening " */
while (*t.s != '"') {
assert(*t.s);
@@ -479,31 +471,28 @@ static bool tokenize_string(Tokenizer *tokr, char *str) {
}
}
*strptr = 0;
- token.kind = TOKEN_STR_CONST;
- token.str.len = len;
- token.str.str = str;
- tokr_add(&t, &token);
+ token->kind = TOKEN_STR_LITERAL;
+ token->str.len = len;
+ token->str.str = str;
tokr_nextchar(&t); /* move past closing " */
continue;
}
if (isidentstart(*t.s)) {
/* it's an identifier */
- Token token = {0};
- tokr_put_location(&t, &token);
+ Token *token = tokr_add(&t);
+ tokr_put_location(&t, token);
Identifier ident = ident_insert(&t.s);
- token.kind = TOKEN_IDENT;
- token.ident = ident;
- tokr_add(&t, &token);
+ token->kind = TOKEN_IDENT;
+ token->ident = ident;
continue;
}
tokenization_err(&t, "Token not recognized");
err:
has_err = 1;
}
- Token token = {0};
- token.kind = TOKEN_EOF;
- tokr_add(&t, &token);
+ Token *token = tokr_add(&t);
+ token->kind = TOKEN_EOF;
t.token = t.tokens.data;
*tokr = t;
@@ -511,13 +500,13 @@ static bool tokenize_string(Tokenizer *tokr, char *str) {
}
static void tokr_free(Tokenizer *t) {
- arr_foreach(t->tokens, Token, token) {
+ arr_foreach(&t->tokens, Token, token) {
switch (token->kind) {
- case TOKEN_STR_CONST:
+ case TOKEN_STR_LITERAL:
free(token->str.str);
break;
default: break;
}
}
- tokens_clear(&t->tokens);
+ arr_clear(&t->tokens);
}
diff --git a/util/arr.c b/util/arr.c
index 7595b4b..aeeeb92 100644
--- a/util/arr.c
+++ b/util/arr.c
@@ -1,28 +1,43 @@
-#define arr_declaration(arr_type, type, prefix) typedef struct { \
- type *data; \
- size_t cap; \
- size_t len; \
- } arr_type; \
- static void prefix##create(arr_type *arr) { \
- arr->data = NULL; \
- arr->cap = 0; \
- arr->len = 0; \
- } \
- static void prefix##reserve(arr_type *arr, size_t n) { \
- arr->data = err_realloc(arr->data, n * sizeof(*arr->data)); \
- arr->cap = n; \
- } \
- static void prefix##add(arr_type *arr, type *item) { \
- if (arr->len >= arr->cap) { \
- prefix##reserve(arr, 2 * arr->len + 2); \
- } \
- arr->data[arr->len++] = *item; \
- } \
- static void prefix##clear(arr_type *arr) { \
- free(arr->data); \
- arr->data = NULL; \
- arr->cap = 0; \
- arr->len = 0; \
+/* a growable array of fixed-size items, stored contiguously in data */
+typedef struct {
+	void *data;     /* storage for the items (NULL until something is reserved) */
+	void *last;     /* the last item in use, or NULL if the array is empty */
+	size_t len;     /* number of items in use */
+	size_t cap;     /* number of items data has room for */
+	size_t item_sz; /* size of one item, in bytes */
+} Array;
+
+/* make arr an empty array whose items are item_sz bytes each. nothing is allocated. */
+void arr_create(Array *arr, size_t item_sz) {
+	arr->len = arr->cap = 0;
+	arr->item_sz = item_sz;
+	arr->data = NULL;
+	arr->last = NULL;
+}
+
+/*
+make room for n items in total.
+the storage may move, so pointers to items of arr are invalid after this call.
+ */
+void arr_reserve(Array *arr, size_t n) {
+	arr->cap = n;
+	/* err_realloc (as used by the macro implementation this replaces) instead of a plain,
+	   unchecked realloc, whose NULL result would overwrite and leak the old storage */
+	arr->data = err_realloc(arr->data, arr->item_sz * arr->cap);
+	/* an empty array has no last item; computing data + item_sz * (len - 1) with len == 0
+	   would wrap around and form a pointer outside the allocation */
+	arr->last = arr->len
+		? (void*)((char*)arr->data + arr->item_sz * (arr->len - 1))
+		: NULL;
+}
+
+/*
+append one item to arr and return a pointer to it. the item's contents are
+not set here; the caller fills them in.
+the storage may be reallocated, so pointers to other items of arr are invalid after this call.
+ */
+void *arr_add(Array *arr) {
+	if (arr->len >= arr->cap) {
+		arr_reserve(arr, (arr->cap + 2) * 2);
+	}
+	/* derive the new item's address from data rather than stepping last,
+	   which is NULL while the array is empty */
+	arr->last = (char*)arr->data + arr->item_sz * arr->len;
+	arr->len++;
+	return arr->last;
+}
+
+/* free the storage of arr. the fields are left as they were; use arr_clear to get a reusable empty array. */
+void arr_free(Array *arr) {
+	free(arr->data);
+}
+
+/* free the storage of arr and make it an empty array again (item_sz is kept). */
+void arr_clear(Array *arr) {
+	free(arr->data);
+	arr->len = arr->cap = 0;
+	arr->data = NULL;
+	arr->last = NULL;
+}
-#define arr_foreach(arr, type, var) for (type *var = (arr).data, *arr_iterate_last_ = (arr).data + ((arr).len - 1); var; (var == arr_iterate_last_) ? var = NULL : var++)
+/*
+iterate over the items of an Array.
+arr is a pointer to the Array, type is the type of its items, and var is the name
+of the (type *) loop variable, which points to the current item.
+the body is not run for an empty array, even one which has storage reserved
+(there data is non-NULL and no item ever compares equal to last).
+arr is evaluated several times, so it should not have side effects.
+ */
+#define arr_foreach(arr, type, var) for (type *var = (arr)->len ? (arr)->data : NULL; var; var == (arr)->last ? var = NULL : var++)
diff --git a/util/err.c b/util/err.c
index 5e296da..5385cf1 100644
--- a/util/err.c
+++ b/util/err.c
@@ -47,14 +47,15 @@ static void err_print_footer_(const char *context) {
}
/* Write nicely-formatted errors to the error file */
-static void err_print(LineNo line, const char *context, const char *fmt, ...) {
- err_print_header_(line);
- va_list args;
- va_start(args, fmt);
- err_vfprint(fmt, args);
- va_end(args);
- err_print_footer_(context);
-}
+
+/* static void err_print(LineNo line, const char *context, const char *fmt, ...) { */
+/* err_print_header_(line); */
+/* va_list args; */
+/* va_start(args, fmt); */
+/* err_vfprint(fmt, args); */
+/* va_end(args); */
+/* err_print_footer_(context); */
+/* } */
static void err_vprint(LineNo line, const char *context, const char *fmt, va_list args) {
err_print_header_(line);