diff options
author | Leo Tenenbaum <pommicket@gmail.com> | 2019-08-19 15:40:46 -0400 |
---|---|---|
committer | Leo Tenenbaum <pommicket@gmail.com> | 2019-08-19 15:40:46 -0400 |
commit | 4ac607e47f0b046f627aaa1df618c43bf096260e (patch) | |
tree | fcde7844b26dee841177ab61f202636ba8f83543 | |
parent | 00cb291c4bf2c64342b00152e58f2544e66ddb2c (diff) |
Added basic unary/binary expressions
-rwxr-xr-x | build.sh | 3 | ||||
-rw-r--r-- | main.c | 8 | ||||
-rw-r--r-- | parse.c | 351 | ||||
-rw-r--r-- | test.toc | 2 | ||||
-rw-r--r-- | tokenizer.c | 171 | ||||
-rw-r--r-- | util/arr.c | 67 | ||||
-rw-r--r-- | util/err.c | 17 |
7 files changed, 425 insertions, 194 deletions
@@ -1,3 +1,2 @@ #!/bin/bash - -gcc -o toc main.c -g -o toc -Wall -Wextra -Wpedantic -Wconversion -Wno-unused-function -std=c11 || exit 1 +gcc -o toc main.c -g -o toc -Wall -Wextra -Wpedantic -Wconversion -std=c11 || exit 1 @@ -9,6 +9,7 @@ #include <stdbool.h> #include "util/err.c" #include "util/arr.c" +#include "util/blockarr.c" #include "identifiers.c" #include "tokenizer.c" #include "parse.c" @@ -50,15 +51,16 @@ int main(int argc, char **argv) { return EXIT_FAILURE; } - arr_foreach(t.tokens, Token, token) { + arr_foreach(&t.tokens, Token, token) { if (token != t.tokens.data) printf(" "); token_fprint(stdout, token); } printf("\n"); - + Parser p; + parser_from_tokenizer(&p, &t); ParsedFile f; - if (!parse_file(&f, &t)) { + if (!file_parse(&f, &p)) { err_fprint(TEXT_IMPORTANT("Errors occured while parsing.\n")); return EXIT_FAILURE; } @@ -20,6 +20,7 @@ typedef enum { typedef struct { + Location where; TypeKind kind; union { BuiltinType builtin; @@ -27,52 +28,84 @@ typedef struct { } Type; typedef enum { - EXPR_INT_CONST, - EXPR_FLOAT_CONST + EXPR_INT_LITERAL, + EXPR_FLOAT_LITERAL, + EXPR_BINARY_OP, + EXPR_UNARY_OP } ExprKind; -typedef struct { +typedef enum { + UNARY_MINUS +} UnaryOp; + +typedef enum { + BINARY_PLUS, + BINARY_MINUS +} BinaryOp; + +#define EXPR_FLAG_FLEXIBLE 0x01 + +typedef struct Expression { + Location where; ExprKind kind; Type type; - bool is_flexible_num:1; /* expressions like 5 or 7*8+3 can be any numerical type */ + uint16_t flags; union { - FloatConst floatc; - IntConst intc; + FloatLiteral floatl; + IntLiteral intl; + struct { + UnaryOp op; + struct Expression *of; + } unary; + struct { + BinaryOp op; + struct Expression *lhs; + struct Expression *rhs; + } binary; }; } Expression; +#define DECL_FLAG_INFER_TYPE 0x01 +#define DECL_FLAG_CONST 0x02 +#define DECL_FLAG_HAS_EXPR 0x04 typedef struct { Location where; Identifier var; Type type; Expression expr; - bool infer_type:1; - bool is_const:1; - bool has_expr:1; + uint16_t flags; } Declaration; /* OPTIM: Instead of using dynamic arrays, do two passes. */ -arr_declaration(Declarations, Declaration, decls_) - typedef enum { STMT_DECLS } StatementKind; typedef struct { - StatementKind kind; Location where; + StatementKind kind; union { - Declarations decls; + Array decls; }; } Statement; -arr_declaration(Statements, Statement, stmts_) - typedef struct { - Statements stmts; + Array stmts; } ParsedFile; +typedef struct { + Tokenizer *tokr; + BlockArr exprs; /* a dynamic array of expressions, so that we don't need to call malloc every time we make an expression */ +} Parser; + +/* +allocate a new expression. +IMPORTANT: This invalidates all other parser-allocated Expression pointers. + */ +static Expression *parser_new_expr(Parser *p) { + return block_arr_add(&p->exprs); +} /* returns BUILTIN_TYPE_COUNT on failure */ static BuiltinType kw_to_builtin_type(Keyword kw) { @@ -93,7 +126,29 @@ static BuiltinType kw_to_builtin_type(Keyword kw) { } } -static bool parse_type(Type *type, Tokenizer *t) { +static Keyword builtin_type_to_kw(BuiltinType t) { + switch (t) { + case BUILTIN_INT: return KW_INT; + case BUILTIN_I8: return KW_I8; + case BUILTIN_I16: return KW_I16; + case BUILTIN_I32: return KW_I32; + case BUILTIN_I64: return KW_I64; + case BUILTIN_U8: return KW_U8; + case BUILTIN_U16: return KW_U16; + case BUILTIN_U32: return KW_U32; + case BUILTIN_U64: return KW_U64; + case BUILTIN_FLOAT: return KW_FLOAT; + case BUILTIN_F32: return KW_F32; + case BUILTIN_F64: return KW_F64; + case BUILTIN_TYPE_COUNT: break; + } + assert(0); + return KW_COUNT; +} + +static bool type_parse(Type *type, Parser *p) { + Tokenizer *t = p->tokr; + type->where = t->token->where; switch (t->token->kind) { case TOKEN_KW: type->kind = TYPE_BUILTIN; @@ -112,27 +167,45 @@ static bool parse_type(Type *type, Tokenizer *t) { return false; } -static bool parse_expr(Expression *e, Tokenizer *t, Token *end) { +#define NOT_AN_OP -1 +static int op_precedence(Keyword op) { + switch (op) { + case KW_PLUS: + return 10; + case KW_MINUS: + return 20; + default: + return NOT_AN_OP; + } +} + +static bool expr_parse(Expression *e, Parser *p, Token *end) { + Tokenizer *t = p->tokr; if (end == NULL) return false; - memset(e, 0, sizeof *e); + e->flags = 0; + e->where = t->token->where; + if (end <= t->token) { + tokr_err(t, "Empty expression."); + return false; + } if (end - t->token == 1) { /* 1-token expression */ switch (t->token->kind) { - case TOKEN_NUM_CONST: { - NumConst *num = &t->token->num; + case TOKEN_NUM_LITERAL: { + NumLiteral *num = &t->token->num; switch (num->kind) { - case NUM_CONST_FLOAT: - e->kind = EXPR_FLOAT_CONST; + case NUM_LITERAL_FLOAT: + e->kind = EXPR_FLOAT_LITERAL; e->type.kind = TYPE_BUILTIN; e->type.builtin = BUILTIN_FLOAT; - e->floatc = num->floatval; + e->floatl = num->floatval; break; - case NUM_CONST_INT: - e->kind = EXPR_INT_CONST; - e->is_flexible_num = true; + case NUM_LITERAL_INT: + e->kind = EXPR_INT_LITERAL; + e->flags |= EXPR_FLAG_FLEXIBLE; e->type.kind = TYPE_BUILTIN; e->type.builtin = BUILTIN_INT; /* TODO: if it's too big, use a u64 instead. */ - e->floatc = num->intval; + e->intl = num->intval; break; } } break; @@ -143,9 +216,104 @@ static bool parse_expr(Expression *e, Tokenizer *t, Token *end) { t->token = end; return true; } - /* TODO */ - tokr_err(t, "multi-token exprs not supported yet."); - return false; + /* Find the lowest-precedence operator not in parentheses */ + int paren_level = 0; + int lowest_precedence = NOT_AN_OP; + Token *lowest_precedence_op; + for (Token *token = t->token; token < end; token++) { + if (token->kind == TOKEN_KW) { + switch (token->kw) { + case KW_LPAREN: + paren_level++; + break; + case KW_RPAREN: + paren_level--; + if (paren_level < 0) { + t->token = token; + tokr_err(t, "Excessive closing parenthesis."); + } + break; + default: { /* OPTIM: use individual cases for each op */ + int precedence = op_precedence(token->kw); + if (precedence == NOT_AN_OP) break; /* nvm it's not an operator */ + if (lowest_precedence == NOT_AN_OP || precedence <= lowest_precedence) { + lowest_precedence = precedence; + lowest_precedence_op = token; + } + } break; + } + } + } + if (lowest_precedence == NOT_AN_OP) { + /* function calls, array accesses, etc. OR + something like (5+3)*/ + tokr_err(t, "Not implemented yet."); + return false; + } + + /* This is a unary op not a binary one. */ + while (lowest_precedence_op != t->token + && lowest_precedence_op[-1].kind == TOKEN_KW + && op_precedence(lowest_precedence_op[-1].kw) != NOT_AN_OP) { + lowest_precedence_op--; + } + + /* Unary */ + if (lowest_precedence_op == t->token) { + UnaryOp op; + bool is_unary; + switch (lowest_precedence_op->kw) { + case KW_PLUS: + /* unary + is ignored entirely */ + t->token++; + /* re-parse this expression without + */ + return expr_parse(e, p, end); + case KW_MINUS: + is_unary = true; + op = UNARY_MINUS; + break; + default: + is_unary = false; + break; + } + if (!is_unary) { + tokr_err(t, "%s is not a unary operator.", keywords[lowest_precedence_op->kw]); + return false; + } + e->unary.op = op; + e->kind = EXPR_UNARY_OP; + t->token++; + Expression *of = parser_new_expr(p); + e->unary.of = of; + return expr_parse(of, p, end); + } + + + BinaryOp op; + switch (lowest_precedence_op->kw) { + case KW_PLUS: + op = BINARY_PLUS; + break; + case KW_MINUS: + op = BINARY_MINUS; + break; + default: assert(0); break; + } + e->binary.op = op; + e->kind = EXPR_BINARY_OP; + + Expression *lhs = parser_new_expr(p); + e->binary.lhs = lhs; + if (!expr_parse(lhs, p, lowest_precedence_op)) + return false; + + Expression *rhs = parser_new_expr(p); + t->token = lowest_precedence_op + 1; + e->binary.rhs = rhs; + if (!expr_parse(rhs, p, end)) + return false; + + return true; } /* @@ -156,7 +324,8 @@ typedef enum { EXPR_END_RPAREN_OR_COMMA, EXPR_END_SEMICOLON } ExprEndKind; -static Token *expr_find_end(Tokenizer *t, ExprEndKind ends_with) { +static Token *expr_find_end(Parser *p, ExprEndKind ends_with) { + Tokenizer *t = p->tokr; long bracket_level = 0; Token *token = t->token; while (1) { @@ -187,7 +356,7 @@ static Token *expr_find_end(Tokenizer *t, ExprEndKind ends_with) { return NULL; case EXPR_END_RPAREN_OR_COMMA: if (bracket_level > 0) { - tokr_err(t, "Mismatched parentheses."); /* FEATURE: Find out where this is */ + tokr_err(t, "Opening parenthesis was never closed."); /* FEATURE: Find out where this is */ return NULL; } else { tokr_err(t, "Could not find ')' or ',' at end of expression."); @@ -200,17 +369,19 @@ static Token *expr_find_end(Tokenizer *t, ExprEndKind ends_with) { } } -static bool parse_decls(Declarations *ds, Tokenizer *t) { - decls_create(ds); +static bool decls_parse(Array *ds, Parser *p) { + Tokenizer *t = p->tokr; + arr_create(ds, sizeof(Declaration)); while (1) { - Declaration decl = {0}; + Declaration *decl = arr_add(ds); if (t->token->kind != TOKEN_IDENT) { tokr_err(t, "Cannot declare non-identifier."); return false; } - decl.where = t->token->where; - decl.var = t->token->ident; + decl->where = t->token->where; + decl->var = t->token->ident; + decl->flags = 0; t->token++; if (!token_is_kw(t->token, KW_COLON)) { @@ -222,33 +393,31 @@ static bool parse_decls(Declarations *ds, Tokenizer *t) { if (!token_is_kw(t->token, KW_MINUS) && !token_is_kw(t->token, KW_EQ) && !token_is_kw(t->token, KW_SEMICOLON)) { - if (!parse_type(&decl.type, t)) + if (!type_parse(&decl->type, p)) return false; } else { - decl.infer_type = true; + decl->flags |= DECL_FLAG_INFER_TYPE; } if (token_is_kw(t->token, KW_SEMICOLON)) { - if (decl.infer_type) { + if (decl->flags & DECL_FLAG_INFER_TYPE) { tokr_err(t, "Cannot infer type without expression."); return false; } } else if (token_is_kw(t->token, KW_EQ)) { t->token++; - if (!parse_expr(&decl.expr, t, expr_find_end(t, EXPR_END_SEMICOLON))) + if (!expr_parse(&decl->expr, p, expr_find_end(p, EXPR_END_SEMICOLON))) return false; - decl.has_expr = true; + decl->flags |= DECL_FLAG_HAS_EXPR; } else if (token_is_kw(t->token, KW_MINUS)) { t->token++; - if (!parse_expr(&decl.expr, t, expr_find_end(t, EXPR_END_SEMICOLON))) + if (!expr_parse(&decl->expr, p, expr_find_end(p, EXPR_END_SEMICOLON))) return false; - decl.has_expr = true; - decl.is_const = true; + decl->flags |= DECL_FLAG_HAS_EXPR | DECL_FLAG_CONST; } else { tokr_err(t, "Expected ';', '=', or '-' in delaration."); return false; } - decls_add(ds, &decl); if (token_is_kw(t->token, KW_SEMICOLON)) { t->token++; break; @@ -262,49 +431,103 @@ static bool parse_decls(Declarations *ds, Tokenizer *t) { return true; } -static bool parse_stmt(Statement *s, Tokenizer *t) { +static bool stmt_parse(Statement *s, Parser *p) { + Tokenizer *t = p->tokr; + s->where = t->token->where; if (token_is_kw(t->token + 1, KW_COLON)) { - return parse_decls(&s->decls, t); + s->kind = STMT_DECLS; + return decls_parse(&s->decls, p); } else { tokr_err(t, "Unreocgnized statement."); return false; } } -static bool parse_file(ParsedFile *f, Tokenizer *t) { - stmts_create(&f->stmts); +static void parser_from_tokenizer(Parser *p, Tokenizer *t) { + p->tokr = t; + block_arr_create(&p->exprs, 10, sizeof(Expression)); /* block size = 1024 */ +} + +static bool file_parse(ParsedFile *f, Parser *p) { + Tokenizer *t = p->tokr; + arr_create(&f->stmts, sizeof(Statement)); bool ret = true; while (t->token->kind != TOKEN_EOF) { - Statement stmt = {0}; - if (!parse_stmt(&stmt, t)) + Statement *stmt = arr_add(&f->stmts); + if (!stmt_parse(stmt, p)) ret = false; - stmts_add(&f->stmts, &stmt); } return ret; } +#define PARSE_PRINT_LOCATION(l) //fprintf(out, "[l%lu]", (unsigned long)(l).line); + static void expr_fprint(FILE *out, Expression *e) { - /* TODO */ -/* switch (e->kind) { */ -/* case : */ -/* } */ + PARSE_PRINT_LOCATION(e->where); + switch (e->kind) { + case EXPR_INT_LITERAL: + fprintf(out, "%lld", (long long)e->intl); + break; + case EXPR_FLOAT_LITERAL: + fprintf(out, "%f", (double)e->floatl); + break; + case EXPR_BINARY_OP: + switch (e->binary.op) { + case BINARY_PLUS: + fprintf(out, "add"); + break; + case BINARY_MINUS: + fprintf(out, "subtract"); + break; + } + fprintf(out, "("); + expr_fprint(out, e->binary.lhs); + fprintf(out, ","); + expr_fprint(out, e->binary.rhs); + fprintf(out, ")"); + break; + case EXPR_UNARY_OP: + switch (e->unary.op) { + case UNARY_MINUS: + fprintf(out, "negate"); + break; + } + fprintf(out, "("); + expr_fprint(out, e->unary.of); + fprintf(out, ")"); + } +} + +static void type_fprint(FILE *out, Type *t) { + PARSE_PRINT_LOCATION(t->where); + switch (t->kind) { + case TYPE_BUILTIN: + fprintf(out, "%s", keywords[builtin_type_to_kw(t->builtin)]); + break; + } } static void decl_fprint(FILE *out, Declaration *d) { - fprintf(out, "l%lu:", (unsigned long)d->where.line); + PARSE_PRINT_LOCATION(d->where); ident_fprint(out, d->var); - if (d->is_const) { + if (d->flags & DECL_FLAG_CONST) { fprintf(out, "[const]"); } - if (d->has_expr) { + fprintf(out, ":"); + if (!(d->flags & DECL_FLAG_INFER_TYPE)) { + type_fprint(out, &d->type); + } + if (d->flags & DECL_FLAG_HAS_EXPR) { fprintf(out, "="); + expr_fprint(out, &d->expr); } } static void stmt_fprint(FILE *out, Statement *s) { + PARSE_PRINT_LOCATION(s->where); switch (s->kind) { case STMT_DECLS: - arr_foreach(s->decls, Declaration, decl) { + arr_foreach(&s->decls, Declaration, decl) { if (decl != s->decls.data) { fprintf(out, ", "); } @@ -316,7 +539,9 @@ static void stmt_fprint(FILE *out, Statement *s) { } static void parsed_file_fprint(FILE *out, ParsedFile *f) { - arr_foreach(f->stmts, Statement, stmt) { + arr_foreach(&f->stmts, Statement, stmt) { stmt_fprint(out, stmt); } } + +/* TODO: Freeing parser */ @@ -1,3 +1,3 @@ foo:i8=3; -foo:i8=3; +foo:i8=3-+++---+++---3; a:float-4;
\ No newline at end of file diff --git a/tokenizer.c b/tokenizer.c index e65b2e3..587d080 100644 --- a/tokenizer.c +++ b/tokenizer.c @@ -1,9 +1,9 @@ typedef enum { TOKEN_KW, TOKEN_IDENT, - TOKEN_NUM_CONST, - TOKEN_CHAR_CONST, - TOKEN_STR_CONST, + TOKEN_NUM_LITERAL, + TOKEN_CHAR_LITERAL, + TOKEN_STR_LITERAL, TOKEN_EOF } TokenKind; @@ -21,6 +21,7 @@ typedef enum { KW_LT, KW_LE, KW_MINUS, + KW_PLUS, KW_INT, KW_I8, KW_I16, @@ -37,7 +38,7 @@ typedef enum { } Keyword; static const char *keywords[KW_COUNT] = - {";", "=", ":", ",", "fn", "(", ")", "{", "}", "==", "<", "<=", "-", + {";", "=", ":", ",", "fn", "(", ")", "{", "}", "==", "<", "<=", "-", "+", "int", "i8", "i16", "i32", "i64", "u8", "u16", "u32", "u64", "float", "f32", "f64"}; @@ -54,29 +55,26 @@ static Keyword tokenize_keyword(char **s) { return KW_COUNT; } -#define TOKR_USE_LLONG 1 - -typedef unsigned long long IntConst; - -typedef long double FloatConst; /* OPTIM: Switch to double */ +typedef unsigned long long IntLiteral; +typedef long double FloatLiteral; /* OPTIM: Switch to double */ typedef enum { - NUM_CONST_INT, - NUM_CONST_FLOAT -} NumConstKind; + NUM_LITERAL_INT, + NUM_LITERAL_FLOAT +} NumLiteralKind; typedef struct { - NumConstKind kind; + NumLiteralKind kind; union { - IntConst intval; - FloatConst floatval; + IntLiteral intval; + FloatLiteral floatval; }; -} NumConst; +} NumLiteral; typedef struct { char *str; size_t len; -} StrConst; +} StrLiteral; typedef struct { LineNo line; @@ -90,16 +88,14 @@ typedef struct { union { Keyword kw; Identifier ident; - NumConst num; + NumLiteral num; char chr; - StrConst str; + StrLiteral str; }; } Token; -arr_declaration(Tokens, Token, tokens_) - typedef struct { - Tokens tokens; + Array tokens; char *s; /* string being parsed */ LineNo line; Token *token; /* token currently being processed */ @@ -121,21 +117,21 @@ static void token_fprint(FILE *out, Token *t) { fprintf(out, "identifier: %ld:", t->ident->id); ident_fprint(out, t->ident); break; - case TOKEN_NUM_CONST: + case TOKEN_NUM_LITERAL: fprintf(out, "number: "); switch (t->num.kind) { - case NUM_CONST_INT: + case NUM_LITERAL_INT: fprintf(out, "%llu", t->num.intval); break; - case NUM_CONST_FLOAT: + case NUM_LITERAL_FLOAT: fprintf(out, "%g", (double)t->num.floatval); break; } break; - case TOKEN_CHAR_CONST: + case TOKEN_CHAR_LITERAL: fprintf(out, "char: '%c' (%d)", t->chr, t->chr); break; - case TOKEN_STR_CONST: + case TOKEN_STR_LITERAL: fprintf(out, "str: \"%s\"", t->str.str); break; case TOKEN_EOF: @@ -144,12 +140,11 @@ static void token_fprint(FILE *out, Token *t) { } } -static void tokr_add(Tokenizer *t, Token *token) { - if (!token->where.line) - token->where.line = t->line; - if (!token->where.code) - token->where.code = t->s; - tokens_add(&t->tokens, token); +static Token *tokr_add(Tokenizer *t) { + Token *token = arr_add(&t->tokens); + token->where.line = t->line; + token->where.code = t->s; + return token; } static void tokr_nextchar(Tokenizer *t) { @@ -224,8 +219,8 @@ static void tokr_get_location(Tokenizer *tokr, Token *t) { static bool tokenize_string(Tokenizer *tokr, char *str) { int has_err = 0; Tokenizer t; - tokens_create(&t.tokens); - tokens_reserve(&t.tokens, 256); + arr_create(&t.tokens, sizeof(Token)); + arr_reserve(&t.tokens, 256); t.s = str; t.line = 1; @@ -274,14 +269,13 @@ static bool tokenize_string(Tokenizer *tokr, char *str) { if (is_comment) continue; } { - Token token = {0}; - tokr_put_location(&t, &token); Keyword kw = tokenize_keyword(&t.s); if (kw != KW_COUNT) { /* it's a keyword */ - token.kind = TOKEN_KW; - token.kw = kw; - tokr_add(&t, &token); + Token *token = tokr_add(&t); + tokr_put_location(&t, token); + token->kind = TOKEN_KW; + token->kw = kw; continue; } } @@ -289,14 +283,14 @@ static bool tokenize_string(Tokenizer *tokr, char *str) { /* check if it's a number */ if (isdigit(*t.s)) { - /* it's a numeric constant */ + /* it's a numeric literal */ int base = 10; - FloatConst decimal_pow10; - NumConst n; - n.kind = NUM_CONST_INT; + FloatLiteral decimal_pow10; + NumLiteral n; + n.kind = NUM_LITERAL_INT; n.intval = 0; - Token token = {0}; - tokr_put_location(&t, &token); + Token *token = tokr_add(&t); + tokr_put_location(&t, token); if (*t.s == '0') { tokr_nextchar(&t); /* octal/hexadecimal/binary (or zero) */ @@ -322,7 +316,7 @@ static bool tokenize_string(Tokenizer *tokr, char *str) { while (1) { if (*t.s == '.') { - if (n.kind == NUM_CONST_FLOAT) { + if (n.kind == NUM_LITERAL_FLOAT) { tokenization_err(&t, "Double . in number."); goto err; } @@ -330,16 +324,16 @@ static bool tokenize_string(Tokenizer *tokr, char *str) { tokenization_err(&t, "Decimal point in non base 10 number."); goto err; } - n.kind = NUM_CONST_FLOAT; + n.kind = NUM_LITERAL_FLOAT; decimal_pow10 = 0.1; - n.floatval = (FloatConst)n.intval; + n.floatval = (FloatLiteral)n.intval; tokr_nextchar(&t); continue; } else if (*t.s == 'e') { tokr_nextchar(&t); - if (n.kind == NUM_CONST_INT) { - n.kind = NUM_CONST_FLOAT; - n.floatval = (FloatConst)n.intval; + if (n.kind == NUM_LITERAL_INT) { + n.kind = NUM_LITERAL_FLOAT; + n.floatval = (FloatLiteral)n.intval; } /* TODO: check if exceeding maximum exponent */ int exponent = 0; @@ -382,38 +376,37 @@ static bool tokenize_string(Tokenizer *tokr, char *str) { tokenization_err(&t, "Digit %d cannot appear in a base %d number.", digit, base); goto err; } - /* end of numeric constant */ + /* end of numeric literal */ break; } switch (n.kind) { - case NUM_CONST_INT: - if (n.intval > ULLONG_MAX / (IntConst)base || - n.intval * (IntConst)base > ULLONG_MAX - (IntConst)digit) { + case NUM_LITERAL_INT: + if (n.intval > ULLONG_MAX / (IntLiteral)base || + n.intval * (IntLiteral)base > ULLONG_MAX - (IntLiteral)digit) { /* too big! */ - tokenization_err(&t, "Number too big to fit in a numeric constant."); + tokenization_err(&t, "Number too big to fit in a numeric literal."); goto err; } - n.intval *= (IntConst)base; - n.intval += (IntConst)digit; + n.intval *= (IntLiteral)base; + n.intval += (IntLiteral)digit; break; - case NUM_CONST_FLOAT: - n.floatval += decimal_pow10 * (FloatConst)digit; + case NUM_LITERAL_FLOAT: + n.floatval += decimal_pow10 * (FloatLiteral)digit; decimal_pow10 /= 10; break; } tokr_nextchar(&t); } - token.kind = TOKEN_NUM_CONST; - token.num = n; - tokr_add(&t, &token); + token->kind = TOKEN_NUM_LITERAL; + token->num = n; continue; } if (*t.s == '\'') { - /* it's a character constant! */ + /* it's a character literal! */ tokr_nextchar(&t); - Token token = {0}; - tokr_put_location(&t, &token); + Token *token = tokr_add(&t); + tokr_put_location(&t, token); char c; if (*t.s == '\\') { /* escape sequence */ @@ -428,20 +421,19 @@ static bool tokenize_string(Tokenizer *tokr, char *str) { tokr_nextchar(&t); } if (*t.s != '\'') { - tokenization_err(&t, "End of character constant expected."); + tokenization_err(&t, "End of character literal expected."); goto err; } tokr_nextchar(&t); - token.kind = TOKEN_CHAR_CONST; - token.chr = c; - tokr_add(&t, &token); + token->kind = TOKEN_CHAR_LITERAL; + token->chr = c; continue; } if (*t.s == '"') { - /* it's a string constant! */ - Token token; - tokr_put_location(&t, &token); + /* it's a string literal! */ + Token *token = tokr_add(&t); + tokr_put_location(&t, token); tokr_nextchar(&t); size_t len = 0; size_t backslashes = 0; @@ -450,7 +442,7 @@ static bool tokenize_string(Tokenizer *tokr, char *str) { backslashes++; } else if (*t.s == 0) { /* return t to opening " so that we go to the next line */ - tokr_get_location(&t, &token); + tokr_get_location(&t, token); tokenization_err(&t, "No matching \" found."); goto err; } else { @@ -461,7 +453,7 @@ static bool tokenize_string(Tokenizer *tokr, char *str) { } char *str = malloc(len + 1); char *strptr = str; - tokr_get_location(&t, &token); + tokr_get_location(&t, token); tokr_nextchar(&t); /* past opening " */ while (*t.s != '"') { assert(*t.s); @@ -479,31 +471,28 @@ static bool tokenize_string(Tokenizer *tokr, char *str) { } } *strptr = 0; - token.kind = TOKEN_STR_CONST; - token.str.len = len; - token.str.str = str; - tokr_add(&t, &token); + token->kind = TOKEN_STR_LITERAL; + token->str.len = len; + token->str.str = str; tokr_nextchar(&t); /* move past closing " */ continue; } if (isidentstart(*t.s)) { /* it's an identifier */ - Token token = {0}; - tokr_put_location(&t, &token); + Token *token = tokr_add(&t); + tokr_put_location(&t, token); Identifier ident = ident_insert(&t.s); - token.kind = TOKEN_IDENT; - token.ident = ident; - tokr_add(&t, &token); + token->kind = TOKEN_IDENT; + token->ident = ident; continue; } tokenization_err(&t, "Token not recognized"); err: has_err = 1; } - Token token = {0}; - token.kind = TOKEN_EOF; - tokr_add(&t, &token); + Token *token = tokr_add(&t); + token->kind = TOKEN_EOF; t.token = t.tokens.data; *tokr = t; @@ -511,13 +500,13 @@ static bool tokenize_string(Tokenizer *tokr, char *str) { } static void tokr_free(Tokenizer *t) { - arr_foreach(t->tokens, Token, token) { + arr_foreach(&t->tokens, Token, token) { switch (token->kind) { - case TOKEN_STR_CONST: + case TOKEN_STR_LITERAL: free(token->str.str); break; default: break; } } - tokens_clear(&t->tokens); + arr_clear(&t->tokens); } @@ -1,28 +1,43 @@ -#define arr_declaration(arr_type, type, prefix) typedef struct { \ - type *data; \ - size_t cap; \ - size_t len; \ - } arr_type; \ - static void prefix##create(arr_type *arr) { \ - arr->data = NULL; \ - arr->cap = 0; \ - arr->len = 0; \ - } \ - static void prefix##reserve(arr_type *arr, size_t n) { \ - arr->data = err_realloc(arr->data, n * sizeof(*arr->data)); \ - arr->cap = n; \ - } \ - static void prefix##add(arr_type *arr, type *item) { \ - if (arr->len >= arr->cap) { \ - prefix##reserve(arr, 2 * arr->len + 2); \ - } \ - arr->data[arr->len++] = *item; \ - } \ - static void prefix##clear(arr_type *arr) { \ - free(arr->data); \ - arr->data = NULL; \ - arr->cap = 0; \ - arr->len = 0; \ +typedef struct { + void *data; + void *last; + size_t len; + size_t cap; + size_t item_sz; +} Array; + +void arr_create(Array *arr, size_t item_sz) { + arr->len = arr->cap = 0; + arr->item_sz = item_sz; + arr->data = NULL; + arr->last = NULL; +} + +void arr_reserve(Array *arr, size_t n) { + arr->cap = n; + arr->data = realloc(arr->data, arr->item_sz * arr->cap); + arr->last = (void*)((char*)arr->data + arr->item_sz * (arr->len - 1)); +} + +void *arr_add(Array *arr) { + if (arr->len >= arr->cap) { + arr_reserve(arr, (arr->cap + 2) * 2); } + arr->len++; + arr->last = (char*)arr->last + arr->item_sz; + void *item = arr->last; + return item; +} + +void arr_free(Array *arr) { + free(arr->data); +} + +void arr_clear(Array *arr) { + free(arr->data); + arr->len = arr->cap = 0; + arr->data = NULL; + arr->last = NULL; +} -#define arr_foreach(arr, type, var) for (type *var = (arr).data, *arr_iterate_last_ = (arr).data + ((arr).len - 1); var; (var == arr_iterate_last_) ? var = NULL : var++) +#define arr_foreach(arr, type, var) for (type *var = (arr)->data; var; var == (arr)->last ? var = NULL : var++) @@ -47,14 +47,15 @@ static void err_print_footer_(const char *context) { } /* Write nicely-formatted errors to the error file */ -static void err_print(LineNo line, const char *context, const char *fmt, ...) { - err_print_header_(line); - va_list args; - va_start(args, fmt); - err_vfprint(fmt, args); - va_end(args); - err_print_footer_(context); -} + +/* static void err_print(LineNo line, const char *context, const char *fmt, ...) { */ +/* err_print_header_(line); */ +/* va_list args; */ +/* va_start(args, fmt); */ +/* err_vfprint(fmt, args); */ +/* va_end(args); */ +/* err_print_footer_(context); */ +/* } */ static void err_vprint(LineNo line, const char *context, const char *fmt, va_list args) { err_print_header_(line); |