diff options
-rw-r--r-- | 05/constants.b | 15 | ||||
-rw-r--r-- | 05/main.b | 11 | ||||
-rw-r--r-- | 05/main.c | 3 | ||||
-rw-r--r-- | 05/parse.b | 173 | ||||
-rw-r--r-- | 05/tokenize.b | 6 | ||||
-rw-r--r-- | 05/util.b | 1 |
6 files changed, 196 insertions, 13 deletions
diff --git a/05/constants.b b/05/constants.b index 6a0bba1..68481a1 100644 --- a/05/constants.b +++ b/05/constants.b @@ -30,6 +30,14 @@ ; highest ; NB: for equal precedence, operators are applied left-to-right except for assignment operators (precedence 2) + +; TOKENS +; tokens are 16 bytes and have the following format: +; uchar type +; uchar info +; ushort file +; uint line +; ulong data -- for int/float literals, the value; for string literals, the runtime address; for identifiers, the name of the identifier #define SYMBOL_COMMA 200 ; NOTE: operator_right_associative and others require SYMBOL_EQ to be the first assignment operator #define SYMBOL_EQ 201 @@ -79,6 +87,7 @@ #define SYMBOL_SEMICOLON 244 #define SYMBOL_DOT 245 + #define TOKEN_IDENTIFIER 1 #define TOKEN_CONSTANT_FLOAT 2 #define TOKEN_CONSTANT_INT 3 @@ -738,6 +747,10 @@ :str_union string union byte 0 -:str_typedef ; currently only used for nice debug output +; NB: some of these are only used for nice debug output +:str_typedef string typedef byte 0 +:str_return + string return + byte 0 @@ -1,3 +1,10 @@ +; @TODO: if we have, +; 1 extern int blah; +; 2 ... +; n int blah; +; give `blah` an address on line 1, then ignore declaration on line n + + ; add 24 + 16 = 40 to the stack pointer to put argc, argv in the right place byte 0x48 byte 0x81 @@ -52,6 +59,8 @@ global function_statements global statement_datas global statement_datas_ends global parse_stmt_depth +global expressions +global expressions_end #include util.b #include idents.b @@ -198,6 +207,8 @@ function main *1file_list = 255 object_macros = malloc(4000000) function_macros = malloc(4000000) + expressions = malloc(16000000) + expressions_end = expressions types = malloc(16000000) types_init(types, &types_bytes_used) @@ -1,5 +1,6 @@ int f(void) { -blah:blah:blah:; + lbl1:break;;; + continue;a:break;return;return 6+3<<sizeof(int); } @@ -173,13 +173,13 @@ function parse_tokens global function_stmt_data ; initialized in main global function_stmt_data_bytes_used - n = function_stmt_data_bytes_used - out = function_stmt_data + function_stmt_data_bytes_used + p = function_stmt_data + function_stmt_data_bytes_used + out = p parse_statement(&token, &out) if parse_stmt_depth != 0 goto stmtdepth_internal_err function_stmt_data_bytes_used = out - function_stmt_data - - ident_list_add(function_statements, name, n) + ident_list_add(function_statements, name, p) + print_statement(p) goto parse_tokens_loop :stmtdepth_internal_err @@ -304,6 +304,9 @@ function parse_statement c = *1token if c == SYMBOL_SEMICOLON goto stmt_empty if c == SYMBOL_LBRACE goto stmt_block + if c == KEYWORD_BREAK goto stmt_break + if c == KEYWORD_CONTINUE goto stmt_continue + if c == KEYWORD_RETURN goto stmt_return token_error(token, .str_unrecognized_statement) :str_unrecognized_statement @@ -313,6 +316,42 @@ function parse_statement *8p_token = token *8p_out = out return + :stmt_break + token += 16 + if *1token != SYMBOL_SEMICOLON goto break_no_semicolon + token += 16 + write_statement_header(out, STATEMENT_BREAK, token) + out += 40 + goto parse_statement_ret + :break_no_semicolon + token_error(token, .str_break_no_semicolon) + :str_break_no_semicolon + string No semicolon after break. + byte 0 + :stmt_continue + token += 16 + if *1token != SYMBOL_SEMICOLON goto continue_no_semicolon + token += 16 + write_statement_header(out, STATEMENT_CONTINUE, token) + out += 40 + goto parse_statement_ret + :continue_no_semicolon + token_error(token, .str_continue_no_semicolon) + :str_continue_no_semicolon + string No semicolon after continue. + byte 0 + :stmt_return + write_statement_header(out, STATEMENT_RETURN, token) + out += 8 + token += 16 + if *1token == SYMBOL_SEMICOLON goto return_no_expr + n = token_next_semicolon_not_in_brackets(token) + *8out = expressions_end + expressions_end = parse_expression(token, n, expressions_end) + token = n + 16 + :return_no_expr + out += 32 + goto parse_statement_ret :stmt_block local block_p_out ; find the appropriate statement data to use for this block's body @@ -356,6 +395,92 @@ function parse_statement token += 16 ; skip semicolon goto parse_statement_ret +function print_statement + argument statement + print_statement_with_depth(statement, 0) + return + +function print_statement_with_depth + argument statement + argument depth + local c + local dat1 + local dat2 + local dat3 + local dat4 + + c = depth + :print_stmt_indent_loop + if c == 0 goto print_stmt_indent_loop_end + putc(9) ; tab + c -= 1 + goto print_stmt_indent_loop + :print_stmt_indent_loop_end + + c = *1statement + dat1 = statement + 8 + dat1 = *8dat1 + dat2 = statement + 16 + dat2 = *8dat2 + dat3 = statement + 24 + dat3 = *8dat3 + dat4 = statement + 32 + dat4 = *8dat4 + + if c == STATEMENT_LABEL goto print_stmt_label + if c == STATEMENT_BLOCK goto print_stmt_block + if c == STATEMENT_CONTINUE goto print_stmt_continue + if c == STATEMENT_BREAK goto print_stmt_break + if c == STATEMENT_RETURN goto print_stmt_return + + die(.pristmtNI) + :pristmtNI + string print_statement not implemented. + byte 0 + :str_semicolon_newline + byte 59 + byte 10 + byte 0 + :print_stmt_label + puts(dat1) + putcln(':) + return + :print_stmt_break + puts(.str_stmt_break) + return + :str_stmt_break + string break + byte 59 ; semicolon + byte 10 + byte 0 + :print_stmt_continue + puts(.str_stmt_continue) + return + :str_stmt_continue + string continue + byte 59 ; semicolon + byte 10 + byte 0 + :print_stmt_return + puts(.str_return) + if dat1 == 0 goto print_ret_noexpr + putc(32) + print_expression(dat1) + :print_ret_noexpr + puts(.str_semicolon_newline) + return + :print_stmt_block + putcln('{) + depth += 1 + :print_block_loop + if *1dat1 == 0 goto print_block_loop_end + print_statement_with_depth(dat1, depth) + dat1 += 40 + goto print_block_loop + :print_block_loop_end + putcln('}) + return + ; parse a global variable's initializer ; e.g. int x[5] = {1+8, 2, 3, 4, 5}; ; advances *p_token to the token right after the initializer @@ -741,6 +866,43 @@ function token_reverse_to_matching_lparen return +; return the next semicolon not in parentheses, square brackets, or braces. +function token_next_semicolon_not_in_brackets + argument token0 + + local token + local depth + local c + + depth = 0 + token = token0 + :next_semicolon_loop + c = *1token + if c == TOKEN_EOF goto next_semicolon_eof + if depth != 0 goto next_semicolon_nocheck + if c == SYMBOL_SEMICOLON goto next_semicolon_loop_end + :next_semicolon_nocheck + token += 16 + if c == SYMBOL_LPAREN goto next_semicolon_incdepth + if c == SYMBOL_RPAREN goto next_semicolon_decdepth + if c == SYMBOL_LSQUARE goto next_semicolon_incdepth + if c == SYMBOL_RSQUARE goto next_semicolon_decdepth + if c == SYMBOL_LBRACE goto next_semicolon_incdepth + if c == SYMBOL_RBRACE goto next_semicolon_decdepth + goto next_semicolon_loop + :next_semicolon_incdepth + depth += 1 + goto next_semicolon_loop + :next_semicolon_decdepth + depth -= 1 + goto next_semicolon_loop + :next_semicolon_loop_end + return token + :next_semicolon_eof + token_error(token0, .str_next_semicolon_eof) + :str_next_semicolon_eof + string End of file found while searching for semicolon. + byte 0 ; we split types into base (B), prefix (P) and suffix (S) ; struct Thing (*things[5])(void), *something_else[3]; ; BBBBBBBBBBBB PP SSSSSSSSSS P SSS @@ -1426,7 +1588,8 @@ function type_create_pointer p = id + 1 types_bytes_used += type_copy_ids(p, type) return id - + +; returns pointer to end of expression data function parse_expression argument tokens argument tokens_end diff --git a/05/tokenize.b b/05/tokenize.b index 977b162..794232e 100644 --- a/05/tokenize.b +++ b/05/tokenize.b @@ -98,12 +98,6 @@ function get_keyword_str ; turn pptokens into tokens, written to out. -; tokens are 16 bytes and have the following format: -; uchar type -; uchar info -; ushort file -; uint line -; ulong data -- for int/float literals, the value; for string literals, the runtime address; for identifiers, the name of the identifier ; This corresponds to translation phases 5-6 and the first half of 7 ; IMPORTANT: this function uses pointers to pptokens, so it should NOT be freed! ; Returns a pointer to the end of tokens. @@ -137,6 +137,7 @@ function file_error function die argument message fputs(2, message) + fputc(2, 10) exit(1) function ftruncate |