summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorpommicket <pommicket@gmail.com>2022-02-04 19:31:45 -0500
committerpommicket <pommicket@gmail.com>2022-02-04 19:31:45 -0500
commitdfce9118b9cbfae1fb287f5e4488b971407522cd (patch)
treea0497c7c78b68ee90797a070c48b1f46b6f76473
parentd1167f03d03c2a6ab75fce410706e3098dfd3090 (diff)
parsing break, continue, return
-rw-r--r--05/constants.b15
-rw-r--r--05/main.b11
-rw-r--r--05/main.c3
-rw-r--r--05/parse.b173
-rw-r--r--05/tokenize.b6
-rw-r--r--05/util.b1
6 files changed, 196 insertions, 13 deletions
diff --git a/05/constants.b b/05/constants.b
index 6a0bba1..68481a1 100644
--- a/05/constants.b
+++ b/05/constants.b
@@ -30,6 +30,14 @@
; highest
; NB: for equal precedence, operators are applied left-to-right except for assignment operators (precedence 2)
+
+; TOKENS
+; tokens are 16 bytes and have the following format:
+; uchar type
+; uchar info
+; ushort file
+; uint line
+; ulong data -- for int/float literals, the value; for string literals, the runtime address; for identifiers, the name of the identifier
#define SYMBOL_COMMA 200
; NOTE: operator_right_associative and others require SYMBOL_EQ to be the first assignment operator
#define SYMBOL_EQ 201
@@ -79,6 +87,7 @@
#define SYMBOL_SEMICOLON 244
#define SYMBOL_DOT 245
+
#define TOKEN_IDENTIFIER 1
#define TOKEN_CONSTANT_FLOAT 2
#define TOKEN_CONSTANT_INT 3
@@ -738,6 +747,10 @@
:str_union
string union
byte 0
-:str_typedef ; currently only used for nice debug output
+; NB: some of these are only used for nice debug output
+:str_typedef
string typedef
byte 0
+:str_return
+ string return
+ byte 0
diff --git a/05/main.b b/05/main.b
index 381ad59..3f4fdf0 100644
--- a/05/main.b
+++ b/05/main.b
@@ -1,3 +1,10 @@
+; @TODO: if we have,
+; 1 extern int blah;
+; 2 ...
+; n int blah;
+; give `blah` an address on line 1, then ignore declaration on line n
+
+
; add 24 + 16 = 40 to the stack pointer to put argc, argv in the right place
byte 0x48
byte 0x81
@@ -52,6 +59,8 @@ global function_statements
global statement_datas
global statement_datas_ends
global parse_stmt_depth
+global expressions
+global expressions_end
#include util.b
#include idents.b
@@ -198,6 +207,8 @@ function main
*1file_list = 255
object_macros = malloc(4000000)
function_macros = malloc(4000000)
+ expressions = malloc(16000000)
+ expressions_end = expressions
types = malloc(16000000)
types_init(types, &types_bytes_used)
diff --git a/05/main.c b/05/main.c
index c5d1b72..3fed294 100644
--- a/05/main.c
+++ b/05/main.c
@@ -1,5 +1,6 @@
int f(void) {
-blah:blah:blah:;
+ lbl1:break;;;
+ continue;a:break;return;return 6+3<<sizeof(int);
}
diff --git a/05/parse.b b/05/parse.b
index 18765b8..3136017 100644
--- a/05/parse.b
+++ b/05/parse.b
@@ -173,13 +173,13 @@ function parse_tokens
global function_stmt_data ; initialized in main
global function_stmt_data_bytes_used
- n = function_stmt_data_bytes_used
- out = function_stmt_data + function_stmt_data_bytes_used
+ p = function_stmt_data + function_stmt_data_bytes_used
+ out = p
parse_statement(&token, &out)
if parse_stmt_depth != 0 goto stmtdepth_internal_err
function_stmt_data_bytes_used = out - function_stmt_data
-
- ident_list_add(function_statements, name, n)
+ ident_list_add(function_statements, name, p)
+ print_statement(p)
goto parse_tokens_loop
:stmtdepth_internal_err
@@ -304,6 +304,9 @@ function parse_statement
c = *1token
if c == SYMBOL_SEMICOLON goto stmt_empty
if c == SYMBOL_LBRACE goto stmt_block
+ if c == KEYWORD_BREAK goto stmt_break
+ if c == KEYWORD_CONTINUE goto stmt_continue
+ if c == KEYWORD_RETURN goto stmt_return
token_error(token, .str_unrecognized_statement)
:str_unrecognized_statement
@@ -313,6 +316,42 @@ function parse_statement
*8p_token = token
*8p_out = out
return
+ :stmt_break
+ token += 16
+ if *1token != SYMBOL_SEMICOLON goto break_no_semicolon
+ token += 16
+ write_statement_header(out, STATEMENT_BREAK, token)
+ out += 40
+ goto parse_statement_ret
+ :break_no_semicolon
+ token_error(token, .str_break_no_semicolon)
+ :str_break_no_semicolon
+ string No semicolon after break.
+ byte 0
+ :stmt_continue
+ token += 16
+ if *1token != SYMBOL_SEMICOLON goto continue_no_semicolon
+ token += 16
+ write_statement_header(out, STATEMENT_CONTINUE, token)
+ out += 40
+ goto parse_statement_ret
+ :continue_no_semicolon
+ token_error(token, .str_continue_no_semicolon)
+ :str_continue_no_semicolon
+ string No semicolon after continue.
+ byte 0
+ :stmt_return
+ write_statement_header(out, STATEMENT_RETURN, token)
+ out += 8
+ token += 16
+ if *1token == SYMBOL_SEMICOLON goto return_no_expr
+ n = token_next_semicolon_not_in_brackets(token)
+ *8out = expressions_end
+ expressions_end = parse_expression(token, n, expressions_end)
+ token = n + 16
+ :return_no_expr
+ out += 32
+ goto parse_statement_ret
:stmt_block
local block_p_out
; find the appropriate statement data to use for this block's body
@@ -356,6 +395,92 @@ function parse_statement
token += 16 ; skip semicolon
goto parse_statement_ret
+; print the statement at `statement`, starting at indentation depth 0.
+; this is a thin wrapper around print_statement_with_depth.
+function print_statement
+ argument statement
+ print_statement_with_depth(statement, 0)
+ return
+
+; print the statement at `statement`, indented by `depth` tabs.
+; layout as read here: a 1-byte statement type at offset 0, followed by
+; four 8-byte data fields at offsets 8, 16, 24 and 32 (40 bytes in total).
+; the statements inside a block are printed at depth + 1, and the closing
+; brace of a block is indented to line up with its opening brace.
+; dies if the statement type is not one of the types handled below.
+function print_statement_with_depth
+	argument statement
+	argument depth
+	local c
+	local dat1
+	local dat2
+	local dat3
+	local dat4
+
+	; indentation for the first line of the statement
+	c = depth
+	:print_stmt_indent_loop
+		if c == 0 goto print_stmt_indent_loop_end
+		putc(9) ; tab
+		c -= 1
+		goto print_stmt_indent_loop
+	:print_stmt_indent_loop_end
+
+	; load the type and all four data fields up front
+	; (dat2, dat3, dat4 are not needed by any statement type handled so far)
+	c = *1statement
+	dat1 = statement + 8
+	dat1 = *8dat1
+	dat2 = statement + 16
+	dat2 = *8dat2
+	dat3 = statement + 24
+	dat3 = *8dat3
+	dat4 = statement + 32
+	dat4 = *8dat4
+
+	if c == STATEMENT_LABEL goto print_stmt_label
+	if c == STATEMENT_BLOCK goto print_stmt_block
+	if c == STATEMENT_CONTINUE goto print_stmt_continue
+	if c == STATEMENT_BREAK goto print_stmt_break
+	if c == STATEMENT_RETURN goto print_stmt_return
+
+	; die exits, so execution never falls through into the string data below
+	die(.pristmtNI)
+	:pristmtNI
+		string print_statement not implemented.
+		byte 0
+	:str_semicolon_newline
+		byte 59
+		byte 10
+		byte 0
+	:print_stmt_label
+		; dat1 is printed as a string, followed by a colon and a newline
+		puts(dat1)
+		putcln(':)
+		return
+	:print_stmt_break
+		puts(.str_stmt_break)
+		return
+	:str_stmt_break
+		string break
+		byte 59 ; semicolon
+		byte 10
+		byte 0
+	:print_stmt_continue
+		puts(.str_stmt_continue)
+		return
+	:str_stmt_continue
+		string continue
+		byte 59 ; semicolon
+		byte 10
+		byte 0
+	:print_stmt_return
+		; dat1 is the expression to print, or 0 for a plain `return;`
+		puts(.str_return)
+		if dat1 == 0 goto print_ret_noexpr
+		putc(32)
+		print_expression(dat1)
+		:print_ret_noexpr
+		puts(.str_semicolon_newline)
+		return
+	:print_stmt_block
+		; dat1 points to the first statement of the body; statements are 40 bytes
+		; apart and the list ends at a statement whose type byte is 0
+		putcln('{)
+		depth += 1
+		:print_block_loop
+			if *1dat1 == 0 goto print_block_loop_end
+			print_statement_with_depth(dat1, depth)
+			dat1 += 40
+			goto print_block_loop
+		:print_block_loop_end
+		; back to this block's own depth, so that the closing brace lines up
+		; with the opening brace instead of always being printed at column 0
+		depth -= 1
+		c = depth
+		:print_stmt_close_indent_loop
+			if c == 0 goto print_stmt_close_indent_loop_end
+			putc(9) ; tab
+			c -= 1
+			goto print_stmt_close_indent_loop
+		:print_stmt_close_indent_loop_end
+		putcln('})
+		return
+
; parse a global variable's initializer
; e.g. int x[5] = {1+8, 2, 3, 4, 5};
; advances *p_token to the token right after the initializer
@@ -741,6 +866,43 @@ function token_reverse_to_matching_lparen
return
+; return a pointer to the next semicolon token at or after token0 which is
+; not in parentheses, square brackets, or braces.
+; tokens are 16 bytes each, with the token type in the first byte.
+; an error is reported with token_error (which is expected not to return) if
+;   - the end of file is reached before such a semicolon, or
+;   - a closing bracket with no matching opening bracket is found first
+; NB: the kind of bracket is not checked; any closing bracket matches any opening bracket.
+function token_next_semicolon_not_in_brackets
+	argument token0
+
+	local token
+	local depth
+	local c
+
+	depth = 0
+	token = token0
+	:next_semicolon_loop
+		c = *1token
+		if c == TOKEN_EOF goto next_semicolon_eof
+		; a semicolon only counts when we are outside all brackets
+		if depth != 0 goto next_semicolon_nocheck
+		if c == SYMBOL_SEMICOLON goto next_semicolon_loop_end
+		:next_semicolon_nocheck
+		token += 16
+		if c == SYMBOL_LPAREN goto next_semicolon_incdepth
+		if c == SYMBOL_RPAREN goto next_semicolon_decdepth
+		if c == SYMBOL_LSQUARE goto next_semicolon_incdepth
+		if c == SYMBOL_RSQUARE goto next_semicolon_decdepth
+		if c == SYMBOL_LBRACE goto next_semicolon_incdepth
+		if c == SYMBOL_RBRACE goto next_semicolon_decdepth
+		goto next_semicolon_loop
+	:next_semicolon_incdepth
+		depth += 1
+		goto next_semicolon_loop
+	:next_semicolon_decdepth
+		; never let depth drop below 0: otherwise every later semicolon would be
+		; skipped and the error would be reported far from the real problem
+		if depth == 0 goto next_semicolon_unmatched
+		depth -= 1
+		goto next_semicolon_loop
+	:next_semicolon_loop_end
+	return token
+	:next_semicolon_eof
+		token_error(token0, .str_next_semicolon_eof)
+	:str_next_semicolon_eof
+		string End of file found while searching for semicolon.
+		byte 0
+	:next_semicolon_unmatched
+		; token was already advanced past the closing bracket; step back to it
+		token -= 16
+		token_error(token, .str_next_semicolon_unmatched)
+	:str_next_semicolon_unmatched
+		string Unmatched closing bracket found while searching for semicolon.
+		byte 0
; we split types into base (B), prefix (P) and suffix (S)
; struct Thing (*things[5])(void), *something_else[3];
; BBBBBBBBBBBB PP SSSSSSSSSS P SSS
@@ -1426,7 +1588,8 @@ function type_create_pointer
p = id + 1
types_bytes_used += type_copy_ids(p, type)
return id
-
+
+; returns pointer to end of expression data
function parse_expression
argument tokens
argument tokens_end
diff --git a/05/tokenize.b b/05/tokenize.b
index 977b162..794232e 100644
--- a/05/tokenize.b
+++ b/05/tokenize.b
@@ -98,12 +98,6 @@ function get_keyword_str
; turn pptokens into tokens, written to out.
-; tokens are 16 bytes and have the following format:
-; uchar type
-; uchar info
-; ushort file
-; uint line
-; ulong data -- for int/float literals, the value; for string literals, the runtime address; for identifiers, the name of the identifier
; This corresponds to translation phases 5-6 and the first half of 7
; IMPORTANT: this function uses pointers to pptokens, so it should NOT be freed!
; Returns a pointer to the end of tokens.
diff --git a/05/util.b b/05/util.b
index 6efb8e7..ae04d7e 100644
--- a/05/util.b
+++ b/05/util.b
@@ -137,6 +137,7 @@ function file_error
function die
argument message
fputs(2, message)
+ fputc(2, 10)
exit(1)
function ftruncate