summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorpommicket <pommicket@gmail.com>2022-02-06 18:24:52 -0500
committerpommicket <pommicket@gmail.com>2022-02-06 18:24:52 -0500
commitb404c21bfc5b8d7d2317da89577633c778233ec4 (patch)
treed31458da268735398cab935a1bc9fe01e09ab86a
parentdd035eb2247201e329d383158c2e330d56fc7388 (diff)
function parameters, fix local variables
-rw-r--r--05/constants.b2
-rw-r--r--05/main.b11
-rw-r--r--05/main.c49
-rw-r--r--05/parse.b188
4 files changed, 155 insertions, 95 deletions
diff --git a/05/constants.b b/05/constants.b
index 730aff0..e454942 100644
--- a/05/constants.b
+++ b/05/constants.b
@@ -262,7 +262,7 @@
; a type of 0 indicates the end of the block.
; data layout for particular statements:
; - STATEMENT_EXPRESSION - data1 is a pointer to expression data; data2,3,4 are unused
-; - STATEMENT_LOCAL_DECLARATION - declaring a local variable, data1 = rbp offset, data2 = size of type, data3 = initializer expression or 0, data4 = initializer memory address to copy from (for braced initializers) or 0
+; - STATEMENT_LOCAL_DECLARATION - declaring a local variable, data1 = rbp offset, data2 = size of type (ignored if dat4 is 0), data3 = initializer expression or 0, data4 = initializer memory address to copy from (for braced initializers) or 0
; - STATEMENT_LABEL - data1 is a pointer to the name of the label; data2,3,4 are unused
; - STATEMENT_BLOCK - data1 is a pointer to an array of statements; data2,3,4 are unused
; - STATEMENT_IF - data1 is a pointer to the condition, data2 is a pointer to the `if' branch statement, data3 is a pointer to the `else' branch statement, or 0 if there is none; data4 is unused
diff --git a/05/main.b b/05/main.b
index 232dbca..64585d0 100644
--- a/05/main.b
+++ b/05/main.b
@@ -49,7 +49,9 @@ global output_file_data
; ident list of global variables. each one is stored as
; (type << 32) | address
global global_variables
-; ident list of functions. each entry is a pointer to a single statement - which should always be a STATEMENT_BLOCK
+; ident list of functions. each entry is a pointer two statements
+; - the first one is a STATEMENT_LOCAL_DECLARATION (with dat2=dat3=dat4=0), which is only there to set rsp properly because of parameters
+; - the second one is the function body (a STATEMENT_BLOCK)
global function_statements
; statement_datas[0] = pointer to statement data for block-nesting depth 0 (i.e. function bodies)
; statement_datas[1] = pointer to statement data for block-nesting depth 1 (blocks inside functions)
@@ -68,9 +70,9 @@ global local_variables
global block_depth
global expressions
global expressions_end
-; where to put the next local variable
-global local_var_next_rbp_offset
-
+; current rbp offset (where rsp is)
+global local_var_rbp_offset
+global function_param_names
#include util.b
#include idents.b
@@ -193,6 +195,7 @@ function main
statement_datas_ends = memory + 400
block_static_variables = memory + 800
local_variables = memory + 1200
+ function_param_names = memory + 1600
p = statement_datas
q = statement_datas_ends
diff --git a/05/main.c b/05/main.c
index 5ed76a3..37b2279 100644
--- a/05/main.c
+++ b/05/main.c
@@ -1,49 +1,10 @@
static int g;
-int f(void) {
- while (g+10 == 17){
- while (g+100==1234){
- }
- }
- do {
- break;
- continue;
- }while(0);
- if (0) {
- if(1) {
- return g;
- }else;
- } else if (5 == 6){
- return 6 * g;
- }
- lbl1:break;;goto blah;
- case -1-3:
- continue;a:break;return;return 6+3<<sizeof(int);
- goto lbl1;
- case 77:;return 92834;
- return g;
- static int x = 0x12345;
- return x;
- switch(g<<17){
- case 88: break;
- case 99: int x = 3;
- }
- int i;
- for(i=0;i < 100; i ++);
- for(;;);
-}
-
-int h(void) {
- int y, r[3], s;
- char d, e[5], f, g, *p;
- int z = 3, R=12+459834-g;
- int x[] = {1,2,3,4,5};
- struct {
- char a,b;
- } P[] = {1,2,3,4,5};
- int *Y = x;
- static int marker = 0x12345678;
- return 5;
+int f(int x, int y[3]) {
+ int z = 17 +x;
+ int g[]={1,2,3,4,5};
+ int h;
+ return *(g + x + z);
}
/* typedef int AA[sizeof x]; */
diff --git a/05/parse.b b/05/parse.b
index ccf1c4f..091be9c 100644
--- a/05/parse.b
+++ b/05/parse.b
@@ -59,6 +59,7 @@ function parse_toplevel_declaration
local token
local ident
local type
+ local list
local p
local b
local c
@@ -72,6 +73,7 @@ function parse_toplevel_declaration
local suffix_end
local is_extern
local out
+ local out0
token = *8p_token
is_extern = 0
@@ -110,7 +112,8 @@ function parse_toplevel_declaration
suffix = prefix_end + 16
suffix_end = type_get_suffix_end(prefix)
type = types_bytes_used
- parse_type_declarators(prefix, prefix_end, suffix, suffix_end)
+ *1function_param_names = 0
+ parse_type_declarators(prefix, prefix_end, suffix, suffix_end, function_param_names)
parse_base_type(base_type, base_type_end)
; ensure rwdata_end_addr is aligned to 8 bytes
@@ -188,22 +191,49 @@ function parse_toplevel_declaration
:parse_function_definition
if block_depth != 0 goto nested_function
p = types + type
- ; @TODO: parameters
- ; @NOTE: remember to turn array members into pointers
if *1p != TYPE_FUNCTION goto lbrace_after_declaration
global function_stmt_data ; initialized in main
global function_stmt_data_bytes_used
- local_var_next_rbp_offset = 0
+ local_var_rbp_offset = 0
+
+ out = function_stmt_data + function_stmt_data_bytes_used
+ out0 = out
+ ident_list_add(function_statements, name, out)
+
+ ; deal with function parameters
+ p = type + 1
+ name = function_param_names
+ list = local_variables
+ list += 8
+ list = *8list
+ :fn_params_loop
+ if *1name == 0 goto fn_params_loop_end
+ local_var_rbp_offset += type_sizeof(p)
+ local_var_rbp_offset += 7
+ local_var_rbp_offset >= 3
+ local_var_rbp_offset <= 3
+ c = p < 32
+ c |= local_var_rbp_offset
+ ident_list_add(list, name, c)
+ p += type_length(p)
+ name = memchr(name, 0)
+ name += 1
+ goto fn_params_loop
+ :fn_params_loop_end
+
+ write_statement_header(out, STATEMENT_LOCAL_DECLARATION, token)
+ out += 8
+ *8out = local_var_rbp_offset
+ out += 32
- p = function_stmt_data + function_stmt_data_bytes_used
- out = p
parse_statement(&token, &out)
if block_depth != 0 goto stmtdepth_internal_err
function_stmt_data_bytes_used = out - function_stmt_data
- ident_list_add(function_statements, name, p)
- print_statement(p)
+ print_statement(out0)
+ out0 += 40
+ print_statement(out0)
goto parse_tld_ret
:stmtdepth_internal_err
@@ -248,7 +278,7 @@ function parse_toplevel_declaration
;print_tokens(suffix, suffix_end)
type = types_bytes_used
- parse_type_declarators(prefix, prefix_end, suffix, suffix_end)
+ parse_type_declarators(prefix, prefix_end, suffix, suffix_end, 0)
parse_base_type(base_type)
puts(.str_typedef)
@@ -357,10 +387,16 @@ function parse_statement
b = token_is_type(token)
if b != 0 goto stmt_local_declaration
- token_error(token, .str_unrecognized_statement)
- :str_unrecognized_statement
- string Unrecognized statement.
- byte 0
+ ; it's an expression statement
+ write_statement_header(out, STATEMENT_EXPRESSION, token)
+ out += 8
+ p = token_next_semicolon_not_in_brackets(token)
+ *8out = expressions_end
+ expressions_end = parse_expression(token, p, expressions_end)
+ out += 32
+ token = p + 16
+ goto parse_statement_ret
+
:parse_statement_ret
*8p_token = token
*8p_out = out
@@ -577,38 +613,38 @@ function parse_statement
l_suffix = l_prefix_end + 16
l_suffix_end = type_get_suffix_end(l_prefix)
l_type = types_bytes_used
- parse_type_declarators(l_prefix, l_prefix_end, l_suffix, l_suffix_end)
+ parse_type_declarators(l_prefix, l_prefix_end, l_suffix, l_suffix_end, 0)
parse_base_type(l_base_type)
+
+
+ token = l_suffix_end
+ if *1token == SYMBOL_EQ goto local_decl_initializer
+ :local_decl_continue
+ ; we need to calculate the size of the type here, because of stuff like
+ ; int x[] = {1,2,3};
+ n = type_sizeof(l_type)
+ ; advance
+ local_var_rbp_offset += n
+ ; align
+ local_var_rbp_offset += 7
+ local_var_rbp_offset >= 3
+ local_var_rbp_offset <= 3
write_statement_header(out, STATEMENT_LOCAL_DECLARATION, token)
out += 8
- *8out = local_var_next_rbp_offset
- out += 32
+ *8out = local_var_rbp_offset
+ out += 8
+ *8out = n
+ out += 24
p = local_variables
p += block_depth < 3
- l_offset = local_var_next_rbp_offset
+ l_offset = local_var_rbp_offset
c = ident_list_lookup(*8p, l_name)
if c != 0 goto local_redeclaration
c = l_offset
c |= l_type < 32
ident_list_add(*8p, l_name, c)
-
- token = l_suffix_end
- :local_decl_continue
- ; we need to calculate the size of the type here, because of stuff like
- ; int x[] = {1,2,3};
- n = type_sizeof(l_type)
- out -= 24
- *8out = n
- out += 24
- ; advance
- local_var_next_rbp_offset += type_sizeof(l_type)
- ; align
- local_var_next_rbp_offset += 7
- local_var_next_rbp_offset >= 3
- local_var_next_rbp_offset <= 3
if *1token == SYMBOL_SEMICOLON goto local_decl_loop_end
- if *1token == SYMBOL_EQ goto local_decl_initializer
if *1token != SYMBOL_COMMA goto local_decl_badsuffix
token += 16 ; skip comma
@@ -618,10 +654,10 @@ function parse_statement
token += 16
if *1token == SYMBOL_LBRACE goto local_init_lbrace
n = token_next_semicolon_or_comma_not_in_brackets(token)
- out -= 16
+ out += 16
p = expressions_end
*8out = p
- out += 16
+ out -= 16
expressions_end = parse_expression(token, n, p)
p += 4
type_decay_array_to_pointer(*4p) ; fix typing for `int[] x = {5,6}; int *y = x;`
@@ -631,9 +667,9 @@ function parse_statement
rwdata_end_addr += 7
rwdata_end_addr >= 3
rwdata_end_addr <= 3
- out -= 8
+ out += 32
*8out = rwdata_end_addr
- out += 8
+ out -= 32
parse_constant_initializer(&token, l_type)
goto local_decl_continue
:local_decl_badsuffix
@@ -841,6 +877,7 @@ function print_statement_with_depth
dat4 = *8dat4
if c == STATEMENT_NOOP goto print_stmt_noop
+ if c == STATEMENT_EXPRESSION goto print_stmt_expr
if c == STATEMENT_BLOCK goto print_stmt_block
if c == STATEMENT_CONTINUE goto print_stmt_continue
if c == STATEMENT_BREAK goto print_stmt_break
@@ -855,9 +892,9 @@ function print_statement_with_depth
if c == STATEMENT_FOR goto print_stmt_for
if c == STATEMENT_LOCAL_DECLARATION goto print_stmt_local_decl
- die(.pristmtNI)
- :pristmtNI
- string print_statement not implemented.
+ die(.str_bad_print_stmt)
+ :str_bad_print_stmt
+ string Bad statement passed to print_statement.
byte 0
:str_semicolon_newline
byte 59
@@ -867,6 +904,10 @@ function print_statement_with_depth
puts(dat1)
putcln(':)
return
+ :print_stmt_expr
+ print_expression(dat1)
+ putcln(59)
+ return
:print_stmt_noop
putcln(59)
return
@@ -1638,12 +1679,17 @@ function parse_type_declarators
argument prefix_end
argument suffix
argument suffix_end
+ ; function parameters will be stored here (null-byte separated) if not 0
+ ; make sure you check that it's actually a function type before reading this
+ argument parameters
local p
local expr
local n
local c
local depth
local out
+ local param_names_out
+ param_names_out = parameters
; main loop for parsing types
:type_declarators_loop
@@ -1656,12 +1702,14 @@ function parse_type_declarators
goto parse_typedecls_bad_type
:parse_pointer_type
+ param_names_out = 0
out = types + types_bytes_used
*1out = TYPE_POINTER
types_bytes_used += 1
prefix_end = p
goto type_declarators_loop
:parse_array_type
+ param_names_out = 0
out = types + types_bytes_used
*1out = TYPE_ARRAY
types_bytes_used += 1
@@ -1709,11 +1757,27 @@ function parse_type_declarators
local param_prefix_end
local param_suffix
local param_suffix_end
+ local d
- p = suffix + 16
out = types + types_bytes_used
*1out = TYPE_FUNCTION
types_bytes_used += 1
+
+ p = suffix + 16
+ if *1p != KEYWORD_VOID goto ftype_has_parameters
+ if *1p == SYMBOL_RPAREN goto ftype_no_parameters ; e.g. int f() { return 17; }
+ n = p + 16
+ if *1n != SYMBOL_RPAREN goto ftype_has_parameters
+ :ftype_no_parameters
+ ; special handling of function type with no parameters
+ out = types + types_bytes_used
+ *1out = 0
+ types_bytes_used += 1
+ suffix += 48
+ goto type_declarators_loop
+
+
+ :ftype_has_parameters
:function_type_loop
param_base_type = p
param_prefix = type_get_base_end(param_base_type)
@@ -1721,22 +1785,54 @@ function parse_type_declarators
param_suffix = param_prefix_end
if *1param_suffix != TOKEN_IDENTIFIER goto functype_no_ident
param_suffix += 16
+ if param_names_out == 0 goto functype_had_ident
+ param_suffix -= 8
+ param_names_out = strcpy(param_names_out, *8param_suffix)
+ param_names_out += 1
+ param_suffix += 8
+ goto functype_had_ident
:functype_no_ident
+ if param_names_out != 0 goto no_param_name
+ :functype_had_ident
param_suffix_end = type_get_suffix_end(param_prefix)
- parse_type_declarators(param_prefix, param_prefix_end, param_suffix, param_suffix_end)
+ c = types + types_bytes_used
+ parse_type_declarators(param_prefix, param_prefix_end, param_suffix, param_suffix_end, 0)
parse_base_type(param_base_type)
+ if *1c != TYPE_ARRAY goto function_param_not_array
+ ; decay array into pointer
+ *1c = TYPE_POINTER
+ c += 1
+ c -= types
+ d = c + 8
+ type_copy_ids(c, d)
+ types_bytes_used -= 8
+ :function_param_not_array
p = param_suffix_end
if *1p == SYMBOL_RPAREN goto function_type_loop_end
if *1p != SYMBOL_COMMA goto parse_typedecls_bad_type
p += 16
goto function_type_loop
:function_type_loop_end
+ if param_names_out == 0 goto ftype_skip_zpno
+ *1param_names_out = 0
+ ; prevent lower-level parts of type from writing parameters
+ param_names_out = 0
+ :ftype_skip_zpno
+
out = types + types_bytes_used
*1out = 0
types_bytes_used += 1
suffix = p + 16
goto type_declarators_loop
+ :no_param_name
+ token_error(param_suffix, .str_no_param_name)
+ :str_no_param_name
+ string Function parameter has no name.
+ byte 0
:parse_type_remove_parentheses
+ ; interestingly:
+ ; int (f(int x)) { return x * 2; }
+ ; seems perfectly legal.
if *1p != SYMBOL_LPAREN goto parse_typedecls_bad_type
prefix_end = p
suffix += 16
@@ -1966,7 +2062,7 @@ function parse_base_type
member_type = types_bytes_used
- parse_type_declarators(member_prefix, member_prefix_end, member_suffix, member_suffix_end)
+ parse_type_declarators(member_prefix, member_prefix_end, member_suffix, member_suffix_end, 0)
parse_base_type(member_base_type)
; make sure struct member is aligned
@@ -2487,7 +2583,7 @@ function parse_expression
a = types_bytes_used
- parse_type_declarators(cast_prefix, cast_suffix, cast_suffix, cast_suffix_end)
+ parse_type_declarators(cast_prefix, cast_suffix, cast_suffix, cast_suffix_end, 0)
parse_base_type(cast_base_type)
p = cast_suffix_end
@@ -2565,7 +2661,7 @@ function parse_expression
sizeof_suffix_end = type_get_suffix_end(sizeof_prefix)
p = sizeof_suffix_end
a = types_bytes_used
- parse_type_declarators(sizeof_prefix, sizeof_suffix, sizeof_suffix, sizeof_suffix_end)
+ parse_type_declarators(sizeof_prefix, sizeof_suffix, sizeof_suffix, sizeof_suffix_end, 0)
parse_base_type(sizeof_base_type)
if *1p != SYMBOL_RPAREN goto bad_expression ; e.g. sizeof(int ,)
p += 16
@@ -3734,7 +3830,7 @@ function print_expression
expression += 8
return expression
:str_local_prefix
- string [rsp-
+ string [rbp-
byte 0
:print_global_variable
puts(.str_global_at)