summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--05/main.c3
-rw-r--r--05/parse.b419
2 files changed, 230 insertions, 192 deletions
diff --git a/05/main.c b/05/main.c
index 17e960f..f706e88 100644
--- a/05/main.c
+++ b/05/main.c
@@ -4,8 +4,11 @@ int f(void) {
continue;a:break;return;return 6+3<<sizeof(int);
goto lbl1;
case 77:;return 92834;
+ static int x = 0x12345;
}
+/* typedef int AA[sizeof x]; */
+
/* typedef struct { */
/* int i[41]; */
diff --git a/05/parse.b b/05/parse.b
index caf4130..7274331 100644
--- a/05/parse.b
+++ b/05/parse.b
@@ -39,6 +39,24 @@ function structure_is_union
function parse_tokens
argument tokens
local token
+
+ token = tokens
+ :parse_tokens_loop
+ if *1token == TOKEN_EOF goto parse_tokens_eof
+ parse_toplevel_declaration(&token, global_variables)
+ goto parse_tokens_loop
+ :parse_tokens_eof
+ return
+
+
+; also handles static declarations inside functions
+; advances *p_token past semicolon
+; static_vars = where to put static variables
+function parse_toplevel_declaration
+ argument p_token
+ argument static_vars
+
+ local token
local ident
local type
local p
@@ -55,206 +73,221 @@ function parse_tokens
local is_extern
local out
- token = tokens
- :parse_tokens_loop
- is_extern = 0
- if *1token == TOKEN_EOF goto parse_tokens_eof
- if *1token == KEYWORD_STATIC goto parse_static_toplevel_decl
- if *1token == KEYWORD_EXTERN goto parse_extern_toplevel_decl
- if *1token == KEYWORD_TYPEDEF goto parse_typedef
+ token = *8p_token
+ is_extern = 0
+
+ if *1token == KEYWORD_STATIC goto parse_static_toplevel_decl
+ if *1token == KEYWORD_EXTERN goto parse_extern_toplevel_decl
+ if *1token == KEYWORD_TYPEDEF goto parse_typedef
+
+ b = token_is_type(token)
+ if b != 0 goto parse_toplevel_decl
+
+ token_error(token, .str_bad_decl)
+ :str_bad_decl
+ string Bad declaration.
+ byte 0
+ :parse_tld_ret
+ *8p_token = token
+ return
+ :parse_static_toplevel_decl
+ token += 16 ; we don't care that this is static
+ goto parse_toplevel_decl
+ :parse_extern_toplevel_decl
+ token += 16
+ is_extern = 1
+ goto parse_toplevel_decl
+ :parse_toplevel_decl
+ base_type = token
+ base_type_end = type_get_base_end(token)
+ token = base_type_end
+ :tl_decl_loop
+ prefix = token
+ prefix_end = type_get_prefix_end(prefix)
+ if *1prefix_end != TOKEN_IDENTIFIER goto tl_decl_no_ident
+ name = prefix_end + 8
+ name = *8name
+ suffix = prefix_end + 16
+ suffix_end = type_get_suffix_end(prefix)
+ type = types_bytes_used
+ parse_type_declarators(prefix, prefix_end, suffix, suffix_end)
+ parse_base_type(base_type, base_type_end)
+
+ ; ensure rwdata_end_addr is aligned to 8 bytes
+ ; otherwise addresses could be screwed up
+ rwdata_end_addr += 7
+ rwdata_end_addr >= 3
+ rwdata_end_addr <= 3
+
+ token = suffix_end
+ if *1token == SYMBOL_LBRACE goto parse_function_definition
+ if is_extern != 0 goto parse_tl_decl_cont ; ignore external variable declarations
+ ; deal with the initializer if there is one
+ if *1token == SYMBOL_SEMICOLON goto parse_tld_no_initializer
+ if *1token == SYMBOL_COMMA goto parse_tld_no_initializer
+ if *1token == SYMBOL_EQ goto parse_tld_initializer
+ token_error(token, .str_unrecognized_stuff_after_declaration)
+ :str_unrecognized_stuff_after_declaration
+ string Declaration should be followed by one of: { , =
+ byte 32
+ byte 59 ; semicolon
+ byte 0
+ :parse_tl_decl_cont
+
+
+ if *1token == SYMBOL_SEMICOLON goto tl_decl_loop_done
+ if *1token != SYMBOL_COMMA goto tld_bad_stuff_after_decl
+ token += 16
+ goto tl_decl_loop
+ :tl_decl_loop_done
+ token += 16 ; skip semicolon
+ goto parse_tld_ret
+
+ :tl_decl_no_ident
+ token_error(prefix_end, .str_tl_decl_no_ident)
+ :str_tl_decl_no_ident
+ string No identifier in top-level declaration.
+ byte 0
+ :tld_bad_stuff_after_decl
+ token_error(token, .str_tld_bad_stuff_after_decl)
+ :str_tld_bad_stuff_after_decl
+ string Declarations should be immediately followed by a comma or semicolon.
+ byte 0
+ :parse_tld_no_initializer
+ p = types + type
+ if *1p == TYPE_FUNCTION goto parse_tl_decl_cont ; ignore function declarations -- we do two passes anyways
+ b = ident_list_lookup(static_vars, name)
+ if b != 0 goto global_redefinition
+ c = type < 32
+ c |= rwdata_end_addr
+ ident_list_add(static_vars, name, c)
+ ; just skip forward by the size of this variable -- it'll automatically be filled with 0s.
+ rwdata_end_addr += type_sizeof(type)
+ goto parse_tl_decl_cont
+ :parse_tld_initializer
+ p = types + type
+ if *1p == TYPE_FUNCTION goto function_initializer
+ b = ident_list_lookup(static_vars, name)
+ if b != 0 goto global_redefinition
+ token += 16 ; skip =
+ c = type < 32
+ c |= rwdata_end_addr
+ ident_list_add(static_vars, name, c)
+ parse_constant_initializer(&token, type)
+ goto parse_tl_decl_cont
+ :global_redefinition
+ token_error(token, .str_global_redefinition)
+ :str_global_redefinition
+ string Redefinition of global variable.
+ byte 0
+ :function_initializer
+ token_error(token, .str_function_initializer)
+ :str_function_initializer
+ string Functions should not have initializers.
+ byte 0
+ :parse_function_definition
+ if block_depth != 0 goto nested_function
+ p = types + type
+ ; @TODO: parameters
+ ; @NOTE: remember to turn array members into pointers
+ if *1p != TYPE_FUNCTION goto lbrace_after_declaration
+
+ global function_stmt_data ; initialized in main
+ global function_stmt_data_bytes_used
- b = token_is_type(token)
- if b != 0 goto parse_toplevel_decl
+ p = function_stmt_data + function_stmt_data_bytes_used
+ out = p
+ parse_statement(&token, &out)
+ if block_depth != 0 goto stmtdepth_internal_err
+ function_stmt_data_bytes_used = out - function_stmt_data
+ ident_list_add(function_statements, name, p)
+ print_statement(p)
+ goto parse_tld_ret
- token_error(token, .str_bad_statement)
- :str_bad_statement
- string Bad statement.
+ :stmtdepth_internal_err
+ token_error(token, .str_stmtdepth_internal_err)
+ :str_stmtdepth_internal_err
+ string Internal compiler error: parse_stmt_depth is not 0 after parsing function body.
byte 0
- :parse_static_toplevel_decl
- token += 16 ; we don't care that this is static
- goto parse_toplevel_decl
- :parse_extern_toplevel_decl
- token += 16
- is_extern = 1
- goto parse_toplevel_decl
- :parse_toplevel_decl
- base_type = token
- base_type_end = type_get_base_end(token)
- token = base_type_end
- :tl_decl_loop
- prefix = token
- prefix_end = type_get_prefix_end(prefix)
- if *1prefix_end != TOKEN_IDENTIFIER goto tl_decl_no_ident
- name = prefix_end + 8
- name = *8name
- suffix = prefix_end + 16
- suffix_end = type_get_suffix_end(prefix)
- type = types_bytes_used
- parse_type_declarators(prefix, prefix_end, suffix, suffix_end)
- parse_base_type(base_type, base_type_end)
-
- ; ensure rwdata_end_addr is aligned to 8 bytes
- ; otherwise addresses could be screwed up
- rwdata_end_addr += 7
- rwdata_end_addr >= 3
- rwdata_end_addr <= 3
-
- token = suffix_end
- if *1token == SYMBOL_LBRACE goto parse_function_definition
- if is_extern != 0 goto parse_tl_decl_cont ; ignore external variable declarations
- ; deal with the initializer if there is one
- if *1token == SYMBOL_SEMICOLON goto parse_tld_no_initializer
- if *1token == SYMBOL_COMMA goto parse_tld_no_initializer
- if *1token == SYMBOL_EQ goto parse_tld_initializer
- token_error(token, .str_unrecognized_stuff_after_declaration)
- :str_unrecognized_stuff_after_declaration
- string Declaration should be followed by one of: { , =
- byte 32
- byte 59 ; semicolon
- byte 0
- :parse_tl_decl_cont
-
-
- if *1token == SYMBOL_SEMICOLON goto tl_decl_loop_done
- if *1token != SYMBOL_COMMA goto tld_bad_stuff_after_decl
- token += 16
- goto tl_decl_loop
- :tl_decl_loop_done
- token += 16 ; skip semicolon
- goto parse_tokens_loop
-
- :tl_decl_no_ident
- token_error(prefix_end, .str_tl_decl_no_ident)
- :str_tl_decl_no_ident
- string No identifier in top-level declaration.
- byte 0
- :tld_bad_stuff_after_decl
- token_error(token, .str_tld_bad_stuff_after_decl)
- :str_tld_bad_stuff_after_decl
- string Declarations should be immediately followed by a comma or semicolon.
- byte 0
- :parse_tld_no_initializer
- p = types + type
- if *1p == TYPE_FUNCTION goto parse_tl_decl_cont ; ignore function declarations -- we do two passes anyways
- b = ident_list_lookup(global_variables, name)
- if b != 0 goto global_redefinition
- c = type < 32
- c |= rwdata_end_addr
- ident_list_add(global_variables, name, c)
- ; just skip forward by the size of this variable -- it'll automatically be filled with 0s.
- rwdata_end_addr += type_sizeof(type)
- goto parse_tl_decl_cont
- :parse_tld_initializer
- if *1p == TYPE_FUNCTION goto function_initializer
- b = ident_list_lookup(global_variables, name)
- if b != 0 goto global_redefinition
- token += 16 ; skip =
- c = type < 32
- c |= rwdata_end_addr
- ident_list_add(global_variables, name, c)
- parse_constant_initializer(&token, type)
- goto parse_tl_decl_cont
- :global_redefinition
- token_error(token, .str_global_redefinition)
- :str_global_redefinition
- string Redefinition of global variable.
+ :lbrace_after_declaration
+ token_error(token, .str_lbrace_after_declaration)
+ :str_lbrace_after_declaration
+ string Opening { after declaration of non-function.
byte 0
- :function_initializer
- token_error(token, .str_function_initializer)
- :str_function_initializer
- string Functions should not have initializers.
+ :nested_function
+ token_error(token, .str_nested_function)
+ :str_nested_function
+ string Nested function.
byte 0
- :parse_function_definition
- p = types + type
- ; @TODO: parameters
- ; @NOTE: remember to turn array members into pointers
- if *1p != TYPE_FUNCTION goto lbrace_after_declaration
+ :parse_typedef
+ if block_depth > 0 goto local_typedef
+ base_type = token + 16
+ base_type_end = type_get_base_end(base_type)
+
+ token = base_type_end
+
+ :typedef_loop
+ prefix = token
+ prefix_end = type_get_prefix_end(prefix)
+ if *1prefix_end != TOKEN_IDENTIFIER goto typedef_no_ident
+ ident = prefix_end + 8
+ ident = *8ident
+ suffix = prefix_end + 16
+ suffix_end = type_get_suffix_end(prefix)
- global function_stmt_data ; initialized in main
- global function_stmt_data_bytes_used
+ ;putc('B)
+ ;putc(':)
+ ;print_tokens(base_type, base_type_end)
+ ;putc('P)
+ ;putc(':)
+ ;print_tokens(prefix, prefix_end)
+ ;putc('S)
+ ;putc(':)
+ ;print_tokens(suffix, suffix_end)
- p = function_stmt_data + function_stmt_data_bytes_used
- out = p
- parse_statement(&token, &out)
- if block_depth != 0 goto stmtdepth_internal_err
- function_stmt_data_bytes_used = out - function_stmt_data
- ident_list_add(function_statements, name, p)
- print_statement(p)
- goto parse_tokens_loop
+ type = types_bytes_used
+ parse_type_declarators(prefix, prefix_end, suffix, suffix_end)
+ parse_base_type(base_type)
- :stmtdepth_internal_err
- token_error(token, .str_stmtdepth_internal_err)
- :str_stmtdepth_internal_err
- string Internal compiler error: parse_stmt_depth is not 0 after parsing function body.
- byte 0
- :lbrace_after_declaration
- token_error(token, .str_lbrace_after_declaration)
- :str_lbrace_after_declaration
- string Opening { after declaration of non-function.
- byte 0
- :parse_typedef
- base_type = token + 16
- base_type_end = type_get_base_end(base_type)
+ puts(.str_typedef)
+ putc(32)
+ print_type(type)
+ putc(10)
- token = base_type_end
+ b = ident_list_lookup(typedefs, ident)
+ if b != 0 goto typedef_redefinition
- :typedef_loop
- prefix = token
- prefix_end = type_get_prefix_end(prefix)
- if *1prefix_end != TOKEN_IDENTIFIER goto typedef_no_ident
- ident = prefix_end + 8
- ident = *8ident
- suffix = prefix_end + 16
- suffix_end = type_get_suffix_end(prefix)
-
- ;putc('B)
- ;putc(':)
- ;print_tokens(base_type, base_type_end)
- ;putc('P)
- ;putc(':)
- ;print_tokens(prefix, prefix_end)
- ;putc('S)
- ;putc(':)
- ;print_tokens(suffix, suffix_end)
-
- type = types_bytes_used
- parse_type_declarators(prefix, prefix_end, suffix, suffix_end)
- parse_base_type(base_type)
-
- puts(.str_typedef)
- putc(32)
- print_type(type)
- putc(10)
-
- b = ident_list_lookup(typedefs, ident)
- if b != 0 goto typedef_redefinition
-
- ident_list_add(typedefs, ident, type)
- token = suffix_end
- if *1token == SYMBOL_SEMICOLON goto typedef_loop_end
- if *1token != SYMBOL_COMMA goto bad_typedef
- token += 16 ; skip comma
- goto typedef_loop
- :typedef_loop_end
- token += 16 ; skip semicolon
- goto parse_tokens_loop
- :typedef_no_ident
- token_error(token, .str_typedef_no_ident)
- :str_typedef_no_ident
- string No identifier in typedef declaration.
- byte 0
- :bad_typedef
- token_error(token, .str_bad_typedef)
- :str_bad_typedef
- string Bad typedef.
- byte 0
- :typedef_redefinition
- token_error(token, .str_typedef_redefinition)
- :str_typedef_redefinition
- string typedef redefinition.
- byte 0
- :parse_tokens_eof
- return
+ ident_list_add(typedefs, ident, type)
+ token = suffix_end
+ if *1token == SYMBOL_SEMICOLON goto typedef_loop_end
+ if *1token != SYMBOL_COMMA goto bad_typedef
+ token += 16 ; skip comma
+ goto typedef_loop
+ :typedef_loop_end
+ token += 16 ; skip semicolon
+ goto parse_tld_ret
+ :local_typedef
+ ; @NONSTANDARD
+ ; we could add an extra "typedefs" argument to this function to fix this.
+ token_error(token, .str_local_typedef)
+ :str_local_typedef
+ string typedefs inside functions are not supported.
+ byte 0
+ :typedef_no_ident
+ token_error(token, .str_typedef_no_ident)
+ :str_typedef_no_ident
+ string No identifier in typedef declaration.
+ byte 0
+ :bad_typedef
+ token_error(token, .str_bad_typedef)
+ :str_bad_typedef
+ string Bad typedef.
+ byte 0
+ :typedef_redefinition
+ token_error(token, .str_typedef_redefinition)
+ :str_typedef_redefinition
+ string typedef redefinition.
+ byte 0
; write type, file, and line info for statement
function write_statement_header
@@ -281,7 +314,6 @@ function parse_statement
local c
local n
-
out = *8p_out
token = *8p_token
@@ -321,7 +353,10 @@ function parse_statement
*8p_out = out
return
:stmt_static_declaration
- byte 0xcc ; @TODO
+ p = block_static_variables
+ p += block_depth < 3
+ parse_toplevel_declaration(&token, *8p)
+ goto parse_statement_ret
:stmt_break
write_statement_header(out, STATEMENT_BREAK, token)
token += 16