From 217794afce69f6e17a78c10553d2357860cfe6e1 Mon Sep 17 00:00:00 2001 From: pommicket Date: Fri, 28 Jan 2022 15:07:23 -0500 Subject: start top level decls - also oops forgot about sizeless arrays --- 05/constants.b | 1 + 05/idents.b | 33 ++++++++++++++++- 05/main.b | 8 +++++ 05/main.c | 45 ++++++++++++++--------- 05/parse.b | 111 +++++++++++++++++++++++++++++++++++++++++++++++++++++---- 5 files changed, 173 insertions(+), 25 deletions(-) (limited to '05') diff --git a/05/constants.b b/05/constants.b index d0f2060..01e5dd2 100644 --- a/05/constants.b +++ b/05/constants.b @@ -6,6 +6,7 @@ ; note that file offsets and runtime addresses are the same. ; you should be able to change these constants without breaking anything: #define RODATA_ADDR 0x800000 +#define RWDATA_ADDR 0xc00000 #define RWDATA_END 0x1000000 #define EXECUTABLE_SIZE 0x1000000 diff --git a/05/idents.b b/05/idents.b index ef859bc..415059c 100644 --- a/05/idents.b +++ b/05/idents.b @@ -111,8 +111,39 @@ function ident_list_print putc(32) list = memchr(list, 0) list += 1 - putn(*8list) + putnln(*8list) list += 8 goto ilist_print_loop :ilist_print_loop_end return + +function ident_list_printx64 + argument list + :ilist_printx64_loop + if *1list == 0 goto ilist_printx64_loop_end + puts(list) + putc(':) + putc(32) + list = memchr(list, 0) + list += 1 + putx64ln(*8list) + list += 8 + goto ilist_printx64_loop + :ilist_printx64_loop_end + return + +function ident_list_printx32 + argument list + :ilist_printx32_loop + if *1list == 0 goto ilist_printx32_loop_end + puts(list) + putc(':) + putc(32) + list = memchr(list, 0) + list += 1 + putx32ln(*8list) + list += 8 + goto ilist_printx32_loop + :ilist_printx32_loop_end + return + diff --git a/05/main.b b/05/main.b index d988ee3..6d14ed1 100644 --- a/05/main.b +++ b/05/main.b @@ -36,7 +36,11 @@ global structures global structures_bytes_used ; file offset/runtime address to write next piece of read-only data; initialized in main global rodata_end_addr +; file offset/runtime address to write next piece of read-write data; initialized in main +global rwdata_end_addr global output_file_data +; ident list of addresses of global variables. +global global_variables #include util.b #include idents.b @@ -157,6 +161,7 @@ function main typedefs = ident_list_create(100000) enumerators = ident_list_create(4000000) structures = ident_list_create(4000000) + global_variables = ident_list_create(4000000) dat_banned_objmacros = 255 dat_banned_fmacros = 255 @@ -178,6 +183,7 @@ function main :have_filenames output_fd = open_rw(output_filename, 493) rodata_end_addr = RODATA_ADDR + rwdata_end_addr = RWDATA_ADDR ftruncate(output_fd, RWDATA_END) output_file_data = mmap(0, RWDATA_END, PROT_READ_WRITE, MAP_SHARED, output_fd, 0) @@ -206,6 +212,8 @@ function main munmap(output_file_data, RWDATA_END) close(output_fd) + ident_list_printx32(global_variables) + exit(0) :mmap_output_fd_failed diff --git a/05/main.c b/05/main.c index 3b094ad..5d4b282 100644 --- a/05/main.c +++ b/05/main.c @@ -7,22 +7,33 @@ /* R,S,T */ /* } *Foo[sizeof(unsigned long)]; */ /* typedef int A[T]; */ +/* */ +/* typedef struct A { */ +/* int x, y; */ +/* long double c; */ +/* unsigned long d; */ +/* char e[3]; */ +/* long f; */ +/* } A; */ +/* */ +/* typedef union B{ */ +/* int x; */ +/* struct { */ +/* int y; */ +/* struct {long z; } c; */ +/* } c; */ +/* }B; */ +/* */ +/* typedef int x[sizeof(A)+sizeof"hello"]; */ +/* typedef int y[sizeof(struct B)]; */ -typedef struct A { - int x, y; - long double c; - unsigned long d; - char e[3]; - long f; -} A; +static unsigned int x; +static unsigned int y; +static unsigned int z[1000]; +static unsigned int w; -typedef union B{ - int x; - struct { - int y; - struct {long z; } c; - } c; -}B; - -typedef int x[sizeof(A)+sizeof"hello"]; -typedef int y[sizeof(struct B)]; +/* +NOTE: THIS MUST WORK +int x[] = {1,2,3} +sizeof x +*/ diff --git a/05/parse.b b/05/parse.b index 48a5256..bc14b58 100644 --- a/05/parse.b +++ b/05/parse.b @@ -33,20 +33,105 @@ function parse_tokens local b local base_type local base_type_end + local name local prefix local prefix_end local suffix local suffix_end + local is_extern token = tokens :parse_tokens_loop + is_extern = 0 if *1token == TOKEN_EOF goto parse_tokens_eof + if *1token == KEYWORD_STATIC goto parse_static_toplevel_decl + if *1token == KEYWORD_EXTERN goto parse_extern_toplevel_decl if *1token == KEYWORD_TYPEDEF goto parse_typedef - die(.str_parse_tokens_ni) - :str_parse_tokens_ni - string parse_tokens not implemented. + b = token_is_type(token) + if b != 0 goto parse_toplevel_decl + + die(.str_bad_statement) + :str_bad_statement + string Bad statement. byte 0 + :parse_static_toplevel_decl + token += 16 ; we don't care that this is static + goto parse_toplevel_decl + :parse_extern_toplevel_decl + token += 16 + is_extern = 1 + goto parse_toplevel_decl + :parse_toplevel_decl + base_type = token + base_type_end = type_get_base_end(token) + token = base_type_end + :tl_decl_loop + prefix = token + prefix_end = type_get_prefix_end(prefix) + if *1prefix_end != TOKEN_IDENTIFIER goto tl_decl_no_ident + name = prefix_end + 8 + name = *8name + suffix = prefix_end + 16 + suffix_end = type_get_suffix_end(prefix) + type = types_bytes_used + parse_type_declarators(prefix, prefix_end, suffix, suffix_end) + parse_base_type(base_type, base_type_end) + token = suffix_end + if *1token == SYMBOL_LBRACE goto parse_function_definition + if *1token == SYMBOL_SEMICOLON goto parse_tld_no_initializer + if *1token == SYMBOL_COMMA goto parse_tld_no_initializer + if *1token == SYMBOL_EQ goto parse_tld_initializer + token_error(token, .str_unrecognized_stuff_after_declaration) + :str_unrecognized_stuff_after_declaration + string Declaration should be followed by one of: { , = + byte 32 + byte 59 ; semicolon + byte 0 + :parse_tl_decl_cont + if *1token == SYMBOL_SEMICOLON goto tl_decl_loop_done + if *1token != SYMBOL_COMMA goto tld_bad_stuff_after_decl + goto tl_decl_loop + :tl_decl_loop_done + token += 16 ; skip semicolon + goto parse_tokens_loop + + :tl_decl_no_ident + token_error(prefix_end, .str_tl_decl_no_ident) + :str_tl_decl_no_ident + string No identifier in top-level declaration. + byte 0 + :tld_bad_stuff_after_decl + token_error(token, .str_tld_bad_stuff_after_decl) + :str_tld_bad_stuff_after_decl + string Declarations should be immediately followed by a comma or semicolon. + byte 0 + :parse_tld_no_initializer + p = types + type + if *1p == TYPE_FUNCTION goto parse_tl_decl_cont ; ignore function declarations -- we do two passes anyways + ident_list_add(global_variables, name, rwdata_end_addr) + ; just skip forward by the size of this variable -- it'll automatically be filled with 0s. + rwdata_end_addr += type_sizeof(type) + goto parse_tl_decl_cont + :parse_tld_initializer + die(.str_tldinNI) ; @TODO + :str_tldinNI + string tld initializer not implemented. + byte 10 + byte 0 + :parse_function_definition + p = types + type + if *1p != TYPE_FUNCTION goto lbrace_after_declaration + die(.str_fdNI) ; @TODO + :str_fdNI + string function definitions not implemented. + byte 10 + byte 0 + :lbrace_after_declaration + token_error(token, .str_lbrace_after_declaration) + :str_lbrace_after_declaration + string Opening { after declaration of non-function. + byte 0 :parse_typedef base_type = token + 16 base_type_end = type_get_base_end(base_type) @@ -359,6 +444,11 @@ function parse_type_declarators *1out = TYPE_ARRAY types_bytes_used += 1 + p = suffix + token_skip_to_matching_rsquare(&p) + suffix += 16 ; skip [ + if *1suffix == SYMBOL_RSQUARE goto array_no_size + ; little hack to avoid screwing up types like double[sizeof(int)] ; temporarily pretend we're using a lot more of types local prev_types_bytes_used @@ -366,9 +456,7 @@ function parse_type_declarators types_bytes_used += 4000 expr = malloc(4000) - p = suffix - token_skip_to_matching_rsquare(&p) - suffix += 16 ; skip [ + parse_expression(suffix, p, expr) ;print_expression(expr) ;putc(10) @@ -388,7 +476,14 @@ function parse_type_declarators token_error(suffix, .str_bad_array_size) :str_bad_array_size string Very large or negative array size. - byte 0 + byte 0 + :array_no_size + ; e.g. int x[] = {1,2,3}; + out = types + types_bytes_used + *8out = 0 + types_bytes_used += 8 + suffix += 16 + goto type_declarators_loop :parse_function_type local param_base_type local param_prefix @@ -605,6 +700,8 @@ function parse_base_type out += 1 goto base_type_done :base_type_struct_definition + ; @NONSTANDARD: we don't handle bit-fields. + local member_base_type local member_prefix local member_prefix_end -- cgit v1.2.3