From d036dcc5ac24aaa499d7cec50a94eed645f76bfb Mon Sep 17 00:00:00 2001 From: pommicket Date: Wed, 26 Jan 2022 18:00:47 -0500 Subject: struct/union definitions --- 05/idents.b | 26 +++++---- 05/main.b | 13 ++--- 05/main.c | 17 +++++- 05/parse.b | 175 +++++++++++++++++++++++++++++++++++++++++++++++++++++----- 05/tokenize.b | 7 +++ 05/util.b | 14 +++++ 6 files changed, 217 insertions(+), 35 deletions(-) (limited to '05') diff --git a/05/idents.b b/05/idents.b index c4fff0f..ef859bc 100644 --- a/05/idents.b +++ b/05/idents.b @@ -1,15 +1,21 @@ ; an "identifier list" is a list of identifiers and 64-bit values associated with them. +; they are stored as +; null-terminated string +; 64-bit value (unaligned) +; ... +; null-terminated string +; 64-bit value (unaligned) +; 0 byte function ident_list_create argument nbytes local list list = malloc(nbytes) - *1list = 255 return list function ident_list_clear argument list - *1list = 255 + *1list = 0 return function ident_list_free @@ -22,7 +28,7 @@ function ident_list_len local len len = 0 :ilist_len_loop - if *1list == 255 goto ilist_len_ret + if *1list == 0 goto ilist_len_ret list = memchr(list, 0) list += 9 ; skip null byte and value len += 1 @@ -34,7 +40,7 @@ function ident_list_value_at_index argument list argument idx :ilist_vai_loop - if *1list == 255 goto return_0 + if *1list == 0 goto return_0 list = memchr(list, 0) list += 1 if idx <= 0 goto ilist_vai_ret @@ -50,10 +56,8 @@ function ident_list_add argument ident argument value - ; note: we can't just do list = memchr(list, 255) because values - ; might have a 255 byte. 
:ilist_add_go_to_end_loop - if *1list == 255 goto ilist_add_found_end + if *1list == 0 goto ilist_add_found_end list = memchr(list, 0) list += 9 ; skip null byte and value goto ilist_add_go_to_end_loop @@ -62,7 +66,7 @@ function ident_list_add list += 1 *8list = value ; UNALIGNED list += 8 - *1list = 255 + *1list = 0 return @@ -72,7 +76,7 @@ function ident_list_lookup argument ident local b :ilist_lookup_loop - if *1list == 255 goto return_0 + if *1list == 0 goto return_0 b = str_equals(list, ident) list = memchr(list, 0) list += 9 ; skip null byte and value @@ -88,7 +92,7 @@ function ident_list_lookup_check argument pvalue local b :ilist_lookcheck_loop - if *1list == 255 goto return_0 + if *1list == 0 goto return_0 b = str_equals(list, ident) list = memchr(list, 0) list += 9 ; skip null byte and value @@ -101,7 +105,7 @@ function ident_list_lookup_check function ident_list_print argument list :ilist_print_loop - if *1list == 255 goto ilist_print_loop_end + if *1list == 0 goto ilist_print_loop_end puts(list) putc(':) putc(32) diff --git a/05/main.b b/05/main.b index 2200156..4ff7e5c 100644 --- a/05/main.b +++ b/05/main.b @@ -30,12 +30,10 @@ global types_bytes_used global typedefs ; ident list of enum values global enumerators -; struct/union names -; an ident list of pointers to struct data (see structures below) -global struct_names -; structs and unions -; each struct/union is an ident list of 64-bit values, (type << 32) | offset -; for unions, offset will always be 0. +; struct/unions +; an ident list of pointers to struct data +; each struct data is an ident list of 64-bit values, (type << 32) | offset +; for unions, offset will always be 0. 
global structures global structures_bytes_used @@ -155,7 +153,7 @@ function main typedefs = ident_list_create(100000) enumerators = ident_list_create(4000000) - struct_names = ident_list_create(4000000) + structures = ident_list_create(4000000) dat_banned_objmacros = 255 dat_banned_fmacros = 255 @@ -164,7 +162,6 @@ function main *1file_list = 255 object_macros = malloc(4000000) function_macros = malloc(4000000) - structures = malloc(40000000) types = malloc(16000000) types_init(types, &types_bytes_used) diff --git a/05/main.c b/05/main.c index b23d08f..0f69d38 100644 --- a/05/main.c +++ b/05/main.c @@ -2,5 +2,18 @@ int i[41]; long double d; } (*x)(void); -*/ -typedef int *Foo[sizeof(unsigned short int)]; + +/* typedef enum X { */ +/* R,S,T */ +/* } *Foo[sizeof(unsigned long)]; */ +/* typedef int A[T]; */ + +typedef struct A { + int x, y; + long double c; + unsigned long d; + char e[3]; + char c[2]; + char d; + long f; +} A; diff --git a/05/parse.b b/05/parse.b index be59e6b..10c718d 100644 --- a/05/parse.b +++ b/05/parse.b @@ -212,9 +212,6 @@ function type_get_base_end token_skip_to_matching_rbrace(&token) token += 16 goto skip_base_type_loop_end - :str_bad_type - string Bad type. - byte 0 ; return the end of this type prefix @@ -285,7 +282,10 @@ function type_get_suffix_end return token :type_get_suffix_bad_type - token_error(prefix, .str_bad_type) + token_error(prefix, .str_bad_type_suffix) + :str_bad_type_suffix + string Bad type suffix. + byte 0 ; writes to *(types + types_bytes_used), and updates types_bytes_used @@ -397,8 +397,11 @@ function parse_type_declarators :type_declarators_loop_end return 0 :parse_typedecls_bad_type - token_error(prefix, .str_bad_type) - + token_error(prefix, .str_bad_type_declarators) + :str_bad_type_declarators + string Bad type declarators. 
+ byte 0 + ; writes to *(types + types_bytes_used), and updates types_bytes_used (no return value) function parse_base_type argument base_type @@ -407,7 +410,8 @@ function parse_base_type local p local c local depth - local expr + local is_struct + is_struct = 0 out = types + types_bytes_used @@ -437,7 +441,6 @@ function parse_base_type :base_type_normal_loop c = *1p p += 16 - ; yes, this allows for `int int x;` but whatever if c == KEYWORD_CHAR goto base_type_flag_char if c == KEYWORD_SHORT goto base_type_flag_short if c == KEYWORD_INT goto base_type_flag_int @@ -446,23 +449,40 @@ function parse_base_type if c == KEYWORD_DOUBLE goto base_type_flag_double goto base_type_normal_loop_end :base_type_flag_char + c = flags & PARSETYPE_FLAG_CHAR + if c != 0 goto repeated_base_type flags |= PARSETYPE_FLAG_CHAR goto base_type_normal_loop :base_type_flag_short + c = flags & PARSETYPE_FLAG_SHORT + if c != 0 goto repeated_base_type flags |= PARSETYPE_FLAG_SHORT goto base_type_normal_loop :base_type_flag_int + c = flags & PARSETYPE_FLAG_INT + if c != 0 goto repeated_base_type flags |= PARSETYPE_FLAG_INT goto base_type_normal_loop :base_type_flag_long + c = flags & PARSETYPE_FLAG_LONG + if c != 0 goto repeated_base_type flags |= PARSETYPE_FLAG_LONG goto base_type_normal_loop :base_type_flag_unsigned + c = flags & PARSETYPE_FLAG_UNSIGNED + if c != 0 goto repeated_base_type flags |= PARSETYPE_FLAG_UNSIGNED goto base_type_normal_loop :base_type_flag_double + c = flags & PARSETYPE_FLAG_DOUBLE + if c != 0 goto repeated_base_type flags |= PARSETYPE_FLAG_DOUBLE goto base_type_normal_loop + :repeated_base_type + token_error(p, .str_repeated_base_type) + :str_repeated_base_type + string Arithmetic type repeated (e.g. unsigned unsigned int). 
+ byte 0 :base_type_normal_loop_end if flags == 8 goto base_type_int ; `int` if flags == 1 goto base_type_uint ; `unsigned` @@ -524,13 +544,21 @@ function parse_base_type return 0 :base_type_struct + is_struct = 1 + ; fallthrough :base_type_union + local struct_name + local struct + struct_name = .empty_string p = base_type + 16 - if *1p != TOKEN_IDENTIFIER goto base_type_struct_definition - p += 16 + if *1p != TOKEN_IDENTIFIER goto base_type_have_name + p += 8 + struct_name = *8p + p += 8 + :base_type_have_name + c = ident_list_lookup(structures, struct_name) if *1p == SYMBOL_LBRACE goto base_type_struct_definition - p -= 8 - c = ident_list_lookup(struct_names, *8p) + if c == 0 goto base_type_incomplete_struct ; e.g. struct Foo x; where struct Foo has been defined *1out = TYPE_STRUCT @@ -544,10 +572,99 @@ function parse_base_type out += 1 goto base_type_done :base_type_struct_definition - if *1p != SYMBOL_LBRACE goto bad_base_type - byte 0xcc ; @TODO + local member_base_type + local member_prefix + local member_prefix_end + local member_suffix + local member_suffix_end + local member_name + local member_type + local member_align + local member_size + + if c != 0 goto struct_redefinition + struct = ident_list_create(8000) ; note: maximum "* 127 members in a single structure or union" C89 § 2.2.4.1 + *1out = TYPE_STRUCT + out += 1 + *8out = struct + out += 8 + types_bytes_used = out - types + p += 16 ; skip opening { + + local offset + offset = 0 + + ident_list_add(structures, struct_name, struct) + + :struct_defn_loop + if *1p == SYMBOL_RBRACE goto struct_defn_loop_end + member_base_type = p + p = type_get_base_end(member_base_type) + :struct_defn_decl_loop ; handle each element of int x, y[5], *z; + member_prefix = p + member_prefix_end = type_get_prefix_end(member_prefix) + if *1member_prefix_end != TOKEN_IDENTIFIER goto member_no_identifier + member_name = member_prefix_end + 8 + member_name = *8member_name + member_suffix = member_prefix_end + 16
member_suffix_end = type_get_suffix_end(member_prefix) + member_type = types_bytes_used + + + parse_type_declarators(member_prefix, member_prefix_end, member_suffix, member_suffix_end) + parse_base_type(member_base_type) + + ; make sure struct member is aligned + member_align = type_alignof(member_type) + ; offset = ceil(offset / align) * align + offset += member_align - 1 + offset /= member_align + offset *= member_align + + if offset ] 0xffffffff goto struct_too_large + ;putnln(offset) + ; data = (type << 32) | offset + c = member_type < 32 + c |= offset + ident_list_add(struct, member_name, c) + + member_size = type_sizeof(member_type) + offset += member_size * is_struct ; keep offset as 0 if this is a union + p = member_suffix_end + if *1p == SYMBOL_SEMICOLON goto struct_defn_decl_loop_end + if *1p != SYMBOL_COMMA goto struct_bad_declaration + p += 16 ; skip comma + goto struct_defn_decl_loop + :struct_defn_decl_loop_end + p += 16 ; skip semicolon + goto struct_defn_loop + :struct_defn_loop_end + out = types + types_bytes_used + goto base_type_done + :struct_redefinition + token_error(p, .str_struct_redefinition) + :str_struct_redefinition + string struct redefinition. + byte 0 + :struct_bad_declaration + token_error(p, .str_struct_bad_declaration) + :str_struct_bad_declaration + string Bad declaration in struct. + byte 0 + :struct_too_large + token_error(p, .str_struct_too_large) + :str_struct_too_large + string struct too large (maximum is 4GB). + byte 0 + :member_no_identifier + ; e.g. struct { int; }; + token_error(p, .str_member_no_identifier) + :str_member_no_identifier + string No identifier in struct member. 
+ byte 0 :base_type_enum local q + local expr *1out = TYPE_INT ; treat any enum as int out += 1 @@ -1320,6 +1437,36 @@ function type_sizeof c = type_sizeof(p) return n * c +function type_alignof + argument type + local p + local c + p = types + type + c = *1p + if c == TYPE_CHAR goto return_1 + if c == TYPE_UNSIGNED_CHAR goto return_1 + if c == TYPE_SHORT goto return_2 + if c == TYPE_UNSIGNED_SHORT goto return_2 + if c == TYPE_INT goto return_4 + if c == TYPE_UNSIGNED_INT goto return_4 + if c == TYPE_LONG goto return_8 + if c == TYPE_UNSIGNED_LONG goto return_8 + if c == TYPE_FLOAT goto return_4 + if c == TYPE_DOUBLE goto return_8 + if c == TYPE_VOID goto return_1 + if c == TYPE_POINTER goto return_8 + if c == TYPE_FUNCTION goto return_8 + if c == TYPE_ARRAY goto alignof_array + fputs(2, .str_alignof_ni) ; @TODO + exit(1) + :str_alignof_ni + string type_alignof for this type not implemented. + byte 0 + + :alignof_array + p = type + 9 ; skip TYPE_ARRAY and size + return type_alignof(p) + ; evaluate an expression which can be the size of an array, e.g. ; enum { A, B, C }; ; int x[A * sizeof(float) + 3 << 5]; diff --git a/05/tokenize.b b/05/tokenize.b index e2ffb1c..f5b0c30 100644 --- a/05/tokenize.b +++ b/05/tokenize.b @@ -672,3 +672,10 @@ function print_tokens :str_eof string EOF byte 0 + +function print_token + argument token + local p + p = token + 16 + print_tokens(token, p) + return diff --git a/05/util.b b/05/util.b index 3b6eb18..0d9a401 100644 --- a/05/util.b +++ b/05/util.b @@ -437,6 +437,11 @@ function putx64 argument n fputx64(1, n) return +function putx64ln + argument n + fputx64(1, n) + fputc(1, 10) + return function fputx32 argument fd @@ -456,6 +461,11 @@ function putx32 argument n fputx32(1, n) return +function putx32ln + argument n + fputx32(1, n) + fputc(1, 10) + return function putn argument n @@ -666,6 +676,10 @@ function leftmost_1bit :str_leftmost1bit_0 string 0 passed to leftmost_1bit. 
byte 0 + +:empty_string + byte 0 + :return_0 return 0 :return_1 -- cgit v1.2.3