summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--05/idents.b26
-rw-r--r--05/main.b13
-rw-r--r--05/main.c17
-rw-r--r--05/parse.b175
-rw-r--r--05/tokenize.b7
-rw-r--r--05/util.b14
6 files changed, 217 insertions, 35 deletions
diff --git a/05/idents.b b/05/idents.b
index c4fff0f..ef859bc 100644
--- a/05/idents.b
+++ b/05/idents.b
@@ -1,15 +1,21 @@
; an "identifier list" is a list of identifiers and 64-bit values associated with them.
+; they are stored as
+; null-terminated string
+; 64-bit value (unaligned)
+; ...
+; null-terminated string
+; 64-bit value (unaligned)
+; 0 byte (marks the end of the list; an entry whose identifier is empty would therefore look like the end)
function ident_list_create
argument nbytes
local list
list = malloc(nbytes)
- *1list = 255
return list
function ident_list_clear
argument list
- *1list = 255
+ *1list = 0 ; a leading 0 byte is what every list walker treats as end-of-list, so this empties the list
return
function ident_list_free
@@ -22,7 +28,7 @@ function ident_list_len
local len
len = 0
:ilist_len_loop
- if *1list == 255 goto ilist_len_ret
+ if *1list == 0 goto ilist_len_ret
list = memchr(list, 0)
list += 9 ; skip null byte and value
len += 1
@@ -34,7 +40,7 @@ function ident_list_value_at_index
argument list
argument idx
:ilist_vai_loop
- if *1list == 255 goto return_0
+ if *1list == 0 goto return_0
list = memchr(list, 0)
list += 1
if idx <= 0 goto ilist_vai_ret
@@ -50,10 +56,8 @@ function ident_list_add
argument ident
argument value
- ; note: we can't just do list = memchr(list, 255) because values
- ; might have a 255 byte.
:ilist_add_go_to_end_loop
- if *1list == 255 goto ilist_add_found_end
+ if *1list == 0 goto ilist_add_found_end
list = memchr(list, 0)
list += 9 ; skip null byte and value
goto ilist_add_go_to_end_loop
@@ -62,7 +66,7 @@ function ident_list_add
list += 1
*8list = value ; UNALIGNED
list += 8
- *1list = 255
+ *1list = 0
return
@@ -72,7 +76,7 @@ function ident_list_lookup
argument ident
local b
:ilist_lookup_loop
- if *1list == 255 goto return_0
+ if *1list == 0 goto return_0
b = str_equals(list, ident)
list = memchr(list, 0)
list += 9 ; skip null byte and value
@@ -88,7 +92,7 @@ function ident_list_lookup_check
argument pvalue
local b
:ilist_lookcheck_loop
- if *1list == 255 goto return_0
+ if *1list == 0 goto return_0
b = str_equals(list, ident)
list = memchr(list, 0)
list += 9 ; skip null byte and value
@@ -101,7 +105,7 @@ function ident_list_lookup_check
function ident_list_print
argument list
:ilist_print_loop
- if *1list == 255 goto ilist_print_loop_end
+ if *1list == 0 goto ilist_print_loop_end
puts(list)
putc(':)
putc(32)
diff --git a/05/main.b b/05/main.b
index 2200156..4ff7e5c 100644
--- a/05/main.b
+++ b/05/main.b
@@ -30,12 +30,10 @@ global types_bytes_used
global typedefs
; ident list of enum values
global enumerators
-; struct/union names
-; an ident list of pointers to struct data (see structures below)
-global struct_names
-; structs and unions
-; each struct/union is an ident list of 64-bit values, (type << 32) | offset
-; for unions, offset will always be 0.
+; structs/unions
+; an ident list mapping each struct/union name to a pointer to its struct data
+; each struct data is itself an ident list mapping member names to 64-bit values, (type << 32) | offset
+; for unions, offset will always be 0.
global structures
global structures_bytes_used
@@ -155,7 +153,7 @@ function main
typedefs = ident_list_create(100000)
enumerators = ident_list_create(4000000)
- struct_names = ident_list_create(4000000)
+ structures = ident_list_create(4000000)
dat_banned_objmacros = 255
dat_banned_fmacros = 255
@@ -164,7 +162,6 @@ function main
*1file_list = 255
object_macros = malloc(4000000)
function_macros = malloc(4000000)
- structures = malloc(40000000)
types = malloc(16000000)
types_init(types, &types_bytes_used)
diff --git a/05/main.c b/05/main.c
index b23d08f..0f69d38 100644
--- a/05/main.c
+++ b/05/main.c
@@ -2,5 +2,18 @@
int i[41];
long double d;
} (*x)(void);
-*/
-typedef int *Foo[sizeof(unsigned short int)];
+
+/* typedef enum X { */
+/* R,S,T */
+/* } *Foo[sizeof(unsigned long)]; */
+/* typedef int A[T]; */
+
+typedef struct A { /* test input: mixed member sizes/alignments to exercise struct layout */
+ int x, y;
+ long double c;
+ unsigned long d;
+ char e[3];
+ char g[2]; /* was a second `c`; member names must be unique within a struct */
+ char h; /* was a second `d` */
+ long f;
+} A;
diff --git a/05/parse.b b/05/parse.b
index be59e6b..10c718d 100644
--- a/05/parse.b
+++ b/05/parse.b
@@ -212,9 +212,6 @@ function type_get_base_end
token_skip_to_matching_rbrace(&token)
token += 16
goto skip_base_type_loop_end
- :str_bad_type
- string Bad type.
- byte 0
; return the end of this type prefix
@@ -285,7 +282,10 @@ function type_get_suffix_end
return token
:type_get_suffix_bad_type
- token_error(prefix, .str_bad_type)
+ token_error(prefix, .str_bad_type_suffix)
+ :str_bad_type_suffix
+ string Bad type suffix.
+ byte 0
; writes to *(types + types_bytes_used), and updates types_bytes_used
@@ -397,8 +397,11 @@ function parse_type_declarators
:type_declarators_loop_end
return 0
:parse_typedecls_bad_type
- token_error(prefix, .str_bad_type)
-
+ token_error(prefix, .str_bad_type_declarators)
+ :str_bad_type_declarators
+ string Bad type declarators.
+ byte 0
+
; writes to *(types + types_bytes_used), and updates types_bytes_used (no return value)
function parse_base_type
argument base_type
@@ -407,7 +410,8 @@ function parse_base_type
local p
local c
local depth
- local expr
+ local is_struct
+ is_struct = 0
out = types + types_bytes_used
@@ -437,7 +441,6 @@ function parse_base_type
:base_type_normal_loop
c = *1p
p += 16
- ; yes, this allows for `int int x;` but whatever
if c == KEYWORD_CHAR goto base_type_flag_char
if c == KEYWORD_SHORT goto base_type_flag_short
if c == KEYWORD_INT goto base_type_flag_int
@@ -446,23 +449,40 @@ function parse_base_type
if c == KEYWORD_DOUBLE goto base_type_flag_double
goto base_type_normal_loop_end
:base_type_flag_char
+ c = flags & PARSETYPE_FLAG_CHAR
+ if c != 0 goto repeated_base_type
flags |= PARSETYPE_FLAG_CHAR
goto base_type_normal_loop
:base_type_flag_short
+ c = flags & PARSETYPE_FLAG_SHORT
+ if c != 0 goto repeated_base_type
flags |= PARSETYPE_FLAG_SHORT
goto base_type_normal_loop
:base_type_flag_int
+ c = flags & PARSETYPE_FLAG_INT
+ if c != 0 goto repeated_base_type
flags |= PARSETYPE_FLAG_INT
goto base_type_normal_loop
:base_type_flag_long
+ c = flags & PARSETYPE_FLAG_LONG
+ if c != 0 goto repeated_base_type
flags |= PARSETYPE_FLAG_LONG
goto base_type_normal_loop
:base_type_flag_unsigned
+ c = flags & PARSETYPE_FLAG_UNSIGNED
+ if c != 0 goto repeated_base_type
flags |= PARSETYPE_FLAG_UNSIGNED
goto base_type_normal_loop
:base_type_flag_double
+ c = flags & PARSETYPE_FLAG_DOUBLE
+ if c != 0 goto repeated_base_type
flags |= PARSETYPE_FLAG_DOUBLE
goto base_type_normal_loop
+ :repeated_base_type
+ token_error(p, .str_repeated_base_type)
+ :str_repeated_base_type
+ string Arithmetic type repeated (e.g. unsigned unsigned int).
+ byte 0
:base_type_normal_loop_end
if flags == 8 goto base_type_int ; `int`
if flags == 1 goto base_type_uint ; `unsigned`
@@ -524,13 +544,21 @@ function parse_base_type
return 0
:base_type_struct
+ is_struct = 1
+ ; fallthrough
:base_type_union
+ local struct_name
+ local struct
+ struct_name = .empty_string
p = base_type + 16
- if *1p != TOKEN_IDENTIFIER goto base_type_struct_definition
- p += 16
+ if *1p != TOKEN_IDENTIFIER goto base_type_have_name
+ p += 8
+ struct_name = *8p
+ p += 8
+ :base_type_have_name
+ c = ident_list_lookup(structures, struct_name)
if *1p == SYMBOL_LBRACE goto base_type_struct_definition
- p -= 8
- c = ident_list_lookup(struct_names, *8p)
+
if c == 0 goto base_type_incomplete_struct
; e.g. struct Foo x; where struct Foo has been defined
*1out = TYPE_STRUCT
@@ -544,10 +572,99 @@ function parse_base_type
out += 1
goto base_type_done
:base_type_struct_definition
- if *1p != SYMBOL_LBRACE goto bad_base_type
- byte 0xcc ; @TODO
+ local member_base_type
+ local member_prefix
+ local member_prefix_end
+ local member_suffix
+ local member_suffix_end
+ local member_name
+ local member_type
+ local member_align
+ local member_size
+
+ if c != 0 goto struct_redefinition
+ struct = ident_list_create(8000) ; note: maximum "* 127 members in a single structure or union" C89 § 2.2.4.1
+ *1out = TYPE_STRUCT
+ out += 1
+ *8out = struct
+ out += 8
+ types_bytes_used = out - types
+ p += 16 ; skip opening {
+
+ local offset
+ offset = 0
+
+ ident_list_add(structures, struct_name, struct)
+
+ :struct_defn_loop
+ if *1p == SYMBOL_RBRACE goto struct_defn_loop_end
+ member_base_type = p
+ p = type_get_base_end(member_base_type)
+ :struct_defn_decl_loop ; handle each element of int x, y[5], *z;
+ member_prefix = p
+ member_prefix_end = type_get_prefix_end(member_prefix)
+ if *1member_prefix_end != TOKEN_IDENTIFIER goto member_no_identifier
+ member_name = member_prefix_end + 8
+ member_name = *8member_name
+ member_suffix = member_prefix_end + 16
+ member_suffix_end = type_get_suffix_end(member_prefix)
+ member_type = types_bytes_used
+
+
+ parse_type_declarators(member_prefix, member_prefix_end, member_suffix, member_suffix_end)
+ parse_base_type(member_base_type)
+
+ ; make sure struct member is aligned
+ member_align = type_alignof(member_type)
+ ; offset = ceil(offset / align) * align
+ offset += member_align - 1
+ offset /= member_align
+ offset *= member_align
+
+ if offset ] 0xffffffff goto struct_too_large
+ ;putnln(offset)
+ ; data = (type << 32) | offset
+ c = member_type < 32
+ c |= offset
+ ident_list_add(struct, member_name, c)
+
+ member_size = type_sizeof(member_type)
+ offset += member_size * is_struct ; keep offset as 0 if this is a union
+ p = member_suffix_end
+ if *1p == SYMBOL_SEMICOLON goto struct_defn_decl_loop_end
+ if *1p != SYMBOL_COMMA goto struct_bad_declaration
+ p += 16 ; skip comma
+ goto struct_defn_decl_loop
+ :struct_defn_decl_loop_end
+ p += 16 ; skip semicolon
+ goto struct_defn_loop
+ :struct_defn_loop_end
+ out = types + types_bytes_used
+ goto base_type_done
+ :struct_redefinition
+ token_error(p, .str_struct_redefinition)
+ :str_struct_redefinition
+ string struct redefinition.
+ byte 0
+ :struct_bad_declaration
+ token_error(p, .str_struct_bad_declaration)
+ :str_struct_bad_declaration
+ string Bad declaration in struct.
+ byte 0
+ :struct_too_large
+ token_error(p, .str_struct_too_large)
+ :str_struct_too_large
+ string struct too large (maximum is 4GB).
+ byte 0
+ :member_no_identifier
+ ; e.g. struct { int; };
+ token_error(p, .str_member_no_identifier)
+ :str_member_no_identifier
+ string No identifier in struct member.
+ byte 0
:base_type_enum
local q
+ local expr
*1out = TYPE_INT ; treat any enum as int
out += 1
@@ -1320,6 +1437,36 @@ function type_sizeof
c = type_sizeof(p)
return n * c
+function type_alignof
+ argument type
+ local p
+ local c
+ p = types + type ; type_alignof: alignment in bytes of the type stored at offset `type` in the types buffer
+ c = *1p ; the first byte of a stored type is its TYPE_* tag
+ if c == TYPE_CHAR goto return_1 ; return_N labels live outside this function; presumably each just returns N
+ if c == TYPE_UNSIGNED_CHAR goto return_1
+ if c == TYPE_SHORT goto return_2
+ if c == TYPE_UNSIGNED_SHORT goto return_2
+ if c == TYPE_INT goto return_4
+ if c == TYPE_UNSIGNED_INT goto return_4
+ if c == TYPE_LONG goto return_8
+ if c == TYPE_UNSIGNED_LONG goto return_8
+ if c == TYPE_FLOAT goto return_4
+ if c == TYPE_DOUBLE goto return_8
+ if c == TYPE_VOID goto return_1
+ if c == TYPE_POINTER goto return_8
+ if c == TYPE_FUNCTION goto return_8
+ if c == TYPE_ARRAY goto alignof_array
+ fputs(2, .str_alignof_ni) ; @TODO: any other tag (e.g. TYPE_STRUCT) is not handled yet; print the message (2 is presumably the stderr fd) and quit
+ exit(1)
+ :str_alignof_ni
+ string type_alignof for this type not implemented.
+ byte 0
+
+ :alignof_array ; an array is given the alignment of its element type
+ p = type + 9 ; skip TYPE_ARRAY and size; p now holds the element type's offset (not a pointer)
+ return type_alignof(p)
+
; evaluate an expression which can be the size of an array, e.g.
; enum { A, B, C };
; int x[A * sizeof(float) + 3 << 5];
diff --git a/05/tokenize.b b/05/tokenize.b
index e2ffb1c..f5b0c30 100644
--- a/05/tokenize.b
+++ b/05/tokenize.b
@@ -672,3 +672,10 @@ function print_tokens
:str_eof
string EOF
byte 0
+
+function print_token
+ argument token
+ local p
+ p = token + 16 ; tokens are stepped over 16 bytes at a time elsewhere in this diff, so p is the token after `token`
+ print_tokens(token, p) ; presumably prints from `token` up to (not including) p, i.e. just this one token - confirm against print_tokens
+ return
diff --git a/05/util.b b/05/util.b
index 3b6eb18..0d9a401 100644
--- a/05/util.b
+++ b/05/util.b
@@ -437,6 +437,11 @@ function putx64
argument n
fputx64(1, n)
return
+function putx64ln
+ argument n
+ fputx64(1, n) ; same call putx64 makes (first argument is presumably the file descriptor)
+ fputc(1, 10) ; then end the line: 10 is ASCII line feed
+ return
function fputx32
argument fd
@@ -456,6 +461,11 @@ function putx32
argument n
fputx32(1, n)
return
+function putx32ln
+ argument n
+ fputx32(1, n) ; same call putx32 makes; fputx32's first argument is a file descriptor
+ fputc(1, 10) ; then end the line: 10 is ASCII line feed
+ return
function putn
argument n
@@ -666,6 +676,10 @@ function leftmost_1bit
:str_leftmost1bit_0
string 0 passed to leftmost_1bit.
byte 0
+
+:empty_string
+ byte 0
+
:return_0
return 0
:return_1