From 7ca37ea8dbf1a1fd73fc145a26f95e5fe70ac3e4 Mon Sep 17 00:00:00 2001
From: pommicket <pommicket@gmail.com>
Date: Wed, 26 Jan 2022 16:40:11 -0500
Subject: commas starting to work

---
 05/parse.b | 343 +++++++++++++++++++++++++++++++------------------------------
 1 file changed, 174 insertions(+), 169 deletions(-)

(limited to '05/parse.b')

diff --git a/05/parse.b b/05/parse.b
index 11ff4df..5b47bb8 100644
--- a/05/parse.b
+++ b/05/parse.b
@@ -30,37 +30,62 @@ function parse_tokens
 	local ident
 	local type
 	local p
+	local base_type
+	local base_type_end
+	local prefix
+	local prefix_end
+	local suffix
+	local suffix_end
 	
 	token = tokens
 	:parse_tokens_loop
 		if *1token == TOKEN_EOF goto parse_tokens_eof
 		if *1token == KEYWORD_TYPEDEF goto parse_typedef
 		
-		byte 0xcc ; not implemented
-		
+		die(.str_parse_tokens_ni)
+		:str_parse_tokens_ni
+			string parse_tokens not implemented.
+			byte 0
 		:parse_typedef
-			token += 16
-			type = parse_type(&token)
-			if type != 0 goto typedef_no_ident
-			if *1token != SYMBOL_SEMICOLON goto typedef_no_semicolon
+			base_type = token + 16
+			base_type_end = type_get_base_end(base_type)
+			
+			token = base_type_end
+			putc('*)
 			
-			p = parse_type_result
 			:typedef_loop
-				if *1p == 255 goto typedef_loop_end
-				ident = p
-				p = memchr(p, 0)
-				p += 1
-				type = *8p
-				p += 8
-				puts(ident)
+				prefix = token
+				prefix_end = type_get_prefix_end(prefix)
+				if *1prefix_end != TOKEN_IDENTIFIER goto typedef_no_ident
+				ident = prefix_end + 8
+				ident = *8ident
+				suffix = prefix_end + 16
+				suffix_end = type_get_suffix_end(suffix)
+				
+				putc('B)
+				putc(':)
+				print_tokens(base_type, base_type_end)
+				putc('P)
+				putc(':)
+				print_tokens(prefix, prefix_end)
+				putc('S)
 				putc(':)
-				putc(32)
+				print_tokens(suffix, suffix_end)
+				
+				type = types_bytes_used
+				parse_type_declarators(prefix, prefix_end, suffix, suffix_end)
+				parse_base_type(base_type)
+				
 				print_type(type)
 				putc(10)
+				
 				ident_list_add(typedefs, ident, type)
+				token = suffix_end
+				if *1token == SYMBOL_SEMICOLON goto typedef_loop_end
+				if *1token != SYMBOL_COMMA goto bad_typedef
+				token += 16 ; skip comma
 				goto typedef_loop
 			:typedef_loop_end
-			
 			token += 16 ; skip semicolon
 			goto parse_tokens_loop
 		:typedef_no_ident
@@ -68,10 +93,10 @@ function parse_tokens
 		:str_typedef_no_ident
 			string No identifier in typedef declaration.
 			byte 0
-		:typedef_no_semicolon
-			token_error(tokens, .str_typedef_no_semicolon)
-		:str_typedef_no_semicolon
-			string typedef does not end with a semicolon.
+		:bad_typedef
+			token_error(tokens, .str_bad_typedef)
+		:str_bad_typedef
+			string Bad typedef.
 			byte 0
 	:parse_tokens_eof
 	return
@@ -147,25 +172,12 @@ function parse_type
 	;     struct Thing (*things[5])(void), *something_else[3];
 	;     BBBBBBBBBBBB PP      SSSSSSSSSS  P              SSS
 	; Here, we call `struct Thing` the "base type".
-	
-	argument p_token
-	local typeid
-	local token
+	byte 0xcc
+
+; return the end of the base for this type.
+function type_get_base_end
+	argument token
 	local c
-	local p
-	local n
-	local base_type_end
-	local depth
-	local prefix
-	local prefix_end
-	local suffix
-	local suffix_end
-	local ident
-	
-	token = *8p_token
-	prefix = token
-	ident_list_clear(parse_type_result)
-	
 	c = *1token
 	if c == KEYWORD_STRUCT goto skip_struct_union_enum
 	if c == KEYWORD_UNION goto skip_struct_union_enum
@@ -186,11 +198,30 @@ function parse_type
 			goto skip_base_type_loop
 	
 	:skip_base_type_loop_end
+	return token
 	
-	; find end of 1st prefix
-	base_type_end = token
-	
-	ident = 0
+	:skip_struct_union_enum
+		token += 16
+		if *1token != TOKEN_IDENTIFIER goto skip_sue_no_name
+			token += 16 ; struct *blah*
+		:skip_sue_no_name
+		if *1token != SYMBOL_LBRACE goto skip_base_type_loop_end ; e.g. struct Something x[5];
+		; okay we have something like
+		;   struct {
+		;       int x, y;
+		;   } test;
+		token_skip_to_matching_rbrace(&token)
+		token += 16
+		goto skip_base_type_loop_end
+	:str_bad_type
+		string Bad type.
+		byte 0
+
+
+; return the end of this type prefix
+function type_get_prefix_end
+	argument token
+	local c
 	:find_prefix_end_loop
 		c = *1token
 		if c == TOKEN_IDENTIFIER goto found_prefix_end
@@ -211,19 +242,23 @@ function parse_type
 			token += 16
 			goto find_prefix_end_loop
 	:found_prefix_end
-	prefix_end = token
-	
-	if *1token != TOKEN_IDENTIFIER goto parse_type_no_ident
-	token += 8
-	ident = *8token
-	token += 8
-	:parse_type_no_ident
-	
-	
-	suffix = token
+	return token
+
+; return the end of this type suffix
+; NOTE: you must pass in the PREFIX.
+; (In general, we can't find the end of the suffix without knowing the prefix.)
+;    int (*x);
+;            ^ suffix ends here
+;    (int *)
+;          ^ suffix ends here
+function type_get_suffix_end
+	argument prefix
+	local depth
+	local token
+	local c
 	
 	; find end of suffix
-	token = base_type_end ; start back here so we can keep track of bracket depth
+	token = prefix
 	depth = 0 ; parenthesis/square bracket depth
 	:suffix_end_loop
 		c = *1token
@@ -234,7 +269,7 @@ function parse_type
 		if c == SYMBOL_RPAREN goto suffix_end_decdepth
 		if c == SYMBOL_TIMES goto suffix_end_cont
 		if depth == 0 goto suffix_end_found
-		if c == TOKEN_EOF goto pt_bad_type
+		if c == TOKEN_EOF goto type_get_suffix_bad_type
 		goto suffix_end_cont
 		
 		:suffix_end_incdepth
@@ -249,87 +284,45 @@ function parse_type
 			goto suffix_end_loop
 	:suffix_end_found
 	
-	suffix_end = token
-	
-#define TYPEDEBUG ;
-	TYPEDEBUG	putc('B)
-	TYPEDEBUG	putc('a)
-	TYPEDEBUG	putc('s)
-	TYPEDEBUG	putc(':)
-	TYPEDEBUG	putc(32)
-	TYPEDEBUG	print_tokens(*8p_token, base_type_end)
-	TYPEDEBUG	putc('P)
-	TYPEDEBUG	putc('r)
-	TYPEDEBUG	putc('e)
-	TYPEDEBUG	putc(':)
-	TYPEDEBUG	putc(32)
-	TYPEDEBUG	print_tokens(prefix, prefix_end)
-	TYPEDEBUG	putc('S)
-	TYPEDEBUG	putc('u)
-	TYPEDEBUG	putc('f)
-	TYPEDEBUG	putc(':)
-	TYPEDEBUG	putc(32)
-	TYPEDEBUG	print_tokens(suffix, suffix_end)
-	
-	typeid = types_bytes_used
-	p = types + typeid
-	p = parse_type_given_base_prefix_suffix(*8p_token, prefix, prefix_end, suffix, suffix_end, p)
-	if ident == 0 goto type_no_ident
-		ident_list_add(parse_type_result, typeid)
-		typeid = 0
-	:type_no_ident
-	*8p_token = suffix_end
-	types_bytes_used = p - types
-	return typeid
-	
-	:skip_struct_union_enum
-		token += 16
-		if *1token != TOKEN_IDENTIFIER goto skip_sue_no_name
-			token += 16 ; struct *blah*
-		:skip_sue_no_name
-		if *1token != SYMBOL_LBRACE goto skip_base_type_loop_end ; e.g. struct Something x[5];
-		; okay we have something like
-		;   struct {
-		;       int x, y;
-		;   } test;
-		token_skip_to_matching_rbrace(&token)
-		token += 16
-		goto skip_base_type_loop_end
-	:pt_bad_type
-		token_error(*8p_token, .str_bad_type)
-	:str_bad_type
-		string Bad type.
-		byte 0
+	return token
+	:type_get_suffix_bad_type
+		token_error(prefix, .str_bad_type)
 
-function parse_type_given_base_prefix_suffix
-	argument base_type
+
+; writes to *(types + types_bytes_used), and updates types_bytes_used
+function parse_type_declarators
 	argument prefix
 	argument prefix_end
 	argument suffix
 	argument suffix_end
-	argument out
 	local p
 	local expr
 	local n
 	local c
 	local depth
+	local out
 	
 	; main loop for parsing types
-	:parse_type_loop
+	:type_declarators_loop
 		p = prefix_end - 16
 		if *1suffix == SYMBOL_LSQUARE goto parse_array_type
 		if *1suffix == SYMBOL_LPAREN goto parse_function_type
 		if *1p == SYMBOL_TIMES goto parse_pointer_type
-		if suffix == suffix_end goto parse_base_type
+		if suffix == suffix_end goto type_declarators_loop_end
 		if *1suffix == SYMBOL_RPAREN goto parse_type_remove_parentheses
-		goto bps_bad_type
+		goto parse_typedecls_bad_type
 		
 		:parse_pointer_type
+			out = types + types_bytes_used
 			*1out = TYPE_POINTER
-			out += 1
+			types_bytes_used += 1
 			prefix_end = p
-			goto parse_type_loop
+			goto type_declarators_loop
 		:parse_array_type
+			out = types + types_bytes_used
+			*1out = TYPE_ARRAY
+			types_bytes_used += 1
+			
 			local prev_types
 			local prev_types_bytes_used
 			; little hack to avoid screwing up types like  double[sizeof(int)]
@@ -339,74 +332,74 @@ function parse_type_given_base_prefix_suffix
 			types = malloc(4000)
 			types_init(types, &types_bytes_used)
 			
-			
 			expr = malloc(4000)
-			*1out = TYPE_ARRAY
-			out += 1
 			p = suffix
 			token_skip_to_matching_rsquare(&p)
 			suffix += 16 ; skip [
 			parse_expression(suffix, p, expr)
 			;print_expression(expr)
 			;putc(10)
-			evaluate_constant_expression(base_type, expr, &n)
+			evaluate_constant_expression(prefix, expr, &n)
 			if n < 0 goto bad_array_size
-			*8out = n
-			out += 8
 			free(expr)
 			free(types)
 			types = prev_types
 			types_bytes_used = prev_types_bytes_used
+			
+			out = types + types_bytes_used
+			*8out = n
+			types_bytes_used += 8
+			
 			suffix = p + 16
-			goto parse_type_loop
+			goto type_declarators_loop
 			:bad_array_size
-				token_error(base_type, .str_bad_array_size)
+				token_error(suffix, .str_bad_array_size)
 			:str_bad_array_size
 				string Very large or negative array size.
 				byte 0 
 		:parse_function_type
-			local prev_parse_type_result
-			prev_parse_type_result = parse_type_result
-			parse_type_result = ident_list_create(16000)
 			p = suffix + 16
+			out = types + types_bytes_used
 			*1out = TYPE_FUNCTION
-			out += 1
+			types_bytes_used += 1
 			:function_type_loop
-				if *1p == SYMBOL_RPAREN goto function_type_loop_end ; only needed for 1st iteration
-				n = parse_type(&p)
-				if n != 0 goto fparam_have_type
-				c = ident_list_len(parse_type_result)
-				if c != 1 goto bps_bad_type
-				n = ident_list_value_at_index(parse_type_result, 0)
-				:fparam_have_type
-				n += type_length(n)
-				out = types + n
-				if *1p == SYMBOL_RPAREN goto function_type_loop_end
-				if *1p != SYMBOL_COMMA goto bps_bad_type
-				p += 16
-				goto function_type_loop
-			:function_type_loop_end
+				byte 0xcc ; @TODO
+			out = types + types_bytes_used
 			*1out = 0
-			out += 1
+			types_bytes_used += 1
 			suffix = p + 16
-			ident_list_free(parse_type_result)
-			parse_type_result = prev_parse_type_result
-			goto parse_type_loop
+			goto type_declarators_loop
 		:parse_type_remove_parentheses
-			if *1p != SYMBOL_LPAREN goto bps_bad_type
+			if *1p != SYMBOL_LPAREN goto parse_typedecls_bad_type
 			prefix_end = p
 			suffix += 16
-			goto parse_type_loop
-	:parse_base_type
-	if *1prefix == TOKEN_IDENTIFIER goto base_type_typedef
-	if *1prefix == KEYWORD_STRUCT goto base_type_struct
-	if *1prefix == KEYWORD_UNION goto base_type_union
-	if *1prefix == KEYWORD_ENUM goto base_type_enum
-	if *1prefix == KEYWORD_FLOAT goto base_type_float
-	if *1prefix == KEYWORD_VOID goto base_type_void
+			goto type_declarators_loop
+	:type_declarators_loop_end
+	return 0
+	:parse_typedecls_bad_type
+		token_error(prefix, .str_bad_type)
+
+; writes to *(types + types_bytes_used), and updates types_bytes_used (no return value)
+function parse_base_type
+	argument base_type
+	local out
+	local flags
+	local p
+	local c
+	local depth
+	local expr
+	
+	out = types + types_bytes_used
+	
+	c = *1base_type
+	if c == TOKEN_IDENTIFIER goto base_type_typedef
+	if c == KEYWORD_STRUCT goto base_type_struct
+	if c == KEYWORD_UNION goto base_type_union
+	if c == KEYWORD_ENUM goto base_type_enum
+	if c == KEYWORD_FLOAT goto base_type_float
+	if c == KEYWORD_VOID goto base_type_void
 	
 	; "normal" type like int, unsigned char, etc.
-	local flags
 	; annoyingly, all of these are equivalent to `unsigned long`:
 	;    unsigned long int
 	;    long unsigned int
@@ -420,7 +413,7 @@ function parse_type_given_base_prefix_suffix
 #define PARSETYPE_FLAG_LONG 16
 #define PARSETYPE_FLAG_DOUBLE 32
 	flags = 0
-	p = prefix
+	p = base_type
 	:base_type_normal_loop
 		c = *1p
 		p += 16
@@ -467,6 +460,8 @@ function parse_type_given_base_prefix_suffix
 	if flags == 32 goto base_type_double ; `double`
 	if flags == 48 goto base_type_double ; `long double` (we use the same type for double and long double)
 	
+	goto bad_base_type
+	
 	:base_type_char
 		*1out = TYPE_CHAR
 		out += 1
@@ -505,10 +500,12 @@ function parse_type_given_base_prefix_suffix
 		goto base_type_done
 	
 	:base_type_done
-	return out
+	types_bytes_used = out - types
+	return 0
+	
 	:base_type_struct
 	:base_type_union
-		p = prefix + 16
+		p = base_type + 16
 		if *1p != TOKEN_IDENTIFIER goto base_type_struct_definition
 		p += 16
 		if *1p == SYMBOL_LBRACE goto base_type_struct_definition
@@ -527,7 +524,7 @@ function parse_type_given_base_prefix_suffix
 			out += 1
 			goto base_type_done
 		:base_type_struct_definition
-			if *1p != SYMBOL_LBRACE goto bps_bad_type
+			if *1p != SYMBOL_LBRACE goto bad_base_type
 			byte 0xcc ; @TODO
 	:base_type_enum
 		local q
@@ -536,9 +533,9 @@ function parse_type_given_base_prefix_suffix
 		out += 1
 		types_bytes_used = out - types
 		
-		p = prefix + 16
+		p = base_type + 16
 		if *1p == SYMBOL_LBRACE goto enum_definition
-		if *1p != TOKEN_IDENTIFIER goto bps_bad_type ; e.g. enum int x;
+		if *1p != TOKEN_IDENTIFIER goto bad_base_type ; e.g. enum int x;
 		p += 16
 		if *1p == SYMBOL_LBRACE goto enum_definition
 		goto base_type_done ; just using an enum type, not defining it.
@@ -571,7 +568,7 @@ function parse_type_given_base_prefix_suffix
 					if *1q == SYMBOL_COMMA goto enum_comma_loop_end
 					if *1q == SYMBOL_RBRACE goto enum_comma_loop_end
 					:enum_comma_deep
-					if *1q == TOKEN_EOF goto bps_bad_type
+					if *1q == TOKEN_EOF goto bad_base_type
 					c = *1q
 					q += 16
 					if c == SYMBOL_LPAREN goto enum_comma_incdepth
@@ -625,18 +622,22 @@ function parse_type_given_base_prefix_suffix
 		out += 1
 		goto base_type_done	
 	:base_type_typedef
-		p = prefix + 8
+		p = base_type + 8
 		c = ident_list_lookup(typedefs, *8p)
-		if c == 0 goto bps_bad_type
-		n = type_length(c)
+		if c == 0 goto bad_base_type
+		local len
+		len = type_length(c)
 		c += types
-		memcpy(out, c, n)
-		out += n
+		memcpy(out, c, len)
+		out += len
 		goto base_type_done
 	
-	:bps_bad_type
-		token_error(base_type, .str_bad_type)
-	
+	:bad_base_type
+		token_error(base_type, .str_bad_base_type)
+	:str_bad_base_type
+		string Bad base type.
+		byte 0
+
 ; how many bytes does it take to encode this type?
 function type_length
 	argument type
@@ -1258,7 +1259,11 @@ function type_sizeof
 	if c == TYPE_POINTER goto return_8
 	if c == TYPE_FUNCTION goto return_8
 	if c == TYPE_ARRAY goto sizeof_array
-	byte 0xcc ;  @TODO
+	fputs(2, .str_sizeof_ni) ;  @TODO
+	exit(1)
+	:str_sizeof_ni
+		string type_sizeof for this type not implemented.
+		byte 0
 	
 	:sizeof_array
 		local n
-- 
cgit v1.2.3