summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorpommicket <pommicket@gmail.com>2022-01-11 00:09:11 -0500
committerpommicket <pommicket@gmail.com>2022-01-11 00:09:11 -0500
commit97dca844c802c96ee36610b1febf7ac2b5365bf1 (patch)
tree01c578196c7fb755696b49b0435625683947b16a
parent34dfc3238d047f4e9d670ad7d489fcb2013f390e (diff)
start tokenization
-rw-r--r--05/constants.b294
-rw-r--r--05/main.b34
-rw-r--r--05/main.c21
-rw-r--r--05/preprocess.b41
-rw-r--r--05/tokenize.b487
-rw-r--r--05/util.b2
6 files changed, 813 insertions, 66 deletions
diff --git a/05/constants.b b/05/constants.b
index 9effeee..370f197 100644
--- a/05/constants.b
+++ b/05/constants.b
@@ -1,37 +1,115 @@
-; #define KEYWORD_AUTO 101 (auto only exists in C for legacy reasons and doesn't appear in TCC's source code)
-#define KEYWORD_DOUBLE 102
-#define KEYWORD_INT 103
-#define KEYWORD_STRUCT 104
-#define KEYWORD_BREAK 105
-#define KEYWORD_ELSE 106
-#define KEYWORD_LONG 107
-#define KEYWORD_SWITCH 108
-#define KEYWORD_CASE 109
-#define KEYWORD_ENUM 110
-#define KEYWORD_REGISTER 111
-#define KEYWORD_TYPEDEF 112
-#define KEYWORD_CHAR 113
-#define KEYWORD_EXTERN 114
-#define KEYWORD_RETURN 115
-#define KEYWORD_UNION 116
-; #define KEYWORD_CONST 117 (we can just #define const)
-#define KEYWORD_FLOAT 118
-#define KEYWORD_SHORT 119
-#define KEYWORD_UNSIGNED 120
-#define KEYWORD_CONTINUE 121
-#define KEYWORD_FOR 122
-; #define KEYWORD_SIGNED 123 (again, just #define signed)
-#define KEYWORD_VOID 124
-#define KEYWORD_DEFAULT 125
-#define KEYWORD_GOTO 126
-#define KEYWORD_SIZEOF 127
-; #define KEYWORD_VOLATILE 128 (just #define volatile if need be)
-#define KEYWORD_DO 129
-#define KEYWORD_IF 130
-#define KEYWORD_STATIC 131
-#define KEYWORD_WHILE 132
+; C OPERATOR PRECEDENCE
+; lowest
+; 1 ,
+; 2 = += -= *= /= %= <<= >>= &= ^= |=
+; 3 ? ... :
+; 4 ||
+; 5 &&
+; 6 |
+; 7 ^
+; 8 &
+; 9 == !=
+; a < > <= >=
+; b << >>
+; c + -
+; d * / %
+; e unary prefixes ++ -- & * + - ~ !
+; f . -> () [] postfix ++ --
+; highest
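+; NB: for equal precedence, operators are applied left-to-right except for assignment operators (precedence 2), the conditional operator (precedence 3), and unary prefix operators (precedence e), which are applied right-to-left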
-:str_missing_closing_bracket
+; pattern for binary operators is: 0x10px where p is precedence
+; NB: these four can also be unary: & * + -
+#define MASK_SYMBOL_PRECEDENCE 0x0ff0
+#define SYMBOL_COMMA 0x1010
+#define SYMBOL_EQ 0x1020
+#define SYMBOL_PLUS_EQ 0x1021
+#define SYMBOL_MINUS_EQ 0x1022
+#define SYMBOL_TIMES_EQ 0x1023
+#define SYMBOL_DIV_EQ 0x1024
+#define SYMBOL_PERCENT_EQ 0x1025
+#define SYMBOL_LSHIFT_EQ 0x1026
+#define SYMBOL_RSHIFT_EQ 0x1027
+#define SYMBOL_AND_EQ 0x1028
+#define SYMBOL_XOR_EQ 0x1029
+#define SYMBOL_OR_EQ 0x102a
+#define SYMBOL_QUESTION 0x1030
+#define SYMBOL_OR_OR 0x1040
+#define SYMBOL_AND_AND 0x1050
+#define SYMBOL_OR 0x1060
+#define SYMBOL_XOR 0x1070
+#define SYMBOL_AND 0x1080
+#define SYMBOL_EQ_EQ 0x1090
+#define SYMBOL_NOT_EQ 0x1091
+#define SYMBOL_LT 0x10a0
+#define SYMBOL_GT 0x10a1
+#define SYMBOL_LT_EQ 0x10a2
+#define SYMBOL_GT_EQ 0x10a3
+#define SYMBOL_LSHIFT 0x10b0
+#define SYMBOL_RSHIFT 0x10b1
+#define SYMBOL_PLUS 0x10c0
+#define SYMBOL_MINUS 0x10c1
+#define SYMBOL_TIMES 0x10d0
+#define SYMBOL_DIV 0x10d1
+#define SYMBOL_PERCENT 0x10d2
+
+#define SYMBOL_PLUS_PLUS 100
+#define SYMBOL_MINUS_MINUS 101
+#define SYMBOL_NOT 102
+#define SYMBOL_TILDE 103
+#define SYMBOL_ARROW 104
+#define SYMBOL_DOTDOTDOT 105
+#define SYMBOL_COLON 106
+#define SYMBOL_LBRACE 107
+#define SYMBOL_RBRACE 108
+#define SYMBOL_LSQUARE 109
+#define SYMBOL_RSQUARE 110
+#define SYMBOL_LPAREN 111
+#define SYMBOL_RPAREN 112
+#define SYMBOL_SEMICOLON 113
+
+
+#define TOKEN_IDENTIFIER 1
+#define TOKEN_CONSTANT_FLOAT 2
+#define TOKEN_CONSTANT_INT 3
+#define TOKEN_CONSTANT_CHAR 4
+#define TOKEN_STRING 5
+
+
+; #define KEYWORD_AUTO 21 (@NONSTANDARD auto only exists in C for legacy reasons and doesn't appear in TCC's source code)
+#define KEYWORD_DOUBLE 22
+#define KEYWORD_INT 23
+#define KEYWORD_STRUCT 24
+#define KEYWORD_BREAK 25
+#define KEYWORD_ELSE 26
+#define KEYWORD_LONG 27
+#define KEYWORD_SWITCH 28
+#define KEYWORD_CASE 29
+#define KEYWORD_ENUM 30
+; #define KEYWORD_REGISTER 31 (we can just #define register)
+#define KEYWORD_TYPEDEF 32
+#define KEYWORD_CHAR 33
+#define KEYWORD_EXTERN 34
+#define KEYWORD_RETURN 35
+#define KEYWORD_UNION 36
+; #define KEYWORD_CONST 37 (we can just #define const)
+#define KEYWORD_FLOAT 38
+#define KEYWORD_SHORT 39
+#define KEYWORD_UNSIGNED 40
+#define KEYWORD_CONTINUE 41
+#define KEYWORD_FOR 42
+; #define KEYWORD_SIGNED 43 (again, just #define signed)
+#define KEYWORD_VOID 44
+#define KEYWORD_DEFAULT 45
+#define KEYWORD_GOTO 46
+#define KEYWORD_SIZEOF 47
+; #define KEYWORD_VOLATILE 48 (just #define volatile if need be)
+#define KEYWORD_DO 49
+#define KEYWORD_IF 50
+#define KEYWORD_STATIC 51
+#define KEYWORD_WHILE 52
+
+:str_missing_closing_paren
string Missing closing ).
byte 0
:str_comment_start
@@ -76,7 +154,7 @@
:str_div_eq
string /=
byte 0
-:str_remainder_eq
+:str_percent_eq
string %=
byte 0
:str_and_eq
@@ -109,6 +187,78 @@
:str_hash_hash
string ##
byte 0
+:str_eq
+ string =
+ byte 0
+:str_not
+ string !
+ byte 0
+:str_tilde
+ string ~
+ byte 0
+:str_lt
+ string <
+ byte 0
+:str_gt
+ string >
+ byte 0
+:str_and
+ string &
+ byte 0
+:str_or
+ string |
+ byte 0
+:str_xor
+ string ^
+ byte 0
+:str_plus
+ string +
+ byte 0
+:str_minus
+ string -
+ byte 0
+:str_times
+ string *
+ byte 0
+:str_div
+ string /
+ byte 0
+:str_percent
+ string %
+ byte 0
+:str_question
+ string ?
+ byte 0
+:str_comma
+ string ,
+ byte 0
+:str_colon
+ string :
+ byte 0
+:str_semicolon
+ byte 59
+ byte 0
+:str_dot
+ string .
+ byte 0
+:str_lparen
+ string (
+ byte 0
+:str_rparen
+ string )
+ byte 0
+:str_lsquare
+ string [
+ byte 0
+:str_rsquare
+ string ]
+ byte 0
+:str_lbrace
+ string {
+ byte 0
+:str_rbrace
+ string }
+ byte 0
:str_error
string error
byte 0
@@ -145,6 +295,82 @@
:str_endif
string endif
byte 0
+:str_double
+ string double
+ byte 0
+:str_int
+ string int
+ byte 0
+:str_struct
+ string struct
+ byte 0
+:str_break
+ string break
+ byte 0
+:str_long
+ string long
+ byte 0
+:str_switch
+ string switch
+ byte 0
+:str_case
+ string case
+ byte 0
+:str_enum
+ string enum
+ byte 0
+:str_typedef
+ string typedef
+ byte 0
+:str_char
+ string char
+ byte 0
+:str_extern
+ string extern
+ byte 0
+:str_return
+ string return
+ byte 0
+:str_union
+ string union
+ byte 0
+:str_float
+ string float
+ byte 0
+:str_short
+ string short
+ byte 0
+:str_unsigned
+ string unsigned
+ byte 0
+:str_continue
+ string continue
+ byte 0
+:str_for
+ string for
+ byte 0
+:str_void
+ string void
+ byte 0
+:str_default
+ string default
+ byte 0
+:str_goto
+ string goto
+ byte 0
+:str_sizeof
+ string sizeof
+ byte 0
+:str_do
+ string do
+ byte 0
+:str_static
+ string static
+ byte 0
+:str_while
+ string while
+ byte 0
+
:str___FILE__
string __FILE__
byte 0
diff --git a/05/main.b b/05/main.b
index a42dcc7..8ca9ab5 100644
--- a/05/main.b
+++ b/05/main.b
@@ -15,11 +15,24 @@ global function_macros_size
global object_macros
global function_macros
+; print a file name to the file descriptor fd.
+; accepts EITHER file index OR pointer to filename:
+;   values up to 65535 are treated as an index and looked up with file_get,
+;   anything larger (unsigned compare) is treated as a pointer to a null-terminated name.
+function fprint_filename
+ argument fd
+ argument file
+ if file ] 65535 goto print_filename_string
+ ; index: translate to a name, then print it exactly once via the shared path below
+ file = file_get(file)
+ ; (fallthrough)
+ :print_filename_string
+ fputs(fd, file)
+ return
+
+; accepts EITHER file index OR pointer to filename
function compile_error
argument file
argument line
argument message
- fputs(2, file)
+ fprint_filename(2, file)
fputc(2, ':)
fputn(2, line)
fputs(2, .str_error_prefix)
@@ -27,11 +40,12 @@ function compile_error
fputc(2, 10)
exit(1)
+; accepts EITHER file index OR pointer to filename
function compile_warning
argument file
argument line
argument message
- fputs(2, file)
+ fprint_filename(2, file)
fputc(2, ':)
fputn(2, line)
fputs(2, .str_warning_prefix)
@@ -52,6 +66,7 @@ function compile_warning
#include util.b
#include constants.b
#include preprocess.b
+#include tokenize.b
function main
argument argv2
@@ -62,10 +77,13 @@ function main
local output_filename
local pptokens
local processed_pptokens
+ local tokens
dat_banned_objmacros = 255
dat_banned_fmacros = 255
+ file_list = malloc(40000)
+ *1file_list = 255
object_macros = malloc(4000000)
function_macros = malloc(4000000)
@@ -77,15 +95,19 @@ function main
output_filename = argv2
:have_filenames
pptokens = split_into_preprocessing_tokens(input_filename)
- print_pptokens(pptokens)
- print_separator()
+ ;print_pptokens(pptokens)
+ ;print_separator()
processed_pptokens = malloc(16000000)
translation_phase_4(input_filename, pptokens, processed_pptokens)
free(pptokens)
pptokens = processed_pptokens
print_pptokens(pptokens)
- print_object_macros()
- print_function_macros()
+ print_separator()
+ ;print_object_macros()
+ ;print_function_macros()
+ tokens = malloc(16000000)
+ tokenize(pptokens, tokens)
+ print_tokens(tokens)
exit(0)
:usage_error
diff --git a/05/main.c b/05/main.c
index f0094c1..8502974 100644
--- a/05/main.c
+++ b/05/main.c
@@ -1,21 +1,2 @@
-#define F(x) x x
-F(2
-3)
-
-#define STRINGIFY(x) #x
-#define LINE_NUMBER 1982
-#define INC_FILE STRINGIFY(macro_test.c)
-
-#include INC_FILE /* include macro test */
-
-a
-#ifndef INC_FILEd
-
-xglue(LINE_,NUMBER)
-#else
-Hello
-#endif
-b
-
-#pragma
+sizeof(int)
diff --git a/05/preprocess.b b/05/preprocess.b
index 00c5f1a..2379fec 100644
--- a/05/preprocess.b
+++ b/05/preprocess.b
@@ -129,7 +129,7 @@ function split_into_preprocessing_tokens
if b != 0 goto pptoken_2_chars
b = str_startswith(in, .str_div_eq)
if b != 0 goto pptoken_2_chars
- b = str_startswith(in, .str_remainder_eq)
+ b = str_startswith(in, .str_percent_eq)
if b != 0 goto pptoken_2_chars
b = str_startswith(in, .str_and_eq)
if b != 0 goto pptoken_2_chars
@@ -533,6 +533,10 @@ function translation_phase_4
if b != 0 goto pp_directive_include
b = str_equals(in, .str_ifdef)
if b != 0 goto pp_directive_ifdef
+ b = str_equals(in, .str_if)
+ if b != 0 goto pp_directive_if
+ b = str_equals(in, .str_elif)
+ if b != 0 goto pp_directive_else ; treat elif the same as else at this point
b = str_equals(in, .str_ifndef)
if b != 0 goto pp_directive_ifndef
b = str_equals(in, .str_else)
@@ -822,13 +826,29 @@ function translation_phase_4
goto process_pptoken
:pp_directive_else
; assume we got here from an if, so skip this
- pptoken_skip(&in)
+ pptoken_skip_to_newline(&in)
preprocessor_skip_if(filename, &line_number, &in, &out)
goto process_pptoken
:pp_directive_endif
; assume we got here from an if/elif/else, just ignore it.
pptoken_skip(&in)
goto process_pptoken
+ :pp_directive_if
+ local if_pptokens
+ pptoken_skip(&in)
+ pptoken_skip_spaces(&in)
+
+ if_pptokens = malloc(4000)
+ p = if_pptokens
+ macro_replacement_to_terminator(filename, line_number, &in, &p, 10)
+ ;@TODO: there's no point in doing this until we have parsing
+ ; we'll have to evaluate constant expressions anyways for array declarations
+ fputs(2, .str_if_not_implemented)
+ byte 0xcc
+ :str_if_not_implemented
+ string #if not implemented.
+ byte 10
+ byte 0
:unrecognized_directive
compile_error(filename, line_number, .str_unrecognized_directive)
:str_unrecognized_directive
@@ -843,7 +863,7 @@ function translation_phase_4
string Macro redefinition.
byte 0
:phase4_missing_closing_bracket
- compile_error(filename, line_number, .str_missing_closing_bracket)
+ compile_error(filename, line_number, .str_missing_closing_paren)
:bad_macro_params
compile_error(filename, line_number, .str_bad_macro_params)
:str_bad_macro_params
@@ -876,7 +896,7 @@ function translation_phase_4
; skip body of #if / #elif / #else. This will advance *p_in to:
-; - the next unmatched #elif
+; - right before the next unmatched #elif, replacing it with a #if
; OR - right after the next #else
; OR - right after the next #endif
; whichever comes first
@@ -932,7 +952,16 @@ function preprocessor_skip_if
goto preprocessor_skip_if_loop ; some unimportant directive
:skip_if_elif
if if_depth > 0 goto preprocessor_skip_if_loop
- in -= 2 ; return to #
+ ; replace #elif with #if (kinda sketchy)
+ *1in = '#
+ in += 1
+ *1in = 0
+ in += 1
+ *1in = 'i
+ in += 1
+ *1in = 'f
+ in -= 5
+ *1in = 10 ; we need a newline so the #elif actually gets handled
goto preprocessor_skip_if_loop_end
:skip_if_inc_depth
if_depth += 1
@@ -1363,7 +1392,7 @@ function fmacro_arg_end
return in
:fmacro_missing_closing_bracket
- compile_error(filename, *8p_line_number, .str_missing_closing_bracket)
+ compile_error(filename, *8p_line_number, .str_missing_closing_paren)
function print_object_macros
print_macros(object_macros)
diff --git a/05/tokenize.b b/05/tokenize.b
new file mode 100644
index 0000000..6b8d7ff
--- /dev/null
+++ b/05/tokenize.b
@@ -0,0 +1,487 @@
+global file_list ; initialized in main -- null-separated 255-terminated array of strings
+
+; get the name of the file with the given index
+; file_list holds null-terminated names back to back, ended by a 255 byte.
+; walks idx names forward and returns a pointer to the start of that name.
+; if the 255 end marker is reached while idx is still nonzero, prints an
+; error to fd 2 and exits with status 1.
+; NOTE(review): idx is tested before the end marker, so an index exactly one
+; past the last name returns a pointer to the 255 marker instead of erroring.
+function file_get
+ argument idx
+ local p
+ p = file_list
+ :file_get_loop
+ if idx == 0 goto file_got
+ if *1p == 255 goto file_uhoh
+ idx -= 1
+ ; advance to the terminator of the current name, then step past it
+ p = memchr(p, 0)
+ p += 1
+ goto file_get_loop
+ :file_got
+ return p
+ :file_uhoh
+ fputs(2, .str_bad_file_index)
+ exit(1)
+ :str_bad_file_index
+ string Bad file index. This shouldn't happen.
+ byte 10
+ byte 0
+
+; get the index of the given file, returns -1 if file does not exist
+; compares filename against each entry of file_list in order using str_equals;
+; the index is the 0-based position of the first matching entry.
+function file_get_index
+ argument filename
+ local p
+ local b
+ local i
+ p = file_list
+ i = 0
+ :file_get_index_loop
+ ; 255 marks the end of the list: not found (return_minus1 is a shared label in util.b)
+ if *1p == 255 goto return_minus1
+ b = str_equals(p, filename)
+ if b != 0 goto file_found
+ i += 1
+ ; skip to the byte after this entry's null terminator
+ p = memchr(p, 0)
+ p += 1
+ goto file_get_index_loop
+ :file_found
+ return i
+
+; add to list of files if not already there
+; the new name is written over the 255 end marker and a fresh 255 marker is
+; placed after its null terminator.
+; NOTE(review): there is no check against the size of file_list (main allocates 40000 bytes).
+function file_add
+ argument filename
+ local p
+ p = file_get_index(filename)
+ ; already present: nothing to do (return_0 is presumably the shared "return 0" label in util.b)
+ if p != -1 goto return_0
+ ; locate the end-of-list marker; the new name starts there
+ p = memchr(file_list, 255)
+ ; assumes strcpy returns a pointer to the copied string's null terminator -- TODO confirm in util.b
+ p = strcpy(p, filename)
+ p += 1
+ *1p = 255
+ return
+
+; turn pptokens into tokens, written to out.
+; tokens are 16 bytes and have the following format:
+;    ushort type
+;    ushort file
+;    uint line
+;    ulong data
+; pptokens is a sequence of null-terminated strings, ended by an empty string.
+; special pptokens handled here:
+;    "$<line> <filename>"  line directive: sets the current line number and file index
+;    newline (byte 10)     increments the current line number
+;    space (byte 32)       skipped
+; every other pptoken is compared against the symbol and keyword strings; on a match
+; a token with that SYMBOL_*/KEYWORD_* type, the current file and line, and no data is emitted.
+; the output is ended by a token whose type is 0 (print_tokens stops there).
+; always returns 0.
+; NOTE(review): file and line_number are only assigned once a line directive is seen.
+function tokenize
+ argument pptokens
+ argument out
+ local in
+ local file
+ local line_number
+ local b
+ in = pptokens
+ :tokenize_loop
+ if *1in == '$ goto tokenize_line_directive
+ if *1in == 32 goto tokenize_skip_pptoken
+ if *1in == 10 goto tokenize_newline
+ if *1in == 0 goto tokenize_loop_end
+
+ ; symbols (str_equals compares whole pptokens, so order does not matter here)
+ b = str_equals(in, .str_comma)
+ if b != 0 goto keyword_comma
+ b = str_equals(in, .str_eq)
+ if b != 0 goto keyword_eq
+ b = str_equals(in, .str_plus_eq)
+ if b != 0 goto keyword_plus_eq
+ b = str_equals(in, .str_minus_eq)
+ if b != 0 goto keyword_minus_eq
+ b = str_equals(in, .str_times_eq)
+ if b != 0 goto keyword_times_eq
+ b = str_equals(in, .str_div_eq)
+ if b != 0 goto keyword_div_eq
+ b = str_equals(in, .str_percent_eq)
+ if b != 0 goto keyword_percent_eq
+ b = str_equals(in, .str_lshift_eq)
+ if b != 0 goto keyword_lshift_eq
+ b = str_equals(in, .str_rshift_eq)
+ if b != 0 goto keyword_rshift_eq
+ b = str_equals(in, .str_and_eq)
+ if b != 0 goto keyword_and_eq
+ b = str_equals(in, .str_xor_eq)
+ if b != 0 goto keyword_xor_eq
+ b = str_equals(in, .str_or_eq)
+ if b != 0 goto keyword_or_eq
+ b = str_equals(in, .str_question)
+ if b != 0 goto keyword_question
+ b = str_equals(in, .str_or_or)
+ if b != 0 goto keyword_or_or
+ b = str_equals(in, .str_and_and)
+ if b != 0 goto keyword_and_and
+ b = str_equals(in, .str_or)
+ if b != 0 goto keyword_or
+ b = str_equals(in, .str_xor)
+ if b != 0 goto keyword_xor
+ b = str_equals(in, .str_and)
+ if b != 0 goto keyword_and
+ b = str_equals(in, .str_eq_eq)
+ if b != 0 goto keyword_eq_eq
+ b = str_equals(in, .str_not_eq)
+ if b != 0 goto keyword_not_eq
+ b = str_equals(in, .str_lt)
+ if b != 0 goto keyword_lt
+ b = str_equals(in, .str_gt)
+ if b != 0 goto keyword_gt
+ b = str_equals(in, .str_lt_eq)
+ if b != 0 goto keyword_lt_eq
+ b = str_equals(in, .str_gt_eq)
+ if b != 0 goto keyword_gt_eq
+ b = str_equals(in, .str_lshift)
+ if b != 0 goto keyword_lshift
+ b = str_equals(in, .str_rshift)
+ if b != 0 goto keyword_rshift
+ b = str_equals(in, .str_plus)
+ if b != 0 goto keyword_plus
+ b = str_equals(in, .str_minus)
+ if b != 0 goto keyword_minus
+ b = str_equals(in, .str_times)
+ if b != 0 goto keyword_times
+ b = str_equals(in, .str_div)
+ if b != 0 goto keyword_div
+ b = str_equals(in, .str_percent)
+ if b != 0 goto keyword_percent
+ b = str_equals(in, .str_plus_plus)
+ if b != 0 goto keyword_plus_plus
+ b = str_equals(in, .str_minus_minus)
+ if b != 0 goto keyword_minus_minus
+ b = str_equals(in, .str_not)
+ if b != 0 goto keyword_not
+ b = str_equals(in, .str_tilde)
+ if b != 0 goto keyword_tilde
+ b = str_equals(in, .str_arrow)
+ if b != 0 goto keyword_arrow
+ b = str_equals(in, .str_dotdotdot)
+ if b != 0 goto keyword_dotdotdot
+ b = str_equals(in, .str_colon)
+ if b != 0 goto keyword_colon
+ b = str_equals(in, .str_lbrace)
+ if b != 0 goto keyword_lbrace
+ b = str_equals(in, .str_rbrace)
+ if b != 0 goto keyword_rbrace
+ b = str_equals(in, .str_lsquare)
+ if b != 0 goto keyword_lsquare
+ b = str_equals(in, .str_rsquare)
+ if b != 0 goto keyword_rsquare
+ b = str_equals(in, .str_lparen)
+ if b != 0 goto keyword_lparen
+ b = str_equals(in, .str_rparen)
+ if b != 0 goto keyword_rparen
+ b = str_equals(in, .str_semicolon)
+ if b != 0 goto keyword_semicolon
+ ; keywords
+ b = str_equals(in, .str_double)
+ if b != 0 goto keyword_double
+ b = str_equals(in, .str_int)
+ if b != 0 goto keyword_int
+ b = str_equals(in, .str_struct)
+ if b != 0 goto keyword_struct
+ b = str_equals(in, .str_break)
+ if b != 0 goto keyword_break
+ b = str_equals(in, .str_else)
+ if b != 0 goto keyword_else
+ b = str_equals(in, .str_long)
+ if b != 0 goto keyword_long
+ b = str_equals(in, .str_switch)
+ if b != 0 goto keyword_switch
+ b = str_equals(in, .str_case)
+ if b != 0 goto keyword_case
+ b = str_equals(in, .str_enum)
+ if b != 0 goto keyword_enum
+ b = str_equals(in, .str_typedef)
+ if b != 0 goto keyword_typedef
+ b = str_equals(in, .str_char)
+ if b != 0 goto keyword_char
+ b = str_equals(in, .str_extern)
+ if b != 0 goto keyword_extern
+ b = str_equals(in, .str_return)
+ if b != 0 goto keyword_return
+ b = str_equals(in, .str_union)
+ if b != 0 goto keyword_union
+ b = str_equals(in, .str_float)
+ if b != 0 goto keyword_float
+ b = str_equals(in, .str_short)
+ if b != 0 goto keyword_short
+ b = str_equals(in, .str_unsigned)
+ if b != 0 goto keyword_unsigned
+ b = str_equals(in, .str_continue)
+ if b != 0 goto keyword_continue
+ b = str_equals(in, .str_for)
+ if b != 0 goto keyword_for
+ b = str_equals(in, .str_void)
+ if b != 0 goto keyword_void
+ b = str_equals(in, .str_default)
+ if b != 0 goto keyword_default
+ b = str_equals(in, .str_goto)
+ if b != 0 goto keyword_goto
+ b = str_equals(in, .str_sizeof)
+ if b != 0 goto keyword_sizeof
+ b = str_equals(in, .str_do)
+ if b != 0 goto keyword_do
+ b = str_equals(in, .str_if)
+ if b != 0 goto keyword_if
+ b = str_equals(in, .str_static)
+ if b != 0 goto keyword_static
+ b = str_equals(in, .str_while)
+ if b != 0 goto keyword_while
+
+ ; not a symbol or keyword (identifiers, constants and strings are not handled yet).
+ ; presumably 0xcc is the x86 int3 opcode, used here as a deliberate trap -- TODO confirm
+ byte 0xcc
+
+ :tokenize_newline
+ line_number += 1
+ pptoken_skip(&in)
+ goto tokenize_loop
+ :tokenize_skip_pptoken
+ pptoken_skip(&in)
+ goto tokenize_loop
+ :tokenize_line_directive
+ ; format: $<line number> <filename>
+ in += 1
+ line_number = stoi(in)
+ in = memchr(in, 32)
+ in += 1
+ ; register the file name (if new) and remember its index for subsequent tokens
+ file_add(in)
+ file = file_get_index(in)
+ pptoken_skip(&in)
+ goto tokenize_loop
+ :tokenize_keyword
+ ; emit one 16-byte token whose type is in b
+ *2out = b ; type
+ out += 2
+ *2out = file
+ out += 2
+ *4out = line_number
+ out += 4
+ ; no data
+ out += 8
+ pptoken_skip(&in)
+ goto tokenize_loop
+ :keyword_comma
+ b = SYMBOL_COMMA
+ goto tokenize_keyword
+ :keyword_eq
+ b = SYMBOL_EQ
+ goto tokenize_keyword
+ :keyword_plus_eq
+ b = SYMBOL_PLUS_EQ
+ goto tokenize_keyword
+ :keyword_minus_eq
+ b = SYMBOL_MINUS_EQ
+ goto tokenize_keyword
+ :keyword_times_eq
+ b = SYMBOL_TIMES_EQ
+ goto tokenize_keyword
+ :keyword_div_eq
+ b = SYMBOL_DIV_EQ
+ goto tokenize_keyword
+ :keyword_percent_eq
+ b = SYMBOL_PERCENT_EQ
+ goto tokenize_keyword
+ :keyword_lshift_eq
+ b = SYMBOL_LSHIFT_EQ
+ goto tokenize_keyword
+ :keyword_rshift_eq
+ b = SYMBOL_RSHIFT_EQ
+ goto tokenize_keyword
+ :keyword_and_eq
+ b = SYMBOL_AND_EQ
+ goto tokenize_keyword
+ :keyword_xor_eq
+ b = SYMBOL_XOR_EQ
+ goto tokenize_keyword
+ :keyword_or_eq
+ b = SYMBOL_OR_EQ
+ goto tokenize_keyword
+ :keyword_question
+ b = SYMBOL_QUESTION
+ goto tokenize_keyword
+ :keyword_or_or
+ b = SYMBOL_OR_OR
+ goto tokenize_keyword
+ :keyword_and_and
+ b = SYMBOL_AND_AND
+ goto tokenize_keyword
+ :keyword_or
+ b = SYMBOL_OR
+ goto tokenize_keyword
+ :keyword_xor
+ b = SYMBOL_XOR
+ goto tokenize_keyword
+ :keyword_and
+ b = SYMBOL_AND
+ goto tokenize_keyword
+ :keyword_eq_eq
+ b = SYMBOL_EQ_EQ
+ goto tokenize_keyword
+ :keyword_not_eq
+ b = SYMBOL_NOT_EQ
+ goto tokenize_keyword
+ :keyword_lt
+ b = SYMBOL_LT
+ goto tokenize_keyword
+ :keyword_gt
+ b = SYMBOL_GT
+ goto tokenize_keyword
+ :keyword_lt_eq
+ b = SYMBOL_LT_EQ
+ goto tokenize_keyword
+ :keyword_gt_eq
+ b = SYMBOL_GT_EQ
+ goto tokenize_keyword
+ :keyword_lshift
+ b = SYMBOL_LSHIFT
+ goto tokenize_keyword
+ :keyword_rshift
+ b = SYMBOL_RSHIFT
+ goto tokenize_keyword
+ :keyword_plus
+ b = SYMBOL_PLUS
+ goto tokenize_keyword
+ :keyword_minus
+ b = SYMBOL_MINUS
+ goto tokenize_keyword
+ :keyword_times
+ b = SYMBOL_TIMES
+ goto tokenize_keyword
+ :keyword_div
+ b = SYMBOL_DIV
+ goto tokenize_keyword
+ :keyword_percent
+ b = SYMBOL_PERCENT
+ goto tokenize_keyword
+ :keyword_plus_plus
+ b = SYMBOL_PLUS_PLUS
+ goto tokenize_keyword
+ :keyword_minus_minus
+ b = SYMBOL_MINUS_MINUS
+ goto tokenize_keyword
+ :keyword_not
+ b = SYMBOL_NOT
+ goto tokenize_keyword
+ :keyword_tilde
+ b = SYMBOL_TILDE
+ goto tokenize_keyword
+ :keyword_arrow
+ b = SYMBOL_ARROW
+ goto tokenize_keyword
+ :keyword_dotdotdot
+ b = SYMBOL_DOTDOTDOT
+ goto tokenize_keyword
+ :keyword_colon
+ b = SYMBOL_COLON
+ goto tokenize_keyword
+ :keyword_lbrace
+ b = SYMBOL_LBRACE
+ goto tokenize_keyword
+ :keyword_rbrace
+ b = SYMBOL_RBRACE
+ goto tokenize_keyword
+ :keyword_lsquare
+ b = SYMBOL_LSQUARE
+ goto tokenize_keyword
+ :keyword_rsquare
+ b = SYMBOL_RSQUARE
+ goto tokenize_keyword
+ :keyword_lparen
+ b = SYMBOL_LPAREN
+ goto tokenize_keyword
+ :keyword_rparen
+ b = SYMBOL_RPAREN
+ goto tokenize_keyword
+ :keyword_semicolon
+ b = SYMBOL_SEMICOLON
+ goto tokenize_keyword
+ :keyword_double
+ b = KEYWORD_DOUBLE
+ goto tokenize_keyword
+ :keyword_int
+ b = KEYWORD_INT
+ goto tokenize_keyword
+ :keyword_struct
+ b = KEYWORD_STRUCT
+ goto tokenize_keyword
+ :keyword_break
+ b = KEYWORD_BREAK
+ goto tokenize_keyword
+ :keyword_else
+ b = KEYWORD_ELSE
+ goto tokenize_keyword
+ :keyword_long
+ b = KEYWORD_LONG
+ goto tokenize_keyword
+ :keyword_switch
+ b = KEYWORD_SWITCH
+ goto tokenize_keyword
+ :keyword_case
+ b = KEYWORD_CASE
+ goto tokenize_keyword
+ :keyword_enum
+ b = KEYWORD_ENUM
+ goto tokenize_keyword
+ :keyword_typedef
+ b = KEYWORD_TYPEDEF
+ goto tokenize_keyword
+ :keyword_char
+ b = KEYWORD_CHAR
+ goto tokenize_keyword
+ :keyword_extern
+ b = KEYWORD_EXTERN
+ goto tokenize_keyword
+ :keyword_return
+ b = KEYWORD_RETURN
+ goto tokenize_keyword
+ :keyword_union
+ b = KEYWORD_UNION
+ goto tokenize_keyword
+ :keyword_float
+ b = KEYWORD_FLOAT
+ goto tokenize_keyword
+ :keyword_short
+ b = KEYWORD_SHORT
+ goto tokenize_keyword
+ :keyword_unsigned
+ b = KEYWORD_UNSIGNED
+ goto tokenize_keyword
+ :keyword_continue
+ b = KEYWORD_CONTINUE
+ goto tokenize_keyword
+ :keyword_for
+ b = KEYWORD_FOR
+ goto tokenize_keyword
+ :keyword_void
+ b = KEYWORD_VOID
+ goto tokenize_keyword
+ :keyword_default
+ b = KEYWORD_DEFAULT
+ goto tokenize_keyword
+ :keyword_goto
+ b = KEYWORD_GOTO
+ goto tokenize_keyword
+ :keyword_sizeof
+ b = KEYWORD_SIZEOF
+ goto tokenize_keyword
+ :keyword_do
+ b = KEYWORD_DO
+ goto tokenize_keyword
+ :keyword_if
+ b = KEYWORD_IF
+ goto tokenize_keyword
+ :keyword_static
+ b = KEYWORD_STATIC
+ goto tokenize_keyword
+ :keyword_while
+ b = KEYWORD_WHILE
+ goto tokenize_keyword
+ :tokenize_loop_end
+ ; explicitly end the output with a zero type instead of relying on the buffer's prior contents
+ *2out = 0
+
+ return 0
+
+; print every token in the array tokens, stopping at the first token whose type is 0.
+; each 16-byte token is written as type:file:line:data followed by a space
+; (fields are the 2-, 2-, 4- and 8-byte values read below, printed with putn);
+; a newline is written after the last token.
+function print_tokens
+ argument tokens
+ local p
+ p = tokens
+ :print_tokens_loop
+ ; a zero type marks the end of the token array
+ if *2p == 0 goto print_tokens_loop_end
+ ; type
+ putn(*2p)
+ p += 2
+ putc(':)
+ ; file index
+ putn(*2p)
+ p += 2
+ putc(':)
+ ; line number
+ putn(*4p)
+ p += 4
+ putc(':)
+ ; data
+ putn(*8p)
+ p += 8
+ putc(32)
+ goto print_tokens_loop
+ :print_tokens_loop_end
+ putc(10)
+ return
diff --git a/05/util.b b/05/util.b
index 495e890..119d2d1 100644
--- a/05/util.b
+++ b/05/util.b
@@ -370,6 +370,8 @@ function exit
return 0
:return_1
return 1
+:return_minus1
+ return -1
function syscall
; I've done some testing, and this should be okay even if