diff options
Diffstat (limited to '05/tokenize.b')
-rw-r--r-- | 05/tokenize.b | 419 |
1 files changed, 60 insertions, 359 deletions
diff --git a/05/tokenize.b b/05/tokenize.b index 6b8d7ff..3a0d37d 100644 --- a/05/tokenize.b +++ b/05/tokenize.b @@ -53,6 +53,49 @@ function file_add *1p = 255 return +; return keyword ID associated with str, or 0 if it's not a keyword +function get_keyword_id + argument keyword_str + local p + local c + local b + p = .keyword_table + :keyword_id_loop + c = *1p + if c == 255 goto no_such_keyword_str + p += 1 + b = str_equals(keyword_str, p) + if b != 0 goto got_keyword_id + p = memchr(p, 0) + p += 1 + goto keyword_id_loop + :no_such_keyword_str + return 0 + :got_keyword_id + return c + +; get string associated with keyword id, or "@BAD_KEYWORD_ID" if it's not a keyword +function get_keyword_str + argument keyword_id + local p + local c + local b + p = .keyword_table + :keyword_str_loop + c = *1p + if c == 255 goto no_such_keyword_id + if c == keyword_id goto found_keyword_id + p = memchr(p, 0) + p += 1 + goto keyword_str_loop + :found_keyword_id + return p + 1 + :no_such_keyword_id + return .str_no_such_keyword_id + :str_no_such_keyword_id + string @BAD_KEYWORD_ID + byte 0 + ; turn pptokens into tokens, written to out. ; tokens are 16 bytes and have the following format: ; ushort type @@ -73,146 +116,8 @@ function tokenize if *1in == 10 goto tokenize_newline if *1in == 0 goto tokenize_loop_end - b = str_equals(in, .str_comma) - if b != 0 goto keyword_comma - b = str_equals(in, .str_eq) - if b != 0 goto keyword_eq - b = str_equals(in, .str_plus_eq) - if b != 0 goto keyword_plus_eq - b = str_equals(in, .str_minus_eq) - if b != 0 goto keyword_minus_eq - b = str_equals(in, .str_times_eq) - if b != 0 goto keyword_times_eq - b = str_equals(in, .str_div_eq) - if b != 0 goto keyword_div_eq - b = str_equals(in, .str_percent_eq) - if b != 0 goto keyword_percent_eq - b = str_equals(in, .str_lshift_eq) - if b != 0 goto keyword_rshift_eq - b = str_equals(in, .str_and_eq) - if b != 0 goto keyword_and_eq - b = str_equals(in, .str_or_eq) - if b != 0 goto keyword_or_eq - b = str_equals(in, .str_question) - if b != 0 goto keyword_question - b = str_equals(in, .str_or_or) - if b != 0 goto keyword_or_or - b = str_equals(in, .str_and_and) - if b != 0 goto keyword_and_and - b = str_equals(in, .str_or) - if b != 0 goto keyword_or - b = str_equals(in, .str_xor) - if b != 0 goto keyword_xor - b = str_equals(in, .str_and) - if b != 0 goto keyword_and - b = str_equals(in, .str_eq_eq) - if b != 0 goto keyword_eq_eq - b = str_equals(in, .str_not_eq) - if b != 0 goto keyword_not_eq - b = str_equals(in, .str_lt) - if b != 0 goto keyword_lt - b = str_equals(in, .str_gt) - if b != 0 goto keyword_gt - b = str_equals(in, .str_lt_eq) - if b != 0 goto keyword_lt_eq - b = str_equals(in, .str_gt_eq) - if b != 0 goto keyword_gt_eq - b = str_equals(in, .str_lshift) - if b != 0 goto keyword_lshift - b = str_equals(in, .str_rshift) - if b != 0 goto keyword_rshift - b = str_equals(in, .str_plus) - if b != 0 goto keyword_plus - b = str_equals(in, .str_minus) - if b != 0 goto keyword_minus - b = str_equals(in, .str_times) - if b != 0 goto keyword_times - b = str_equals(in, .str_div) - if b != 0 goto keyword_div - b = str_equals(in, .str_percent) - if b != 0 goto keyword_percent - b = str_equals(in, .str_plus_plus) - if b != 0 goto keyword_plus_plus - b = str_equals(in, .str_minus_minus) - if b != 0 goto keyword_minus_minus - b = str_equals(in, .str_not) - if b != 0 goto keyword_not - b = str_equals(in, .str_tilde) - if b != 0 goto keyword_tilde - b = str_equals(in, .str_arrow) - if b != 0 goto keyword_arrow - b = str_equals(in, .str_dotdotdot) - if b != 0 goto keyword_dotdotdot - b = str_equals(in, .str_colon) - if b != 0 goto keyword_colon - b = str_equals(in, .str_lbrace) - if b != 0 goto keyword_lbrace - b = str_equals(in, .str_rbrace) - if b != 0 goto keyword_rbrace - b = str_equals(in, .str_lsquare) - if b != 0 goto keyword_lsquare - b = str_equals(in, .str_rsquare) - if b != 0 goto keyword_rsquare - b = str_equals(in, .str_lparen) - if b != 0 goto keyword_lparen - b = str_equals(in, .str_rparen) - if b != 0 goto keyword_rparen - b = str_equals(in, .str_semicolon) - if b != 0 goto keyword_semicolon - b = str_equals(in, .str_double) - if b != 0 goto keyword_double - b = str_equals(in, .str_int) - if b != 0 goto keyword_int - b = str_equals(in, .str_struct) - if b != 0 goto keyword_struct - b = str_equals(in, .str_break) - if b != 0 goto keyword_break - b = str_equals(in, .str_else) - if b != 0 goto keyword_else - b = str_equals(in, .str_long) - if b != 0 goto keyword_long - b = str_equals(in, .str_switch) - if b != 0 goto keyword_switch - b = str_equals(in, .str_case) - if b != 0 goto keyword_case - b = str_equals(in, .str_enum) - if b != 0 goto keyword_enum - b = str_equals(in, .str_typedef) - if b != 0 goto keyword_typedef - b = str_equals(in, .str_char) - if b != 0 goto keyword_char - b = str_equals(in, .str_extern) - if b != 0 goto keyword_extern - b = str_equals(in, .str_return) - if b != 0 goto keyword_return - b = str_equals(in, .str_union) - if b != 0 goto keyword_union - b = str_equals(in, .str_float) - if b != 0 goto keyword_float - b = str_equals(in, .str_short) - if b != 0 goto keyword_short - b = str_equals(in, .str_unsigned) - if b != 0 goto keyword_unsigned - b = str_equals(in, .str_continue) - if b != 0 goto keyword_continue - b = str_equals(in, .str_for) - if b != 0 goto keyword_for - b = str_equals(in, .str_void) - if b != 0 goto keyword_void - b = str_equals(in, .str_default) - if b != 0 goto keyword_default - b = str_equals(in, .str_goto) - if b != 0 goto keyword_goto - b = str_equals(in, .str_sizeof) - if b != 0 goto keyword_sizeof - b = str_equals(in, .str_do) - if b != 0 goto keyword_do - b = str_equals(in, .str_if) - if b != 0 goto keyword_if - b = str_equals(in, .str_static) - if b != 0 goto keyword_static - b = str_equals(in, .str_while) - if b != 0 goto keyword_while + b = get_keyword_id(in) + if b != 0 goto tokenize_keyword byte 0xcc @@ -243,222 +148,6 @@ function tokenize out += 8 pptoken_skip(&in) goto tokenize_loop - :keyword_comma - b = SYMBOL_COMMA - goto tokenize_keyword - :keyword_eq - b = SYMBOL_EQ - goto tokenize_keyword - :keyword_plus_eq - b = SYMBOL_PLUS_EQ - goto tokenize_keyword - :keyword_minus_eq - b = SYMBOL_MINUS_EQ - goto tokenize_keyword - :keyword_times_eq - b = SYMBOL_TIMES_EQ - goto tokenize_keyword - :keyword_div_eq - b = SYMBOL_DIV_EQ - goto tokenize_keyword - :keyword_percent_eq - b = SYMBOL_PERCENT_EQ - goto tokenize_keyword - :keyword_lshift_eq - b = SYMBOL_LSHIFT_EQ - goto tokenize_keyword - :keyword_rshift_eq - b = SYMBOL_RSHIFT_EQ - goto tokenize_keyword - :keyword_and_eq - b = SYMBOL_AND_EQ - goto tokenize_keyword - :keyword_xor_eq - b = SYMBOL_XOR_EQ - goto tokenize_keyword - :keyword_or_eq - b = SYMBOL_OR_EQ - goto tokenize_keyword - :keyword_question - b = SYMBOL_QUESTION - goto tokenize_keyword - :keyword_or_or - b = SYMBOL_OR_OR - goto tokenize_keyword - :keyword_and_and - b = SYMBOL_AND_AND - goto tokenize_keyword - :keyword_or - b = SYMBOL_OR - goto tokenize_keyword - :keyword_xor - b = SYMBOL_XOR - goto tokenize_keyword - :keyword_and - b = SYMBOL_AND - goto tokenize_keyword - :keyword_eq_eq - b = SYMBOL_EQ_EQ - goto tokenize_keyword - :keyword_not_eq - b = SYMBOL_NOT_EQ - goto tokenize_keyword - :keyword_lt - b = SYMBOL_LT - goto tokenize_keyword - :keyword_gt - b = SYMBOL_GT - goto tokenize_keyword - :keyword_lt_eq - b = SYMBOL_LT_EQ - goto tokenize_keyword - :keyword_gt_eq - b = SYMBOL_GT_EQ - goto tokenize_keyword - :keyword_lshift - b = SYMBOL_LSHIFT - goto tokenize_keyword - :keyword_rshift - b = SYMBOL_RSHIFT - goto tokenize_keyword - :keyword_plus - b = SYMBOL_PLUS - goto tokenize_keyword - :keyword_minus - b = SYMBOL_MINUS - goto tokenize_keyword - :keyword_times - b = SYMBOL_TIMES - goto tokenize_keyword - :keyword_div - b = SYMBOL_DIV - goto tokenize_keyword - :keyword_percent - b = SYMBOL_PERCENT - goto tokenize_keyword - :keyword_plus_plus - b = SYMBOL_PLUS_PLUS - goto tokenize_keyword - :keyword_minus_minus - b = SYMBOL_MINUS_MINUS - goto tokenize_keyword - :keyword_not - b = SYMBOL_NOT - goto tokenize_keyword - :keyword_tilde - b = SYMBOL_TILDE - goto tokenize_keyword - :keyword_arrow - b = SYMBOL_ARROW - goto tokenize_keyword - :keyword_dotdotdot - b = SYMBOL_DOTDOTDOT - goto tokenize_keyword - :keyword_colon - b = SYMBOL_COLON - goto tokenize_keyword - :keyword_lbrace - b = SYMBOL_LBRACE - goto tokenize_keyword - :keyword_rbrace - b = SYMBOL_RBRACE - goto tokenize_keyword - :keyword_lsquare - b = SYMBOL_LSQUARE - goto tokenize_keyword - :keyword_rsquare - b = SYMBOL_RSQUARE - goto tokenize_keyword - :keyword_lparen - b = SYMBOL_LPAREN - goto tokenize_keyword - :keyword_rparen - b = SYMBOL_RPAREN - goto tokenize_keyword - :keyword_semicolon - b = SYMBOL_SEMICOLON - goto tokenize_keyword - :keyword_double - b = KEYWORD_DOUBLE - goto tokenize_keyword - :keyword_int - b = KEYWORD_INT - goto tokenize_keyword - :keyword_struct - b = KEYWORD_STRUCT - goto tokenize_keyword - :keyword_break - b = KEYWORD_BREAK - goto tokenize_keyword - :keyword_else - b = KEYWORD_ELSE - goto tokenize_keyword - :keyword_long - b = KEYWORD_LONG - goto tokenize_keyword - :keyword_switch - b = KEYWORD_SWITCH - goto tokenize_keyword - :keyword_case - b = KEYWORD_CASE - goto tokenize_keyword - :keyword_enum - b = KEYWORD_ENUM - goto tokenize_keyword - :keyword_typedef - b = KEYWORD_TYPEDEF - goto tokenize_keyword - :keyword_char - b = KEYWORD_CHAR - goto tokenize_keyword - :keyword_extern - b = KEYWORD_EXTERN - goto tokenize_keyword - :keyword_return - b = KEYWORD_RETURN - goto tokenize_keyword - :keyword_union - b = KEYWORD_UNION - goto tokenize_keyword - :keyword_float - b = KEYWORD_FLOAT - goto tokenize_keyword - :keyword_short - b = KEYWORD_SHORT - goto tokenize_keyword - :keyword_unsigned - b = KEYWORD_UNSIGNED - goto tokenize_keyword - :keyword_continue - b = KEYWORD_CONTINUE - goto tokenize_keyword - :keyword_for - b = KEYWORD_FOR - goto tokenize_keyword - :keyword_void - b = KEYWORD_VOID - goto tokenize_keyword - :keyword_default - b = KEYWORD_DEFAULT - goto tokenize_keyword - :keyword_goto - b = KEYWORD_GOTO - goto tokenize_keyword - :keyword_sizeof - b = KEYWORD_SIZEOF - goto tokenize_keyword - :keyword_do - b = KEYWORD_DO - goto tokenize_keyword - :keyword_if - b = KEYWORD_IF - goto tokenize_keyword - :keyword_static - b = KEYWORD_STATIC - goto tokenize_keyword - :keyword_while - b = KEYWORD_WHILE - goto tokenize_keyword :tokenize_loop_end return 0 @@ -466,18 +155,27 @@ function tokenize function print_tokens argument tokens local p + local s p = tokens :print_tokens_loop if *2p == 0 goto print_tokens_loop_end - putn(*2p) + if *2p > 20 goto print_token_keyword + fputs(2, .str_print_bad_token) + exit(1) + :print_token_keyword + s = get_keyword_str(*2p) + puts(s) + goto print_token_data + + :print_token_data p += 2 - putc(':) + putc('@) putn(*2p) p += 2 putc(':) putn(*4p) p += 4 - putc(':) + putc(61) putn(*8p) p += 8 putc(32) @@ -485,3 +183,6 @@ function print_tokens :print_tokens_loop_end putc(10) return + :str_print_bad_token + string Unrecognized token type in print_tokens. Aborting. + byte 10 |