diff options
Diffstat (limited to '05/tokenize.b')
-rw-r--r-- | 05/tokenize.b | 487 |
1 files changed, 487 insertions, 0 deletions
diff --git a/05/tokenize.b b/05/tokenize.b new file mode 100644 index 0000000..6b8d7ff --- /dev/null +++ b/05/tokenize.b @@ -0,0 +1,487 @@
global file_list ; initialized in main -- null-separated 255-terminated array of strings

; get the name of the file with the given index
;   idx - 0-based position of the wanted string in file_list
; returns a pointer to the start of that string.
; if idx is not a valid index, prints an error to file descriptor 2 and calls exit(1).
function file_get
	argument idx
	local p
	p = file_list
	:file_get_loop
		; test for the 255 terminator BEFORE testing idx, so that an index equal
		; to the number of files (e.g. index 0 on an empty list) is reported as
		; an error instead of returning a pointer to the terminator byte.
		if *1p == 255 goto file_uhoh
		if idx == 0 goto file_got
		idx -= 1
		; advance to the entry after this one
		; (presumably memchr returns the address of the first 0 byte at or after p -- it is defined elsewhere)
		p = memchr(p, 0)
		p += 1
		goto file_get_loop
	:file_got
		return p
	:file_uhoh
		fputs(2, .str_bad_file_index)
		exit(1)
	:str_bad_file_index
		string Bad file index. This shouldn't happen.
		byte 10
		byte 0

; get the index of the given file, returns -1 if file does not exist
;   filename - string to look for in file_list (compared with str_equals)
; the index returned is the 0-based position of the first matching entry.
function file_get_index
	argument filename
	local p
	local b
	local i
	p = file_list
	i = 0
	:file_get_index_loop
		; reached the terminator without a match
		; (return_minus1 is a label defined outside this file -- presumably it returns -1)
		if *1p == 255 goto return_minus1
		b = str_equals(p, filename)
		if b != 0 goto file_found
		i += 1
		; step over this entry and its null separator
		p = memchr(p, 0)
		p += 1
		goto file_get_index_loop
	:file_found
		return i

; add to list of files if not already there
;   filename - string to append to file_list
; NOTE(review): there is no bounds check here; the capacity of file_list is not visible in this file.
function file_add
	argument filename
	local p
	p = file_get_index(filename)
	; already in the list -- nothing to do (return_0 is a label defined outside this file)
	if p != -1 goto return_0
	; find the current 255 terminator and overwrite it with the new name
	p = memchr(file_list, 255)
	; NOTE(review): this relies on strcpy returning a pointer to the null byte it wrote
	; at the end of the destination (not the C convention) -- confirm against its definition.
	p = strcpy(p, filename)
	p += 1
	; write the new terminator just after the new entry's null byte
	*1p = 255
	return

; turn pptokens into tokens, written to out.
; tokens are 16 bytes and have the following format:
;    ushort type
;    ushort file
;    uint   line
;    ulong  data
;
;   pptokens - input; walked one pptoken at a time with pptoken_skip, ends at a 0 byte
;   out      - output buffer that receives the 16-byte tokens
; returns 0.
; NOTE(review): no terminating token is written here, while print_tokens stops at a
; token whose type is 0 -- presumably the caller provides a zeroed buffer; confirm.
; NOTE(review): file and line_number are only assigned when a '$ line directive is
; seen -- presumably the input always starts with one; confirm.
function tokenize
	argument pptokens
	argument out
	local in
	local file
	local line_number
	local b
	in = pptokens
	:tokenize_loop
		; pptokens that do not produce a token
		if *1in == '$ goto tokenize_line_directive
		if *1in == 32 goto tokenize_skip_pptoken
		if *1in == 10 goto tokenize_newline
		if *1in == 0 goto tokenize_loop_end

		; symbols
		b = str_equals(in, .str_comma)
		if b != 0 goto keyword_comma
		b = str_equals(in, .str_eq)
		if b != 0 goto keyword_eq
		b = str_equals(in, .str_plus_eq)
		if b != 0 goto keyword_plus_eq
		b = str_equals(in, .str_minus_eq)
		if b != 0 goto keyword_minus_eq
		b = str_equals(in, .str_times_eq)
		if b != 0 goto keyword_times_eq
		b = str_equals(in, .str_div_eq)
		if b != 0 goto keyword_div_eq
		b = str_equals(in, .str_percent_eq)
		if b != 0 goto keyword_percent_eq
		b = str_equals(in, .str_lshift_eq)
		; this used to jump to keyword_rshift_eq, which gave the left-shift-assign
		; token the type SYMBOL_RSHIFT_EQ
		if b != 0 goto keyword_lshift_eq
		; NOTE(review): nothing in this chain jumps to keyword_rshift_eq or keyword_xor_eq,
		; so those two symbols fall through to the trap below. the matching comparisons
		; (presumably against .str_rshift_eq and .str_xor_eq) still need to be added once
		; those string labels are confirmed to exist.
		b = str_equals(in, .str_and_eq)
		if b != 0 goto keyword_and_eq
		b = str_equals(in, .str_or_eq)
		if b != 0 goto keyword_or_eq
		b = str_equals(in, .str_question)
		if b != 0 goto keyword_question
		b = str_equals(in, .str_or_or)
		if b != 0 goto keyword_or_or
		b = str_equals(in, .str_and_and)
		if b != 0 goto keyword_and_and
		b = str_equals(in, .str_or)
		if b != 0 goto keyword_or
		b = str_equals(in, .str_xor)
		if b != 0 goto keyword_xor
		b = str_equals(in, .str_and)
		if b != 0 goto keyword_and
		b = str_equals(in, .str_eq_eq)
		if b != 0 goto keyword_eq_eq
		b = str_equals(in, .str_not_eq)
		if b != 0 goto keyword_not_eq
		b = str_equals(in, .str_lt)
		if b != 0 goto keyword_lt
		b = str_equals(in, .str_gt)
		if b != 0 goto keyword_gt
		b = str_equals(in, .str_lt_eq)
		if b != 0 goto keyword_lt_eq
		b = str_equals(in, .str_gt_eq)
		if b != 0 goto keyword_gt_eq
		b = str_equals(in, .str_lshift)
		if b != 0 goto keyword_lshift
		b = str_equals(in, .str_rshift)
		if b != 0 goto keyword_rshift
		b = str_equals(in, .str_plus)
		if b != 0 goto keyword_plus
		b = str_equals(in, .str_minus)
		if b != 0 goto keyword_minus
		b = str_equals(in, .str_times)
		if b != 0 goto keyword_times
		b = str_equals(in, .str_div)
		if b != 0 goto keyword_div
		b = str_equals(in, .str_percent)
		if b != 0 goto keyword_percent
		b = str_equals(in, .str_plus_plus)
		if b != 0 goto keyword_plus_plus
		b = str_equals(in, .str_minus_minus)
		if b != 0 goto keyword_minus_minus
		b = str_equals(in, .str_not)
		if b != 0 goto keyword_not
		b = str_equals(in, .str_tilde)
		if b != 0 goto keyword_tilde
		b = str_equals(in, .str_arrow)
		if b != 0 goto keyword_arrow
		b = str_equals(in, .str_dotdotdot)
		if b != 0 goto keyword_dotdotdot
		b = str_equals(in, .str_colon)
		if b != 0 goto keyword_colon
		b = str_equals(in, .str_lbrace)
		if b != 0 goto keyword_lbrace
		b = str_equals(in, .str_rbrace)
		if b != 0 goto keyword_rbrace
		b = str_equals(in, .str_lsquare)
		if b != 0 goto keyword_lsquare
		b = str_equals(in, .str_rsquare)
		if b != 0 goto keyword_rsquare
		b = str_equals(in, .str_lparen)
		if b != 0 goto keyword_lparen
		b = str_equals(in, .str_rparen)
		if b != 0 goto keyword_rparen
		b = str_equals(in, .str_semicolon)
		if b != 0 goto keyword_semicolon

		; keywords
		b = str_equals(in, .str_double)
		if b != 0 goto keyword_double
		b = str_equals(in, .str_int)
		if b != 0 goto keyword_int
		b = str_equals(in, .str_struct)
		if b != 0 goto keyword_struct
		b = str_equals(in, .str_break)
		if b != 0 goto keyword_break
		b = str_equals(in, .str_else)
		if b != 0 goto keyword_else
		b = str_equals(in, .str_long)
		if b != 0 goto keyword_long
		b = str_equals(in, .str_switch)
		if b != 0 goto keyword_switch
		b = str_equals(in, .str_case)
		if b != 0 goto keyword_case
		b = str_equals(in, .str_enum)
		if b != 0 goto keyword_enum
		b = str_equals(in, .str_typedef)
		if b != 0 goto keyword_typedef
		b = str_equals(in, .str_char)
		if b != 0 goto keyword_char
		b = str_equals(in, .str_extern)
		if b != 0 goto keyword_extern
		b = str_equals(in, .str_return)
		if b != 0 goto keyword_return
		b = str_equals(in, .str_union)
		if b != 0 goto keyword_union
		b = str_equals(in, .str_float)
		if b != 0 goto keyword_float
		b = str_equals(in, .str_short)
		if b != 0 goto keyword_short
		b = str_equals(in, .str_unsigned)
		if b != 0 goto keyword_unsigned
		b = str_equals(in, .str_continue)
		if b != 0 goto keyword_continue
		b = str_equals(in, .str_for)
		if b != 0 goto keyword_for
		b = str_equals(in, .str_void)
		if b != 0 goto keyword_void
		b = str_equals(in, .str_default)
		if b != 0 goto keyword_default
		b = str_equals(in, .str_goto)
		if b != 0 goto keyword_goto
		b = str_equals(in, .str_sizeof)
		if b != 0 goto keyword_sizeof
		b = str_equals(in, .str_do)
		if b != 0 goto keyword_do
		b = str_equals(in, .str_if)
		if b != 0 goto keyword_if
		b = str_equals(in, .str_static)
		if b != 0 goto keyword_static
		b = str_equals(in, .str_while)
		if b != 0 goto keyword_while

		; no comparison matched. this places a raw 0xcc byte in the code at this point
		; (presumably the x86 int3 opcode, used as a trap for pptokens that are not handled yet)
		byte 0xcc

	:tokenize_newline
		; a newline pptoken produces no token, it only advances the line counter
		line_number += 1
		pptoken_skip(&in)
		goto tokenize_loop
	:tokenize_skip_pptoken
		; a pptoken starting with a space produces no token
		pptoken_skip(&in)
		goto tokenize_loop
	:tokenize_line_directive
		; pptoken starting with '$: a number, then a space, then a file name
		; skip the '$
		in += 1
		line_number = stoi(in)
		; the file name starts just after the next space
		in = memchr(in, 32)
		in += 1
		; register the file (if it is new) and remember its index for the tokens that follow
		file_add(in)
		file = file_get_index(in)
		pptoken_skip(&in)
		goto tokenize_loop
	:tokenize_keyword
		; shared tail for every symbol/keyword: b holds the token type
		*2out = b ; type
		out += 2
		*2out = file
		out += 2
		*4out = line_number
		out += 4
		; no data
		out += 8
		pptoken_skip(&in)
		goto tokenize_loop
	:keyword_comma
		b = SYMBOL_COMMA
		goto tokenize_keyword
	:keyword_eq
		b = SYMBOL_EQ
		goto tokenize_keyword
	:keyword_plus_eq
		b = SYMBOL_PLUS_EQ
		goto tokenize_keyword
	:keyword_minus_eq
		b = SYMBOL_MINUS_EQ
		goto tokenize_keyword
	:keyword_times_eq
		b = SYMBOL_TIMES_EQ
		goto tokenize_keyword
	:keyword_div_eq
		b = SYMBOL_DIV_EQ
		goto tokenize_keyword
	:keyword_percent_eq
		b = SYMBOL_PERCENT_EQ
		goto tokenize_keyword
	:keyword_lshift_eq
		b = SYMBOL_LSHIFT_EQ
		goto tokenize_keyword
	:keyword_rshift_eq
		b = SYMBOL_RSHIFT_EQ
		goto tokenize_keyword
	:keyword_and_eq
		b = SYMBOL_AND_EQ
		goto tokenize_keyword
	:keyword_xor_eq
		b = SYMBOL_XOR_EQ
		goto tokenize_keyword
	:keyword_or_eq
		b = SYMBOL_OR_EQ
		goto tokenize_keyword
	:keyword_question
		b = SYMBOL_QUESTION
		goto tokenize_keyword
	:keyword_or_or
		b = SYMBOL_OR_OR
		goto tokenize_keyword
	:keyword_and_and
		b = SYMBOL_AND_AND
		goto tokenize_keyword
	:keyword_or
		b = SYMBOL_OR
		goto tokenize_keyword
	:keyword_xor
		b = SYMBOL_XOR
		goto tokenize_keyword
	:keyword_and
		b = SYMBOL_AND
		goto tokenize_keyword
	:keyword_eq_eq
		b = SYMBOL_EQ_EQ
		goto tokenize_keyword
	:keyword_not_eq
		b = SYMBOL_NOT_EQ
		goto tokenize_keyword
	:keyword_lt
		b = SYMBOL_LT
		goto tokenize_keyword
	:keyword_gt
		b = SYMBOL_GT
		goto tokenize_keyword
	:keyword_lt_eq
		b = SYMBOL_LT_EQ
		goto tokenize_keyword
	:keyword_gt_eq
		b = SYMBOL_GT_EQ
		goto tokenize_keyword
	:keyword_lshift
		b = SYMBOL_LSHIFT
		goto tokenize_keyword
	:keyword_rshift
		b = SYMBOL_RSHIFT
		goto tokenize_keyword
	:keyword_plus
		b = SYMBOL_PLUS
		goto tokenize_keyword
	:keyword_minus
		b = SYMBOL_MINUS
		goto tokenize_keyword
	:keyword_times
		b = SYMBOL_TIMES
		goto tokenize_keyword
	:keyword_div
		b = SYMBOL_DIV
		goto tokenize_keyword
	:keyword_percent
		b = SYMBOL_PERCENT
		goto tokenize_keyword
	:keyword_plus_plus
		b = SYMBOL_PLUS_PLUS
		goto tokenize_keyword
	:keyword_minus_minus
		b = SYMBOL_MINUS_MINUS
		goto tokenize_keyword
	:keyword_not
		b = SYMBOL_NOT
		goto tokenize_keyword
	:keyword_tilde
		b = SYMBOL_TILDE
		goto tokenize_keyword
	:keyword_arrow
		b = SYMBOL_ARROW
		goto tokenize_keyword
	:keyword_dotdotdot
		b = SYMBOL_DOTDOTDOT
		goto tokenize_keyword
	:keyword_colon
		b = SYMBOL_COLON
		goto tokenize_keyword
	:keyword_lbrace
		b = SYMBOL_LBRACE
		goto tokenize_keyword
	:keyword_rbrace
		b = SYMBOL_RBRACE
		goto tokenize_keyword
	:keyword_lsquare
		b = SYMBOL_LSQUARE
		goto tokenize_keyword
	:keyword_rsquare
		b = SYMBOL_RSQUARE
		goto tokenize_keyword
	:keyword_lparen
		b = SYMBOL_LPAREN
		goto tokenize_keyword
	:keyword_rparen
		b = SYMBOL_RPAREN
		goto tokenize_keyword
	:keyword_semicolon
		b = SYMBOL_SEMICOLON
		goto tokenize_keyword
	:keyword_double
		b = KEYWORD_DOUBLE
		goto tokenize_keyword
	:keyword_int
		b = KEYWORD_INT
		goto tokenize_keyword
	:keyword_struct
		b = KEYWORD_STRUCT
		goto tokenize_keyword
	:keyword_break
		b = KEYWORD_BREAK
		goto tokenize_keyword
	:keyword_else
		b = KEYWORD_ELSE
		goto tokenize_keyword
	:keyword_long
		b = KEYWORD_LONG
		goto tokenize_keyword
	:keyword_switch
		b = KEYWORD_SWITCH
		goto tokenize_keyword
	:keyword_case
		b = KEYWORD_CASE
		goto tokenize_keyword
	:keyword_enum
		b = KEYWORD_ENUM
		goto tokenize_keyword
	:keyword_typedef
		b = KEYWORD_TYPEDEF
		goto tokenize_keyword
	:keyword_char
		b = KEYWORD_CHAR
		goto tokenize_keyword
	:keyword_extern
		b = KEYWORD_EXTERN
		goto tokenize_keyword
	:keyword_return
		b = KEYWORD_RETURN
		goto tokenize_keyword
	:keyword_union
		b = KEYWORD_UNION
		goto tokenize_keyword
	:keyword_float
		b = KEYWORD_FLOAT
		goto tokenize_keyword
	:keyword_short
		b = KEYWORD_SHORT
		goto tokenize_keyword
	:keyword_unsigned
		b = KEYWORD_UNSIGNED
		goto tokenize_keyword
	:keyword_continue
		b = KEYWORD_CONTINUE
		goto tokenize_keyword
	:keyword_for
		b = KEYWORD_FOR
		goto tokenize_keyword
	:keyword_void
		b = KEYWORD_VOID
		goto tokenize_keyword
	:keyword_default
		b = KEYWORD_DEFAULT
		goto tokenize_keyword
	:keyword_goto
		b = KEYWORD_GOTO
		goto tokenize_keyword
	:keyword_sizeof
		b = KEYWORD_SIZEOF
		goto tokenize_keyword
	:keyword_do
		b = KEYWORD_DO
		goto tokenize_keyword
	:keyword_if
		b = KEYWORD_IF
		goto tokenize_keyword
	:keyword_static
		b = KEYWORD_STATIC
		goto tokenize_keyword
	:keyword_while
		b = KEYWORD_WHILE
		goto tokenize_keyword
	:tokenize_loop_end

	return 0

; print the given tokens as type:file:line:data, with a space after each token
; and a newline at the end.
;   tokens - pointer to the first 16-byte token; printing stops at the first token whose type is 0
function print_tokens
	argument tokens
	local p
	p = tokens
	:print_tokens_loop
		; a type of 0 marks the end of the tokens
		if *2p == 0 goto print_tokens_loop_end
		; type
		putn(*2p)
		p += 2
		putc(':)
		; file
		putn(*2p)
		p += 2
		putc(':)
		; line
		putn(*4p)
		p += 4
		putc(':)
		; data
		putn(*8p)
		p += 8
		putc(32)
		goto print_tokens_loop
	:print_tokens_loop_end
	putc(10)
	return