diff options
-rw-r--r-- | 05/constants.b | 389 | ||||
-rw-r--r-- | 05/main.c | 2 | ||||
-rw-r--r-- | 05/tokenize.b | 419 |
3 files changed, 326 insertions, 484 deletions
diff --git a/05/constants.b b/05/constants.b index 370f197..82da6b2 100644 --- a/05/constants.b +++ b/05/constants.b @@ -18,55 +18,51 @@ ; highest ; NB: for equal precedence, operators are applied left-to-right except for assignment operators (precedence 2) -; pattern for binary operators is: 0x10px where p is precedence -; NB: these four can also be unary: & * + - -#define MASK_SYMBOL_PRECEDENCE 0x0ff0 -#define SYMBOL_COMMA 0x1010 -#define SYMBOL_EQ 0x1020 -#define SYMBOL_PLUS_EQ 0x1021 -#define SYMBOL_MINUS_EQ 0x1022 -#define SYMBOL_TIMES_EQ 0x1023 -#define SYMBOL_DIV_EQ 0x1024 -#define SYMBOL_PERCENT_EQ 0x1025 -#define SYMBOL_LSHIFT_EQ 0x1026 -#define SYMBOL_RSHIFT_EQ 0x1027 -#define SYMBOL_AND_EQ 0x1028 -#define SYMBOL_XOR_EQ 0x1029 -#define SYMBOL_OR_EQ 0x102a -#define SYMBOL_QUESTION 0x1030 -#define SYMBOL_OR_OR 0x1040 -#define SYMBOL_AND_AND 0x1050 -#define SYMBOL_OR 0x1060 -#define SYMBOL_XOR 0x1070 -#define SYMBOL_AND 0x1080 -#define SYMBOL_EQ_EQ 0x1090 -#define SYMBOL_NOT_EQ 0x1091 -#define SYMBOL_LT 0x10a0 -#define SYMBOL_GT 0x10a1 -#define SYMBOL_LT_EQ 0x10a2 -#define SYMBOL_GT_EQ 0x10a3 -#define SYMBOL_LSHIFT 0x10b0 -#define SYMBOL_RSHIFT 0x10b1 -#define SYMBOL_PLUS 0x10c0 -#define SYMBOL_MINUS 0x10c1 -#define SYMBOL_TIMES 0x10d0 -#define SYMBOL_DIV 0x10d1 -#define SYMBOL_PERCENT 0x10d2 - -#define SYMBOL_PLUS_PLUS 100 -#define SYMBOL_MINUS_MINUS 101 -#define SYMBOL_NOT 102 -#define SYMBOL_TILDE 103 -#define SYMBOL_ARROW 104 -#define SYMBOL_DOTDOTDOT 105 -#define SYMBOL_COLON 106 -#define SYMBOL_LBRACE 107 -#define SYMBOL_RBRACE 108 -#define SYMBOL_LSQUARE 109 -#define SYMBOL_RSQUARE 110 -#define SYMBOL_LPAREN 111 -#define SYMBOL_RPAREN 112 -#define SYMBOL_SEMICOLON 113 +#define SYMBOL_COMMA 200 +#define SYMBOL_EQ 201 +#define SYMBOL_PLUS_EQ 202 +#define SYMBOL_MINUS_EQ 203 +#define SYMBOL_TIMES_EQ 204 +#define SYMBOL_DIV_EQ 205 +#define SYMBOL_PERCENT_EQ 206 +#define SYMBOL_LSHIFT_EQ 207 +#define SYMBOL_RSHIFT_EQ 208 +#define SYMBOL_AND_EQ 209 +#define SYMBOL_XOR_EQ 210 +#define SYMBOL_OR_EQ 211 +#define SYMBOL_QUESTION 212 +#define SYMBOL_OR_OR 213 +#define SYMBOL_AND_AND 214 +#define SYMBOL_OR 215 +#define SYMBOL_XOR 216 +#define SYMBOL_AND 217 +#define SYMBOL_EQ_EQ 218 +#define SYMBOL_NOT_EQ 219 +#define SYMBOL_LT 220 +#define SYMBOL_GT 221 +#define SYMBOL_LT_EQ 222 +#define SYMBOL_GT_EQ 223 +#define SYMBOL_LSHIFT 224 +#define SYMBOL_RSHIFT 225 +#define SYMBOL_PLUS 226 +#define SYMBOL_MINUS 227 +#define SYMBOL_TIMES 228 +#define SYMBOL_DIV 229 +#define SYMBOL_PERCENT 230 +#define SYMBOL_PLUS_PLUS 231 +#define SYMBOL_MINUS_MINUS 232 +#define SYMBOL_NOT 233 +#define SYMBOL_TILDE 234 +#define SYMBOL_ARROW 235 +#define SYMBOL_DOTDOTDOT 236 +#define SYMBOL_COLON 237 +#define SYMBOL_LBRACE 238 +#define SYMBOL_RBRACE 239 +#define SYMBOL_LSQUARE 240 +#define SYMBOL_RSQUARE 241 +#define SYMBOL_LPAREN 242 +#define SYMBOL_RPAREN 243 +#define SYMBOL_SEMICOLON 244 #define TOKEN_IDENTIFIER 1 @@ -109,6 +105,225 @@ #define KEYWORD_STATIC 51 #define KEYWORD_WHILE 52 +:keyword_table + byte SYMBOL_SEMICOLON + byte 59 + byte 0 + byte SYMBOL_EQ + string = + byte 0 + byte SYMBOL_LBRACE + string { + byte 0 + byte SYMBOL_RBRACE + string } + byte 0 + byte SYMBOL_LSQUARE + string [ + byte 0 + byte SYMBOL_RSQUARE + string ] + byte 0 + byte SYMBOL_LPAREN + string ( + byte 0 + byte SYMBOL_RPAREN + string ) + byte 0 + byte SYMBOL_COMMA + string , + byte 0 + byte SYMBOL_PLUS_EQ + string += + byte 0 + byte SYMBOL_MINUS_EQ + string -= + byte 0 + byte SYMBOL_TIMES_EQ + string *= + byte 0 + byte SYMBOL_DIV_EQ + string /= + byte 0 + byte SYMBOL_PERCENT_EQ + string %= + byte 0 + byte SYMBOL_LSHIFT_EQ + string <<= + byte 0 + byte SYMBOL_RSHIFT_EQ + string >>= + byte 0 + byte SYMBOL_AND_EQ + string &= + byte 0 + byte SYMBOL_XOR_EQ + string ^= + byte 0 + byte SYMBOL_OR_EQ + string |= + byte 0 + byte SYMBOL_QUESTION + string ? + byte 0 + byte SYMBOL_OR_OR + string || + byte 0 + byte SYMBOL_AND_AND + string && + byte 0 + byte SYMBOL_OR + string | + byte 0 + byte SYMBOL_XOR + string ^ + byte 0 + byte SYMBOL_AND + string & + byte 0 + byte SYMBOL_EQ_EQ + string == + byte 0 + byte SYMBOL_NOT_EQ + string != + byte 0 + byte SYMBOL_LT + string < + byte 0 + byte SYMBOL_GT + string > + byte 0 + byte SYMBOL_LT_EQ + string <= + byte 0 + byte SYMBOL_GT_EQ + string >= + byte 0 + byte SYMBOL_LSHIFT + string << + byte 0 + byte SYMBOL_RSHIFT + string >> + byte 0 + byte SYMBOL_PLUS + string + + byte 0 + byte SYMBOL_MINUS + string - + byte 0 + byte SYMBOL_TIMES + string * + byte 0 + byte SYMBOL_DIV + string / + byte 0 + byte SYMBOL_PERCENT + string % + byte 0 + byte SYMBOL_PLUS_PLUS + string ++ + byte 0 + byte SYMBOL_MINUS_MINUS + string -- + byte 0 + byte SYMBOL_NOT + string ! + byte 0 + byte SYMBOL_TILDE + string ~ + byte 0 + byte SYMBOL_ARROW + string -> + byte 0 + byte SYMBOL_DOTDOTDOT + string ... + byte 0 + byte SYMBOL_COLON + string : + byte 0 + byte KEYWORD_DOUBLE + string double + byte 0 + byte KEYWORD_INT + string int + byte 0 + byte KEYWORD_STRUCT + string struct + byte 0 + byte KEYWORD_BREAK + string break + byte 0 + byte KEYWORD_ELSE + string else + byte 0 + byte KEYWORD_LONG + string long + byte 0 + byte KEYWORD_SWITCH + string switch + byte 0 + byte KEYWORD_CASE + string case + byte 0 + byte KEYWORD_ENUM + string enum + byte 0 + byte KEYWORD_TYPEDEF + string typedef + byte 0 + byte KEYWORD_CHAR + string char + byte 0 + byte KEYWORD_EXTERN + string extern + byte 0 + byte KEYWORD_RETURN + string return + byte 0 + byte KEYWORD_UNION + string union + byte 0 + byte KEYWORD_FLOAT + string float + byte 0 + byte KEYWORD_SHORT + string short + byte 0 + byte KEYWORD_UNSIGNED + string unsigned + byte 0 + byte KEYWORD_CONTINUE + string continue + byte 0 + byte KEYWORD_FOR + string for + byte 0 + byte KEYWORD_VOID + string void + byte 0 + byte KEYWORD_DEFAULT + string default + byte 0 + byte KEYWORD_GOTO + string goto + byte 0 + byte KEYWORD_SIZEOF + string sizeof + byte 0 + byte KEYWORD_DO + string do + byte 0 + byte KEYWORD_IF + string if + byte 0 + byte KEYWORD_STATIC + string static + byte 0 + byte KEYWORD_WHILE + string while + byte 0 + byte 255 + :str_missing_closing_paren string Missing closing ). byte 0 @@ -295,82 +510,6 @@ :str_endif string endif byte 0 -:str_double - string double - byte 0 -:str_int - string int - byte 0 -:str_struct - string struct - byte 0 -:str_break - string break - byte 0 -:str_long - string long - byte 0 -:str_switch - string switch - byte 0 -:str_case - string case - byte 0 -:str_enum - string enum - byte 0 -:str_typedef - string typedef - byte 0 -:str_char - string char - byte 0 -:str_extern - string extern - byte 0 -:str_return - string return - byte 0 -:str_union - string union - byte 0 -:str_float - string float - byte 0 -:str_short - string short - byte 0 -:str_unsigned - string unsigned - byte 0 -:str_continue - string continue - byte 0 -:str_for - string for - byte 0 -:str_void - string void - byte 0 -:str_default - string default - byte 0 -:str_goto - string goto - byte 0 -:str_sizeof - string sizeof - byte 0 -:str_do - string do - byte 0 -:str_static - string static - byte 0 -:str_while - string while - byte 0 - :str___FILE__ string __FILE__ byte 0 @@ -1,2 +1,4 @@ sizeof(int) +double * = &; + diff --git a/05/tokenize.b b/05/tokenize.b index 6b8d7ff..3a0d37d 100644 --- a/05/tokenize.b +++ b/05/tokenize.b @@ -53,6 +53,49 @@ function file_add *1p = 255 return +; return keyword ID associated with str, or 0 if it's not a keyword +function get_keyword_id + argument keyword_str + local p + local c + local b + p = .keyword_table + :keyword_id_loop + c = *1p + if c == 255 goto no_such_keyword_str + p += 1 + b = str_equals(keyword_str, p) + if b != 0 goto got_keyword_id + p = memchr(p, 0) + p += 1 + goto keyword_id_loop + :no_such_keyword_str + return 0 + :got_keyword_id + return c + +; get string associated with keyword id, or "@BAD_KEYWORD_ID" if it's not a keyword +function get_keyword_str + argument keyword_id + local p + local c + local b + p = .keyword_table + :keyword_str_loop + c = *1p + if c == 255 goto no_such_keyword_id + if c == keyword_id goto found_keyword_id + p = memchr(p, 0) + p += 1 + goto keyword_str_loop + :found_keyword_id + return p + 1 + :no_such_keyword_id + return .str_no_such_keyword_id + :str_no_such_keyword_id + string @BAD_KEYWORD_ID + byte 0 + ; turn pptokens into tokens, written to out. ; tokens are 16 bytes and have the following format: ; ushort type @@ -73,146 +116,8 @@ function tokenize if *1in == 10 goto tokenize_newline if *1in == 0 goto tokenize_loop_end - b = str_equals(in, .str_comma) - if b != 0 goto keyword_comma - b = str_equals(in, .str_eq) - if b != 0 goto keyword_eq - b = str_equals(in, .str_plus_eq) - if b != 0 goto keyword_plus_eq - b = str_equals(in, .str_minus_eq) - if b != 0 goto keyword_minus_eq - b = str_equals(in, .str_times_eq) - if b != 0 goto keyword_times_eq - b = str_equals(in, .str_div_eq) - if b != 0 goto keyword_div_eq - b = str_equals(in, .str_percent_eq) - if b != 0 goto keyword_percent_eq - b = str_equals(in, .str_lshift_eq) - if b != 0 goto keyword_rshift_eq - b = str_equals(in, .str_and_eq) - if b != 0 goto keyword_and_eq - b = str_equals(in, .str_or_eq) - if b != 0 goto keyword_or_eq - b = str_equals(in, .str_question) - if b != 0 goto keyword_question - b = str_equals(in, .str_or_or) - if b != 0 goto keyword_or_or - b = str_equals(in, .str_and_and) - if b != 0 goto keyword_and_and - b = str_equals(in, .str_or) - if b != 0 goto keyword_or - b = str_equals(in, .str_xor) - if b != 0 goto keyword_xor - b = str_equals(in, .str_and) - if b != 0 goto keyword_and - b = str_equals(in, .str_eq_eq) - if b != 0 goto keyword_eq_eq - b = str_equals(in, .str_not_eq) - if b != 0 goto keyword_not_eq - b = str_equals(in, .str_lt) - if b != 0 goto keyword_lt - b = str_equals(in, .str_gt) - if b != 0 goto keyword_gt - b = str_equals(in, .str_lt_eq) - if b != 0 goto keyword_lt_eq - b = str_equals(in, .str_gt_eq) - if b != 0 goto keyword_gt_eq - b = str_equals(in, .str_lshift) - if b != 0 goto keyword_lshift - b = str_equals(in, .str_rshift) - if b != 0 goto keyword_rshift - b = str_equals(in, .str_plus) - if b != 0 goto keyword_plus - b = str_equals(in, .str_minus) - if b != 0 goto keyword_minus - b = str_equals(in, .str_times) - if b != 0 goto keyword_times - b = str_equals(in, .str_div) - if b != 0 goto keyword_div - b = str_equals(in, .str_percent) - if b != 0 goto keyword_percent - b = str_equals(in, .str_plus_plus) - if b != 0 goto keyword_plus_plus - b = str_equals(in, .str_minus_minus) - if b != 0 goto keyword_minus_minus - b = str_equals(in, .str_not) - if b != 0 goto keyword_not - b = str_equals(in, .str_tilde) - if b != 0 goto keyword_tilde - b = str_equals(in, .str_arrow) - if b != 0 goto keyword_arrow - b = str_equals(in, .str_dotdotdot) - if b != 0 goto keyword_dotdotdot - b = str_equals(in, .str_colon) - if b != 0 goto keyword_colon - b = str_equals(in, .str_lbrace) - if b != 0 goto keyword_lbrace - b = str_equals(in, .str_rbrace) - if b != 0 goto keyword_rbrace - b = str_equals(in, .str_lsquare) - if b != 0 goto keyword_lsquare - b = str_equals(in, .str_rsquare) - if b != 0 goto keyword_rsquare - b = str_equals(in, .str_lparen) - if b != 0 goto keyword_lparen - b = str_equals(in, .str_rparen) - if b != 0 goto keyword_rparen - b = str_equals(in, .str_semicolon) - if b != 0 goto keyword_semicolon - b = str_equals(in, .str_double) - if b != 0 goto keyword_double - b = str_equals(in, .str_int) - if b != 0 goto keyword_int - b = str_equals(in, .str_struct) - if b != 0 goto keyword_struct - b = str_equals(in, .str_break) - if b != 0 goto keyword_break - b = str_equals(in, .str_else) - if b != 0 goto keyword_else - b = str_equals(in, .str_long) - if b != 0 goto keyword_long - b = str_equals(in, .str_switch) - if b != 0 goto keyword_switch - b = str_equals(in, .str_case) - if b != 0 goto keyword_case - b = str_equals(in, .str_enum) - if b != 0 goto keyword_enum - b = str_equals(in, .str_typedef) - if b != 0 goto keyword_typedef - b = str_equals(in, .str_char) - if b != 0 goto keyword_char - b = str_equals(in, .str_extern) - if b != 0 goto keyword_extern - b = str_equals(in, .str_return) - if b != 0 goto keyword_return - b = str_equals(in, .str_union) - if b != 0 goto keyword_union - b = str_equals(in, .str_float) - if b != 0 goto keyword_float - b = str_equals(in, .str_short) - if b != 0 goto keyword_short - b = str_equals(in, .str_unsigned) - if b != 0 goto keyword_unsigned - b = str_equals(in, .str_continue) - if b != 0 goto keyword_continue - b = str_equals(in, .str_for) - if b != 0 goto keyword_for - b = str_equals(in, .str_void) - if b != 0 goto keyword_void - b = str_equals(in, .str_default) - if b != 0 goto keyword_default - b = str_equals(in, .str_goto) - if b != 0 goto keyword_goto - b = str_equals(in, .str_sizeof) - if b != 0 goto keyword_sizeof - b = str_equals(in, .str_do) - if b != 0 goto keyword_do - b = str_equals(in, .str_if) - if b != 0 goto keyword_if - b = str_equals(in, .str_static) - if b != 0 goto keyword_static - b = str_equals(in, .str_while) - if b != 0 goto keyword_while + b = get_keyword_id(in) + if b != 0 goto tokenize_keyword byte 0xcc @@ -243,222 +148,6 @@ function tokenize out += 8 pptoken_skip(&in) goto tokenize_loop - :keyword_comma - b = SYMBOL_COMMA - goto tokenize_keyword - :keyword_eq - b = SYMBOL_EQ - goto tokenize_keyword - :keyword_plus_eq - b = SYMBOL_PLUS_EQ - goto tokenize_keyword - :keyword_minus_eq - b = SYMBOL_MINUS_EQ - goto tokenize_keyword - :keyword_times_eq - b = SYMBOL_TIMES_EQ - goto tokenize_keyword - :keyword_div_eq - b = SYMBOL_DIV_EQ - goto tokenize_keyword - :keyword_percent_eq - b = SYMBOL_PERCENT_EQ - goto tokenize_keyword - :keyword_lshift_eq - b = SYMBOL_LSHIFT_EQ - goto tokenize_keyword - :keyword_rshift_eq - b = SYMBOL_RSHIFT_EQ - goto tokenize_keyword - :keyword_and_eq - b = SYMBOL_AND_EQ - goto tokenize_keyword - :keyword_xor_eq - b = SYMBOL_XOR_EQ - goto tokenize_keyword - :keyword_or_eq - b = SYMBOL_OR_EQ - goto tokenize_keyword - :keyword_question - b = SYMBOL_QUESTION - goto tokenize_keyword - :keyword_or_or - b = SYMBOL_OR_OR - goto tokenize_keyword - :keyword_and_and - b = SYMBOL_AND_AND - goto tokenize_keyword - :keyword_or - b = SYMBOL_OR - goto tokenize_keyword - :keyword_xor - b = SYMBOL_XOR - goto tokenize_keyword - :keyword_and - b = SYMBOL_AND - goto tokenize_keyword - :keyword_eq_eq - b = SYMBOL_EQ_EQ - goto tokenize_keyword - :keyword_not_eq - b = SYMBOL_NOT_EQ - goto tokenize_keyword - :keyword_lt - b = SYMBOL_LT - goto tokenize_keyword - :keyword_gt - b = SYMBOL_GT - goto tokenize_keyword - :keyword_lt_eq - b = SYMBOL_LT_EQ - goto tokenize_keyword - :keyword_gt_eq - b = SYMBOL_GT_EQ - goto tokenize_keyword - :keyword_lshift - b = SYMBOL_LSHIFT - goto tokenize_keyword - :keyword_rshift - b = SYMBOL_RSHIFT - goto tokenize_keyword - :keyword_plus - b = SYMBOL_PLUS - goto tokenize_keyword - :keyword_minus - b = SYMBOL_MINUS - goto tokenize_keyword - :keyword_times - b = SYMBOL_TIMES - goto tokenize_keyword - :keyword_div - b = SYMBOL_DIV - goto tokenize_keyword - :keyword_percent - b = SYMBOL_PERCENT - goto tokenize_keyword - :keyword_plus_plus - b = SYMBOL_PLUS_PLUS - goto tokenize_keyword - :keyword_minus_minus - b = SYMBOL_MINUS_MINUS - goto tokenize_keyword - :keyword_not - b = SYMBOL_NOT - goto tokenize_keyword - :keyword_tilde - b = SYMBOL_TILDE - goto tokenize_keyword - :keyword_arrow - b = SYMBOL_ARROW - goto tokenize_keyword - :keyword_dotdotdot - b = SYMBOL_DOTDOTDOT - goto tokenize_keyword - :keyword_colon - b = SYMBOL_COLON - goto tokenize_keyword - :keyword_lbrace - b = SYMBOL_LBRACE - goto tokenize_keyword - :keyword_rbrace - b = SYMBOL_RBRACE - goto tokenize_keyword - :keyword_lsquare - b = SYMBOL_LSQUARE - goto tokenize_keyword - :keyword_rsquare - b = SYMBOL_RSQUARE - goto tokenize_keyword - :keyword_lparen - b = SYMBOL_LPAREN - goto tokenize_keyword - :keyword_rparen - b = SYMBOL_RPAREN - goto tokenize_keyword - :keyword_semicolon - b = SYMBOL_SEMICOLON - goto tokenize_keyword - :keyword_double - b = KEYWORD_DOUBLE - goto tokenize_keyword - :keyword_int - b = KEYWORD_INT - goto tokenize_keyword - :keyword_struct - b = KEYWORD_STRUCT - goto tokenize_keyword - :keyword_break - b = KEYWORD_BREAK - goto tokenize_keyword - :keyword_else - b = KEYWORD_ELSE - goto tokenize_keyword - :keyword_long - b = KEYWORD_LONG - goto tokenize_keyword - :keyword_switch - b = KEYWORD_SWITCH - goto tokenize_keyword - :keyword_case - b = KEYWORD_CASE - goto tokenize_keyword - :keyword_enum - b = KEYWORD_ENUM - goto tokenize_keyword - :keyword_typedef - b = KEYWORD_TYPEDEF - goto tokenize_keyword - :keyword_char - b = KEYWORD_CHAR - goto tokenize_keyword - :keyword_extern - b = KEYWORD_EXTERN - goto tokenize_keyword - :keyword_return - b = KEYWORD_RETURN - goto tokenize_keyword - :keyword_union - b = KEYWORD_UNION - goto tokenize_keyword - :keyword_float - b = KEYWORD_FLOAT - goto tokenize_keyword - :keyword_short - b = KEYWORD_SHORT - goto tokenize_keyword - :keyword_unsigned - b = KEYWORD_UNSIGNED - goto tokenize_keyword - :keyword_continue - b = KEYWORD_CONTINUE - goto tokenize_keyword - :keyword_for - b = KEYWORD_FOR - goto tokenize_keyword - :keyword_void - b = KEYWORD_VOID - goto tokenize_keyword - :keyword_default - b = KEYWORD_DEFAULT - goto tokenize_keyword - :keyword_goto - b = KEYWORD_GOTO - goto tokenize_keyword - :keyword_sizeof - b = KEYWORD_SIZEOF - goto tokenize_keyword - :keyword_do - b = KEYWORD_DO - goto tokenize_keyword - :keyword_if - b = KEYWORD_IF - goto tokenize_keyword - :keyword_static - b = KEYWORD_STATIC - goto tokenize_keyword - :keyword_while - b = KEYWORD_WHILE - goto tokenize_keyword :tokenize_loop_end return 0 @@ -466,18 +155,27 @@ function tokenize function print_tokens argument tokens local p + local s p = tokens :print_tokens_loop if *2p == 0 goto print_tokens_loop_end - putn(*2p) + if *2p > 20 goto print_token_keyword + fputs(2, .str_print_bad_token) + exit(1) + :print_token_keyword + s = get_keyword_str(*2p) + puts(s) + goto print_token_data + + :print_token_data p += 2 - putc(':) + putc('@) putn(*2p) p += 2 putc(':) putn(*4p) p += 4 - putc(':) + putc(61) putn(*8p) p += 8 putc(32) @@ -485,3 +183,6 @@ function print_tokens :print_tokens_loop_end putc(10) return + :str_print_bad_token + string Unrecognized token type in print_tokens. Aborting. + byte 10 |