; Parse the tokens in [tokens, tokens_end) and write the resulting expression at out.
; Tokens are stepped through 16 bytes at a time; byte 0 of a token is its type,
; byte 1 is read as the numeric suffix of constants, and bytes 8..15 are copied
; as the token's 8-byte value.
; Expression layout written here: byte 0 = expression type; for constants and
; string literals a 4-byte type at offset 4 and an 8-byte value at offset 8;
; for operators the operand expression(s) follow at offset 8 (member access
; stores the 8-byte member name first, then the operand).
; Returns the address just past the expression that was written.
function parse_expression
	argument tokens
	argument tokens_end
	argument out
	local in
	local a
	local b
	local c
	local p
	local n
	local best
	local best_precedence
	local depth
	local value
	:parse_expression_top
	if tokens == tokens_end goto empty_expression
	p = tokens + 16
	if p == tokens_end goto single_token_expression
	; is the whole expression wrapped in one matching pair of parentheses?
	if *1tokens != SYMBOL_LPAREN goto parse_expression_not_entirely_in_parens
	p = tokens_end - 16
	if *1p != SYMBOL_RPAREN goto parse_expression_not_entirely_in_parens
	depth = 1 ; bracket depth
	p = tokens + 16
	a = tokens_end - 16 ; stop point
	:expr_paren_check_loop
		if p >= a goto expr_paren_check_loop_end
		c = *1p
		p += 16
		if c == SYMBOL_LPAREN goto expr_paren_check_loop_incdepth
		if c == SYMBOL_RPAREN goto expr_paren_check_loop_decdepth
		goto expr_paren_check_loop
	:expr_paren_check_loop_incdepth
		depth += 1
		goto expr_paren_check_loop
	:expr_paren_check_loop_decdepth
		depth -= 1
		; the opening parenthesis was closed before the last token, e.g. (a)+(b)
		if depth == 0 goto parse_expression_not_entirely_in_parens
		goto expr_paren_check_loop
	:expr_paren_check_loop_end
	; if we made it this far, the expression is entirely in parentheses, e.g. (x+2)
	; strip the outer pair and start over
	tokens += 16
	tokens_end -= 16
	goto parse_expression_top
	:parse_expression_not_entirely_in_parens
	; look for the operator with the lowest precedence not in brackets
	depth = 0 ; paren/square bracket depth
	b = 1 ; first token? -- i.e. is this operator unary
	p = tokens
	best = 0
	best_precedence = 1000
	goto expr_find_operator_loop_first
	:expr_find_operator_loop
		b = 0
	:expr_find_operator_loop_first
		if p >= tokens_end goto expr_find_operator_loop_end
		c = *1p
		p += 16
		if c == SYMBOL_LPAREN goto expr_findop_incdepth
		if c == SYMBOL_RPAREN goto expr_findop_decdepth
		if c == SYMBOL_LSQUARE goto expr_findop_incdepth
		if c == SYMBOL_RSQUARE goto expr_findop_decdepth
		if depth > 0 goto expr_find_operator_loop
		if depth < 0 goto expr_too_many_closing_brackets
		a = operator_precedence(c, b)
		n = a
		; Ties between left-associative operators go to the rightmost one, because
		; an equal precedence is not "greater" and so replaces the current best.
		; For right-associative operators (= += -= etc.) the leftmost one must stay
		; the split point, so bias the comparison upwards by one: a later operator
		; of the same precedence then compares greater and is skipped. Precedence
		; levels are 0x10 apart, so the bias cannot cross into another level.
		n += operator_right_associative(c)
		if n > best_precedence goto expr_find_operator_loop
		; new best!
		best = p - 16
		best_precedence = a
		goto expr_find_operator_loop
	:expr_findop_incdepth
		depth += 1
		goto expr_find_operator_loop
	:expr_findop_decdepth
		depth -= 1
		goto expr_find_operator_loop
	:expr_find_operator_loop_end
	if best == 0 goto unrecognized_expression
	if best == tokens goto parse_expr_unary
	; it's a binary expression.
	c = *1best
	if c == SYMBOL_PLUS_PLUS goto parse_postincrement
	if c == SYMBOL_MINUS_MINUS goto parse_postdecrement
	if c == SYMBOL_QUESTION goto parse_conditional
	*1out = binop_symbol_to_expression_type(c)
	out += 8
	if c == SYMBOL_DOT goto parse_expr_member
	if c == SYMBOL_ARROW goto parse_expr_member
	out = parse_expression(tokens, best, out) ; first operand
	p = best + 16
	out = parse_expression(p, tokens_end, out) ; second operand
	return out
	;@TODO: casts
	:parse_expr_unary
		; c still holds the last token scanned by the search loop; reload the operator
		c = *1best
		if c == KEYWORD_SIZEOF goto parse_expr_sizeof
		byte 0xcc ; @TODO
	:parse_expr_sizeof
		byte 0xcc ; @TODO
	:parse_expr_member
		; -> or .
		p = best + 16
		if *1p != TOKEN_IDENTIFIER goto bad_expression
		p += 8
		*8out = *8p ; copy identifier name
		p += 8
		if p != tokens_end goto bad_expression ; e.g. foo->bar hello
		out += 8
		out = parse_expression(tokens, best, out)
		return out
	:parse_conditional
		byte 0xcc ; @TODO
	:parse_postincrement
		*1out = EXPRESSION_POST_INCREMENT
		out += 8
		p = tokens_end - 16
		if *1p != SYMBOL_PLUS_PLUS goto bad_expression ; e.g. a ++ b
		out = parse_expression(tokens, p, out)
		return out
	:parse_postdecrement
		*1out = EXPRESSION_POST_DECREMENT
		out += 8
		p = tokens_end - 16
		if *1p != SYMBOL_MINUS_MINUS goto bad_expression ; e.g. a -- b
		out = parse_expression(tokens, p, out)
		return out
	:single_token_expression
		in = tokens
		c = *1in
		if c == TOKEN_CONSTANT_INT goto expression_integer
		; character constants are basically the same as integer constants
		if c == TOKEN_CONSTANT_CHAR goto expression_integer
		if c == TOKEN_CONSTANT_FLOAT goto expression_float
		if c == TOKEN_STRING_LITERAL goto expression_string_literal
		goto unrecognized_expression
	:expression_integer
		*1out = EXPRESSION_CONSTANT_INT
		p = in + 8
		value = *8p
		p = out + 8
		*8p = value
		p = in + 1
		a = int_suffix_to_type(*1p) ; what the suffix says the type should be
		b = int_value_to_type(value) ; what the value says the type should be (if the value is too large to fit in int)
		a = max_signed(a, b) ; take the maximum of the two types
		; make sure that if the integer has a u suffix, the type will be unsigned
		a &= b | 0xfe
		p = out + 4
		*4p = a
		in += 16
		out += 16
		return out
	:expression_float
		*1out = EXPRESSION_CONSTANT_FLOAT
		p = in + 8
		value = *8p
		p = out + 8
		*8p = value
		p = in + 1
		a = float_suffix_to_type(*1p)
		p = out + 4
		*4p = a
		in += 16
		out += 16
		return out
	:expression_string_literal
		*1out = EXPRESSION_STRING_LITERAL
		p = in + 8
		value = *8p
		p = out + 8
		*8p = value
		; we already know this is char*
		p = out + 4
		*4p = TYPE_POINTER_TO_CHAR
		in += 16
		out += 16
		return out
	; error exits; each reports the error at the first token of the (sub)expression
	:empty_expression
		token_error(tokens, .str_empty_expression)
	:str_empty_expression
		string Empty expression.
		byte 0
	:bad_expression
		token_error(tokens, .str_bad_expression)
	:str_bad_expression
		string Bad expression.
		byte 0
	:unrecognized_expression
		token_error(tokens, .str_unrecognized_expression)
	:str_unrecognized_expression
		string Unrecognized expression.
		byte 0
	:expr_too_many_closing_brackets
		token_error(tokens, .str_too_many_closing_brackets)
	:str_too_many_closing_brackets
		string Too many closing brackets.
		byte 0

; shared return stubs, jumped to by int_suffix_to_type, float_suffix_to_type
; and int_value_to_type below
:return_type_int
	return TYPE_INT
:return_type_long
	return TYPE_LONG
:return_type_unsigned_int
	return TYPE_UNSIGNED_INT
:return_type_unsigned_long
	return TYPE_UNSIGNED_LONG
:return_type_float
	return TYPE_FLOAT
:return_type_double
	return TYPE_DOUBLE

; return precedence of given operator, or 0xffff if not an operator
; is_unary_prefix selects the unary-prefix table instead of the binary/postfix one.
; lower numbers bind more loosely.
function operator_precedence
	argument op
	argument is_unary_prefix
	if is_unary_prefix != 0 goto operator_precedence_unary
	; see "C OPERATOR PRECEDENCE" in constants.b
	if op == SYMBOL_COMMA goto return_0x10
	if op == SYMBOL_EQ goto return_0x20
	if op == SYMBOL_PLUS_EQ goto return_0x20
	if op == SYMBOL_MINUS_EQ goto return_0x20
	if op == SYMBOL_TIMES_EQ goto return_0x20
	if op == SYMBOL_DIV_EQ goto return_0x20
	if op == SYMBOL_PERCENT_EQ goto return_0x20
	if op == SYMBOL_LSHIFT_EQ goto return_0x20
	if op == SYMBOL_RSHIFT_EQ goto return_0x20
	if op == SYMBOL_AND_EQ goto return_0x20
	if op == SYMBOL_OR_EQ goto return_0x20
	if op == SYMBOL_XOR_EQ goto return_0x20
	if op == SYMBOL_QUESTION goto return_0x30
	if op == SYMBOL_OR_OR goto return_0x40
	if op == SYMBOL_AND_AND goto return_0x50
	if op == SYMBOL_OR goto return_0x60
	if op == SYMBOL_XOR goto return_0x70
	if op == SYMBOL_AND goto return_0x80
	if op == SYMBOL_EQ_EQ goto return_0x90
	if op == SYMBOL_NOT_EQ goto return_0x90
	if op == SYMBOL_LT goto return_0xa0
	if op == SYMBOL_GT goto return_0xa0
	if op == SYMBOL_LT_EQ goto return_0xa0
	if op == SYMBOL_GT_EQ goto return_0xa0
	if op == SYMBOL_LSHIFT goto return_0xb0
	if op == SYMBOL_RSHIFT goto return_0xb0
	if op == SYMBOL_PLUS goto return_0xc0
	if op == SYMBOL_MINUS goto return_0xc0
	if op == SYMBOL_TIMES goto return_0xd0
	if op == SYMBOL_DIV goto return_0xd0
	if op == SYMBOL_PERCENT goto return_0xd0
	if op == SYMBOL_ARROW goto return_0xf0
	if op == SYMBOL_DOT goto return_0xf0
	if op == SYMBOL_LPAREN goto return_0xf0 ; function call
	if op == SYMBOL_LSQUARE goto return_0xf0 ; subscript
	if op == SYMBOL_PLUS_PLUS goto return_0xf0
	if op == SYMBOL_MINUS_MINUS goto return_0xf0
	return 0xffff
	:operator_precedence_unary
	if op == KEYWORD_SIZEOF goto return_0xe0
	if op == SYMBOL_PLUS_PLUS goto return_0xe0
	if op == SYMBOL_MINUS_MINUS goto return_0xe0
	if op == SYMBOL_AND goto return_0xe0
	if op == SYMBOL_TIMES goto return_0xe0
	if op == SYMBOL_PLUS goto return_0xe0
	if op == SYMBOL_MINUS goto return_0xe0
	if op == SYMBOL_TILDE goto return_0xe0
	if op == SYMBOL_NOT goto return_0xe0
	return 0xffff

; is this operator right-associative? most C operators are left associative,
; but += / -= / etc. are not
; NOTE(review): this is a range check, so it relies on every assignment operator
; being numbered between SYMBOL_EQ and SYMBOL_OR_EQ -- confirm that SYMBOL_XOR_EQ
; falls inside that range in constants.b.
function operator_right_associative
	argument op
	if op < SYMBOL_EQ goto return_0
	if op > SYMBOL_OR_EQ goto return_0
	goto return_1

; pairs of (operator symbol, expression type), terminated by a (0, 0) pair
:binop_table
	byte SYMBOL_COMMA
	byte EXPRESSION_COMMA
	byte SYMBOL_EQ
	byte EXPRESSION_ASSIGN
	byte SYMBOL_PLUS_EQ
	byte EXPRESSION_ASSIGN_ADD
	byte SYMBOL_MINUS_EQ
	byte EXPRESSION_ASSIGN_SUB
	byte SYMBOL_TIMES_EQ
	byte EXPRESSION_ASSIGN_MUL
	byte SYMBOL_DIV_EQ
	byte EXPRESSION_ASSIGN_DIV
	byte SYMBOL_PERCENT_EQ
	byte EXPRESSION_ASSIGN_REMAINDER
	byte SYMBOL_LSHIFT_EQ
	byte EXPRESSION_ASSIGN_LSHIFT
	byte SYMBOL_RSHIFT_EQ
	byte EXPRESSION_ASSIGN_RSHIFT
	byte SYMBOL_AND_EQ
	byte EXPRESSION_ASSIGN_AND
	byte SYMBOL_OR_EQ
	byte EXPRESSION_ASSIGN_OR
	byte SYMBOL_XOR_EQ
	byte EXPRESSION_ASSIGN_XOR
	byte SYMBOL_OR_OR
	byte EXPRESSION_LOGICAL_OR
	byte SYMBOL_AND_AND
	byte EXPRESSION_LOGICAL_AND
	byte SYMBOL_OR
	byte EXPRESSION_BITWISE_OR
	byte SYMBOL_XOR
	byte EXPRESSION_BITWISE_XOR
	byte SYMBOL_AND
	byte EXPRESSION_BITWISE_AND
	byte SYMBOL_EQ_EQ
	byte EXPRESSION_EQ
	byte SYMBOL_NOT_EQ
	byte EXPRESSION_NEQ
	byte SYMBOL_LT
	byte EXPRESSION_LT
	byte SYMBOL_GT
	byte EXPRESSION_GT
	byte SYMBOL_LT_EQ
	byte EXPRESSION_LEQ
	byte SYMBOL_GT_EQ
	byte EXPRESSION_GEQ
	byte SYMBOL_LSHIFT
	byte EXPRESSION_LSHIFT
	byte SYMBOL_RSHIFT
	byte EXPRESSION_RSHIFT
	byte SYMBOL_PLUS
	byte EXPRESSION_ADD
	byte SYMBOL_MINUS
	byte EXPRESSION_SUB
	byte SYMBOL_TIMES
	byte EXPRESSION_MUL
	byte SYMBOL_DIV
	byte EXPRESSION_DIV
	byte SYMBOL_PERCENT
	byte EXPRESSION_REMAINDER
	byte SYMBOL_ARROW
	byte EXPRESSION_ARROW
	byte SYMBOL_DOT
	byte EXPRESSION_DOT
	byte SYMBOL_LSQUARE
	byte EXPRESSION_SUBSCRIPT
	byte 0
	byte 0

; look up op in binop_table; returns the paired expression type,
; or 0 if op is not in the table
function binop_symbol_to_expression_type
	argument op
	local p
	p = .binop_table
	:binop_symbol_to_expression_type_loop
		if *1p == op goto binop_symbol_to_expression_type_found
		p += 2
		if *1p != 0 goto binop_symbol_to_expression_type_loop
	return 0
	:binop_symbol_to_expression_type_found
		p += 1
		return *1p

; reverse lookup in binop_table; returns the operator symbol paired with
; exprtype, or 0 if exprtype is not in the table
function binop_expression_type_to_symbol
	argument exprtype
	local p
	p = .binop_table
	:binop_expr2symb_type_loop
		; step to the expression-type byte of the current pair
		p += 1
		if *1p == exprtype goto binop_expr2symb_type_found
		; step to the symbol byte of the next pair; 0 there means end of table
		p += 1
		if *1p != 0 goto binop_expr2symb_type_loop
	return 0
	:binop_expr2symb_type_found
		p -= 1
		return *1p

; type requested by an integer constant's suffix (int if the suffix is none of L, U, UL)
function int_suffix_to_type
	argument suffix
	if suffix == NUMBER_SUFFIX_L goto return_type_long
	if suffix == NUMBER_SUFFIX_U goto return_type_unsigned_int
	if suffix == NUMBER_SUFFIX_UL goto return_type_unsigned_long
	goto return_type_int

; type requested by a floating-point constant's suffix (double unless the suffix is F)
function float_suffix_to_type
	argument suffix
	if suffix == NUMBER_SUFFIX_F goto return_type_float
	goto return_type_double

; smallest integer type which can fit this value, only using unsigned if necessary
function int_value_to_type
	argument value
	if value [ 0x80000000 goto return_type_int
	if value [ 0x8000000000000000 goto return_type_long
	goto return_type_unsigned_long

; print the expression stored at the given address.
; if the 4-byte type at offset 4 is nonzero it is printed first, in parentheses.
; returns pointer to end of expression
function print_expression
	argument expression
	local c
	local b
	local p
	p = expression + 4
	if *4p == 0 goto print_expr_skip_type
	putc(40)
	print_type(*4p)
	putc(41)
	:print_expr_skip_type
	c = *1expression
	if c == EXPRESSION_CONSTANT_INT goto print_expr_int
	if c == EXPRESSION_CONSTANT_FLOAT goto print_expr_float
	if c == EXPRESSION_STRING_LITERAL goto print_expr_str
	if c == EXPRESSION_POST_INCREMENT goto print_post_increment
	if c == EXPRESSION_POST_DECREMENT goto print_post_decrement
	if c == EXPRESSION_DOT goto print_expr_dot
	if c == EXPRESSION_ARROW goto print_expr_arrow
	b = binop_expression_type_to_symbol(c)
	if b != 0 goto print_expr_binop
	; report on file descriptor 2, the same way print_type reports a bad type
	fputs(2, .str_print_bad_expr)
	exit(1)
	:str_print_bad_expr
		string Bad expression passed to print_expression.
		byte 10
		byte 0
	:print_expr_int
		expression += 8
		putn(*8expression)
		expression += 8
		return expression
	:print_expr_float
		; the 8-byte value is printed as hexadecimal
		expression += 8
		putx64(*8expression)
		expression += 8
		return expression
	:print_expr_str
		; prints the stored 8-byte value as 0x followed by putx32's output
		expression += 8
		putc('0)
		putc('x)
		putx32(*8expression)
		expression += 8
		return expression
	:print_expr_binop
		putc(40)
		expression += 8
		expression = print_expression(expression) ; 1st operand
		b = get_keyword_str(b)
		puts(b)
		expression = print_expression(expression) ; 2nd operand
		putc(41)
		return expression
	:print_expr_dot
		putc(40)
		expression += 8
		p = *8expression
		expression += 8
		expression = print_expression(expression)
		putc('.)
		puts(p)
		putc(41)
		return expression
	:print_expr_arrow
		putc(40)
		expression += 8
		p = *8expression
		expression += 8
		expression = print_expression(expression)
		puts(.str_arrow)
		puts(p)
		putc(41)
		return expression
	:print_post_increment
		putc(40)
		expression += 8
		expression = print_expression(expression)
		putc('+)
		putc('+)
		putc(41)
		return expression
	:print_post_decrement
		putc(40)
		expression += 8
		expression = print_expression(expression)
		putc('-)
		putc('-)
		putc(41)
		return expression

; print the type stored at offset `type` from `types`.
; NOTE: to make things easier, the format which this outputs isn't the same as C's, specifically we have
; *int for pointer to int and [5]int for array of 5 ints
function print_type
	argument type
	local c
	:print_type_top
	c = types + type
	c = *1c
	if c == TYPE_VOID goto print_type_void
	if c == TYPE_CHAR goto print_type_char
	if c == TYPE_UNSIGNED_CHAR goto print_type_unsigned_char
	if c == TYPE_SHORT goto print_type_short
	if c == TYPE_UNSIGNED_SHORT goto print_type_unsigned_short
	if c == TYPE_INT goto print_type_int
	if c == TYPE_UNSIGNED_INT goto print_type_unsigned_int
	if c == TYPE_LONG goto print_type_long
	if c == TYPE_UNSIGNED_LONG goto print_type_unsigned_long
	if c == TYPE_FLOAT goto print_type_float
	if c == TYPE_DOUBLE goto print_type_double
	if c == TYPE_POINTER goto print_type_pointer
	if c == TYPE_ARRAY goto print_type_array
	if c == TYPE_STRUCT goto print_type_struct
	if c == TYPE_UNION goto print_type_union
	fputs(2, .str_bad_print_type)
	exit(1)
	:str_bad_print_type
		string Bad type passed to print_type.
		byte 10
		byte 0
	:print_type_void
		return puts(.str_void)
	:print_type_char
		return puts(.str_char)
	:print_type_unsigned_char
		return puts(.str_unsigned_char)
	:print_type_short
		return puts(.str_short)
	:print_type_unsigned_short
		return puts(.str_unsigned_short)
	:print_type_int
		return puts(.str_int)
	:print_type_unsigned_int
		return puts(.str_unsigned_int)
	:print_type_long
		return puts(.str_long)
	:print_type_unsigned_long
		return puts(.str_unsigned_long)
	:print_type_float
		return puts(.str_float)
	:print_type_double
		return puts(.str_double)
	:print_type_pointer
		; the pointed-to type follows the TYPE_POINTER byte
		putc('*)
		type += 1
		goto print_type_top
	:print_type_array
		putc('[)
		type += 1
		; type is an offset into types, not an address, so the 8-byte length
		; has to be read relative to types (as the type byte is above)
		c = types + type
		putn(*8c) ; UNALIGNED
		putc('])
		type += 8
		goto print_type_top
	:print_type_struct
		return puts(.str_struct)
	:print_type_union
		return puts(.str_union)