From 017a70ee7e7d4df408c582789e15e632e8266993 Mon Sep 17 00:00:00 2001 From: pommicket Date: Thu, 10 Feb 2022 21:09:52 -0500 Subject: generating code for casts! --- 05/codegen.b | 400 +++++++++++++++++++++++++++++++++++++++++++++++++------- 05/main.b | 50 +++---- 05/main.c | 2 +- 05/parse.b | 22 ++-- 05/preprocess.b | 8 +- 05/tokenize.b | 4 +- 05/util.b | 16 +-- 7 files changed, 405 insertions(+), 97 deletions(-) (limited to '05') diff --git a/05/codegen.b b/05/codegen.b index 1f0d077..2b0ec29 100644 --- a/05/codegen.b +++ b/05/codegen.b @@ -21,6 +21,7 @@ global codegen_second_pass ; = 0 on first global pass, 1 on second global pass global functions_addresses ; ident list of addresses global functions_labels ; ident list of ident lists of label addresses global curr_function_labels ; ident list of labels for current function (written to in 1st pass, read from in 2nd pass) +global curr_function_return_type #define REG_RAX 0 #define REG_RBX 3 @@ -76,7 +77,72 @@ function emit_mov_reg *1code_output = n code_output += 1 return - + +function emit_mov_rax_imm64 + argument imm64 + ; 48 b8 IMM64 + *2code_output = 0xb848 + code_output += 2 + *8code_output = imm64 + code_output += 8 + return + +function emit_movsx_rax_al + ; 48 0f be c0 + *4code_output = 0xc0be0f48 + code_output += 4 + return + +function emit_movsx_rax_ax + ; 48 0f bf c0 + *4code_output = 0xc0bf0f48 + code_output += 4 + return + +function emit_movsx_rax_eax + ; 48 63 c0 + *2code_output = 0x6348 + code_output += 2 + *1code_output = 0xc0 + code_output += 1 + return + +function emit_movzx_rax_al + ; 48 0f b6 c0 + *4code_output = 0xc0b60f48 + code_output += 4 + return + +function emit_movzx_rax_ax + ; 48 0f b7 c0 + *4code_output = 0xc0b70f48 + code_output += 4 + return + +function emit_mov_eax_eax + ; 89 c0 + *2code_output = 0xc089 + code_output += 2 + return + +function emit_mov_qword_rsp_plus_imm32_rax + argument imm32 + ; 48 89 84 24 IMM32 + *4code_output = 0x24848948 + code_output += 4 + *4code_output = imm32 
+ code_output += 4 + return + +function emit_mov_rax_qword_rsp_plus_imm32 + argument imm32 + ; 48 8b 84 24 IMM32 + *4code_output = 0x24848b48 + code_output += 4 + *4code_output = imm32 + code_output += 4 + return + function emit_sub_rsp_imm32 argument imm32 @@ -117,33 +183,6 @@ function emit_ret code_output += 1 return -function emit_mov_qword_rsp_plus_imm32_rax - argument imm32 - ; 48 89 84 24 IMM32 - *4code_output = 0x24848948 - code_output += 4 - *4code_output = imm32 - code_output += 4 - return - -function emit_mov_rax_qword_rsp_plus_imm32 - argument imm32 - ; 48 8b 84 24 IMM32 - *4code_output = 0x24848b48 - code_output += 4 - *4code_output = imm32 - code_output += 4 - return - -function emit_mov_rax_imm64 - argument imm64 - ; 48 b8 IMM64 - *2code_output = 0xb848 - code_output += 2 - *8code_output = imm64 - code_output += 8 - return - function emit_call_rax ; ff d0 *2code_output = 0xd0ff @@ -185,6 +224,59 @@ function emit_movsq code_output += 2 return +function emit_movss_xmm0_dword_rax + ; f3 0f 10 00 + *4code_output = 0x00100ff3 + code_output += 4 + return + +function emit_movsd_xmm0_qword_rax + ; f2 0f 10 00 + *4code_output = 0x00100ff2 + code_output += 4 + return + +function emit_movss_dword_rax_xmm0 + ; f3 0f 11 00 + *4code_output = 0x00110ff3 + code_output += 4 + return + +function emit_movsd_qword_rax_xmm0 + ; f2 0f 11 00 + *4code_output = 0x00110ff2 + code_output += 4 + return + +function emit_cvtss2sd_xmm0_xmm0 + ; f3 0f 5a c0 + *4code_output = 0xc05a0ff3 + code_output += 4 + return + +function emit_cvtsd2ss_xmm0_xmm0 + ; f2 0f 5a c0 + *4code_output = 0xc05a0ff2 + code_output += 4 + return + +function emit_cvttsd2si_rax_xmm0 + ; f2 48 0f 2c c0 + *4code_output = 0x2c0f48f2 + code_output += 4 + *1code_output = 0xc0 + code_output += 1 + return + +function emit_cvtsi2sd_xmm0_rax + ; f2 48 0f 2a c0 + *4code_output = 0x2a0f48f2 + code_output += 4 + *1code_output = 0xc0 + code_output += 1 + return + + ; make sure you put the return value in the proper place 
before calling this function generate_return emit_mov_reg(REG_RSP, REG_RBP) @@ -193,24 +285,6 @@ function generate_return emit_ret() return -; returns pointer to end of expression -function generate_push_expression - argument expr - local c - c = *1expr - if c == EXPRESSION_CONSTANT_INT goto generate_push_int - - die(.str_genpushexprNI) - :str_genpushexprNI - string generate_push_expression not implemented. - byte 0 - :generate_push_int - expr += 8 - emit_mov_rax_imm64(*8expr) - emit_push_rax() - expr += 8 - return expr - ; copy sizeof(type) bytes, rounded up to the nearest 8, from rsi to rdi function generate_copy_rsi_to_rdi_qwords argument type @@ -230,6 +304,225 @@ function generate_copy_rsi_to_rdi_qwords emit_movsq() return +; cast whatever was just pushed onto the stack from from_type to to_type +; `statement` is used for errors +function generate_cast_top_of_stack + argument statement + argument from_type + argument to_type + local from + local to + local c + local d + + from = types + from_type + to = types + to_type + + if *1to == TYPE_VOID goto return_0 ; cast to void my ass + if *1from == TYPE_VOID goto bad_gen_cast ; cast from void to something - that's bad + if *1from == TYPE_ARRAY goto bad_gen_cast ; cast array (this probably won't ever happen because of decaying) + if *1to == TYPE_ARRAY goto bad_gen_cast ; cast to array + if *1from == TYPE_FUNCTION goto bad_gen_cast ; shouldn't happen + if *1to == TYPE_FUNCTION goto bad_gen_cast ; shouldn't happen + if *1to == TYPE_STRUCT goto gen_cast_to_struct + if *1from == TYPE_STRUCT goto bad_gen_cast ; cast from struct to something else + if *1to < TYPE_FLOAT goto gen_cast_to_integer + if *1to == TYPE_POINTER goto gen_cast_to_integer ; pointers are basically integers + + ; cast to float/double + if *1from == TYPE_POINTER goto bad_gen_cast ; pointer to float/double + if *1to == *1from goto return_0 + if *1from == TYPE_DOUBLE goto gen_cast_double_to_float + if *1from == TYPE_FLOAT goto gen_cast_float_to_double + ; 
int to float/double
+	if *1to == TYPE_FLOAT goto gen_cast_int_to_float
+	if *1to == TYPE_DOUBLE goto gen_cast_int_to_double
+
+	goto bad_gen_cast ; in theory we shouldn't get here
+
+	:gen_cast_to_integer
+		if *1from == *1to goto return_0 ; casting from type to same type
+		if *1from == TYPE_POINTER goto return_0 ; no need to do anything
+		; cast float/double to integer
+		if *1from == TYPE_FLOAT goto gen_cast_float_to_int
+		if *1from == TYPE_DOUBLE goto gen_cast_double_to_int
+
+		c = type_sizeof(*1from)
+		d = type_sizeof(*1to)
+		if d > c goto return_0 ; casting to bigger type, so we're good
+		if d == 8 goto return_0 ; d <= c here, so both are 8 bytes (signed/unsigned long): nothing to do. testing c instead would skip truncation for long -> char/short/int
+
+		; mov rax, [rsp]
+		emit_mov_rax_qword_rsp_plus_imm32(0)
+
+		; now sign/zero extend the lower part of rax to the whole of rax
+		if *1to == TYPE_CHAR goto gen_cast_integer_to_signed_char
+		if *1to == TYPE_UNSIGNED_CHAR goto gen_cast_integer_to_unsigned_char
+		if *1to == TYPE_SHORT goto gen_cast_integer_to_signed_short
+		if *1to == TYPE_UNSIGNED_SHORT goto gen_cast_integer_to_unsigned_short
+		if *1to == TYPE_INT goto gen_cast_integer_to_signed_int
+		if *1to == TYPE_UNSIGNED_INT goto gen_cast_integer_to_unsigned_int
+
+		goto bad_gen_cast ; in theory we shouldn't get here
+
+	:int2int_cast_cont
+		; mov [rsp], rax
+		emit_mov_qword_rsp_plus_imm32_rax(0)
+		return
+
+	:gen_cast_integer_to_signed_char
+		emit_movsx_rax_al()
+		goto int2int_cast_cont
+	:gen_cast_integer_to_unsigned_char
+		emit_movzx_rax_al()
+		goto int2int_cast_cont
+	:gen_cast_integer_to_signed_short
+		emit_movsx_rax_ax()
+		goto int2int_cast_cont
+	:gen_cast_integer_to_unsigned_short
+		emit_movzx_rax_ax()
+		goto int2int_cast_cont
+	:gen_cast_integer_to_signed_int
+		emit_movsx_rax_eax()
+		goto int2int_cast_cont
+	:gen_cast_integer_to_unsigned_int
+		emit_mov_eax_eax()
+		goto int2int_cast_cont
+	:gen_cast_to_struct
+		; this is necessary because we add an implicit cast for return values
+		; so if we didn't have this, we wouldn't be able to return structs.
+		if *1from != TYPE_STRUCT goto bad_gen_cast
+		from += 1
+		to += 1
+		if *8from != *8to goto bad_gen_cast
+		return ; no casting needed; these are the same type
+	:gen_cast_double_to_float
+		; mov rax, rsp
+		emit_mov_reg(REG_RAX, REG_RSP)
+		; movsd xmm0, [rax]
+		emit_movsd_xmm0_qword_rax()
+		; cvtsd2ss xmm0, xmm0
+		emit_cvtsd2ss_xmm0_xmm0()
+		; movss [rax], xmm0 (the result is a 4-byte float; [rsp+4] is left as junk, same as for int -> float)
+		emit_movss_dword_rax_xmm0()
+		return
+	:gen_cast_float_to_double
+		; mov rax, rsp
+		emit_mov_reg(REG_RAX, REG_RSP)
+		; movss xmm0, [rax]
+		emit_movss_xmm0_dword_rax()
+		; cvtss2sd xmm0, xmm0
+		emit_cvtss2sd_xmm0_xmm0()
+		; movsd [rax], xmm0 (the result is an 8-byte double; a 4-byte movss store would drop its upper half)
+		emit_movsd_qword_rax_xmm0()
+		return
+	:gen_cast_int_to_float
+		; to reduce # of instructions, we first convert int to double, then double to float
+		; mov rax, [rsp]
+		emit_mov_rax_qword_rsp_plus_imm32(0)
+		; cvtsi2sd xmm0, rax
+		emit_cvtsi2sd_xmm0_rax()
+		; cvtsd2ss xmm0, xmm0
+		emit_cvtsd2ss_xmm0_xmm0()
+		; mov rax, rsp
+		emit_mov_reg(REG_RAX, REG_RSP)
+		; movss [rax], xmm0
+		emit_movss_dword_rax_xmm0()
+		; it shouldn't matter that there's junk at [rsp+4]
+		return
+	:gen_cast_int_to_double
+		; mov rax, [rsp]
+		emit_mov_rax_qword_rsp_plus_imm32(0)
+		; cvtsi2sd xmm0, rax (rax is treated as signed -- NOTE(review): unsigned long values >= 2^63 would come out negative; confirm whether that matters here)
+		emit_cvtsi2sd_xmm0_rax()
+		; mov rax, rsp
+		emit_mov_reg(REG_RAX, REG_RSP)
+		; movsd [rax], xmm0
+		emit_movsd_qword_rax_xmm0()
+		return
+	:gen_cast_float_to_int
+		; mov rax, rsp
+		emit_mov_reg(REG_RAX, REG_RSP)
+		; movss xmm0, [rax]
+		emit_movss_xmm0_dword_rax()
+		; convert float to double, then double to int
+		; cvtss2sd xmm0, xmm0
+		emit_cvtss2sd_xmm0_xmm0()
+		; cvttsd2si rax, xmm0
+		emit_cvttsd2si_rax_xmm0()
+		; mov [rsp], rax
+		emit_mov_qword_rsp_plus_imm32_rax(0)
+		return
+	:gen_cast_double_to_int
+		; mov rax, rsp
+		emit_mov_reg(REG_RAX, REG_RSP)
+		; movsd xmm0, [rax]
+		emit_movsd_xmm0_qword_rax()
+		; cvttsd2si rax, xmm0
+		emit_cvttsd2si_rax_xmm0()
+		; mov [rsp], rax
+		emit_mov_qword_rsp_plus_imm32_rax(0)
+		return
+
+	:bad_gen_cast
+
print_statement_location(statement) + puts(.str_bad_gen_cast1) + print_type(from_type) + puts(.str_bad_gen_cast2) + print_type(to_type) + putc(10) + exit(1) + :str_bad_gen_cast1 + string : Error: Cannot convert type + byte 32 + byte 0 + :str_bad_gen_cast2 + string to type + byte 32 + byte 0 +; `statement` is used for errors +; returns pointer to end of expression +function generate_push_expression + argument statement + argument expr + local b + local c + local type + type = expr + 4 + type = *4type + + c = *1expr + if c == EXPRESSION_CONSTANT_INT goto generate_push_int + if c == EXPRESSION_CONSTANT_FLOAT goto generate_push_float + if c == EXPRESSION_CAST goto generate_cast + + die(.str_genpushexprNI) + :str_genpushexprNI + string generate_push_expression not implemented. + byte 0 + :generate_cast + expr += 4 + c = *4expr ; cast type + expr += 8 + b = *4expr ; original type + expr -= 4 + expr = generate_push_expression(statement, expr) + generate_cast_top_of_stack(statement, b, c) + return expr + :generate_push_float + expr += 8 + emit_mov_rax_imm64(*8expr) + emit_push_rax() + generate_cast_top_of_stack(statement, TYPE_DOUBLE, type) + expr += 8 + return expr + :generate_push_int + expr += 8 + emit_mov_rax_imm64(*8expr) + emit_push_rax() + expr += 8 + return expr + function generate_statement argument statement local dat1 @@ -268,22 +561,29 @@ function generate_statement return :gen_return if dat1 == 0 goto gen_return_noexpr - generate_push_expression(dat1) + generate_push_expression(statement, dat1) + p = dat1 + 4 ; pointer to dat1 type + generate_cast_top_of_stack(statement, *4p, curr_function_return_type) ; copy sizeof(return expression) rounded up to 8 bytes from [rsp] to [rbp+16] emit_mov_reg(REG_RSI, REG_RSP) emit_lea_rax_rbp_plus_imm32(16) emit_mov_reg(REG_RDI, REG_RAX) - p = dat1 + 4 generate_copy_rsi_to_rdi_qwords(*4p) :gen_return_noexpr generate_return() return + function generate_function argument function_name argument function_statement + local 
function_type local out0 + function_type = ident_list_lookup(function_types, function_name) + + curr_function_return_type = functype_return_type(function_type) + if codegen_second_pass != 0 goto genf_second_pass curr_function_labels = ident_list_create(4000) ; ~ 200 labels per function should be plenty ident_list_add(functions_labels, function_name, curr_function_labels) @@ -334,8 +634,8 @@ function generate_functions :function_addr_mismatch ; address of function on 2nd pass doesn't line up with 1st pass - fputs(2, .str_function_addr_mismatch) - fputs(2, function_name) + puts(.str_function_addr_mismatch) + puts(function_name) exit(1) :str_function_addr_mismatch string Function address on first pass doesn't match 2nd pass: diff --git a/05/main.b b/05/main.b index 0e356d7..7e570f5 100644 --- a/05/main.b +++ b/05/main.b @@ -108,27 +108,31 @@ function types_init p += 1 *8ptypes_bytes_used = p - types - return + return -function fprint_token_location - argument fd +function print_token_location argument token token += 2 - fprint_filename(fd, *2token) + print_filename(*2token) token += 2 - fputc(fd, ':) - fputn(fd, *4token) + putc(':) + putn(*4token) return +function print_statement_location + argument statement + ; statements & tokens have the same format for locations! 
+ print_token_location(statement) + return + ; accepts EITHER file index OR pointer to filename -function fprint_filename - argument fd +function print_filename argument file if file ] 65535 goto print_filename_string file = file_get(file) ; (fallthrough) :print_filename_string - fputs(2, file) + puts(file) return ; accepts EITHER file index OR pointer to filename @@ -136,12 +140,12 @@ function compile_error argument file argument line argument message - fprint_filename(2, file) - fputc(2, ':) - fputn(2, line) - fputs(2, .str_error_prefix) - fputs(2, message) - fputc(2, 10) + print_filename(file) + putc(':) + putn(line) + puts(.str_error_prefix) + puts(message) + putc(10) exit(1) function token_error @@ -161,12 +165,12 @@ function compile_warning argument file argument line argument message - fprint_filename(2, file) - fputc(2, ':) - fputn(2, line) - fputs(2, .str_warning_prefix) - fputs(2, message) - fputc(2, 10) + print_filename(file) + putc(':) + putn(line) + puts(.str_warning_prefix) + puts(message) + putc(10) return :str_error_prefix @@ -306,7 +310,7 @@ function main byte 0 :mmap_output_fd_failed - fputs(2, .str_mmap_output_fd_failed) + puts(.str_mmap_output_fd_failed) exit(1) :str_mmap_output_fd_failed string Couldn't mmap output file. 
@@ -314,7 +318,7 @@ function main byte 0 :usage_error - fputs(2, .str_usage_error) + puts(.str_usage_error) exit(1) :str_usage_error diff --git a/05/main.c b/05/main.c index 5cbffbf..899e6b9 100644 --- a/05/main.c +++ b/05/main.c @@ -14,5 +14,5 @@ struct B { struct A *blah; } */ int main(int argc, char **argv) { - return 42; + return (float)(long)"hello"; } diff --git a/05/parse.b b/05/parse.b index 7f288ae..618045b 100644 --- a/05/parse.b +++ b/05/parse.b @@ -843,7 +843,9 @@ function parse_statement if *1token == SYMBOL_SEMICOLON goto return_no_expr n = token_next_semicolon_not_in_brackets(token) *8out = expressions_end + p = expressions_end + 4 ; type of expression expressions_end = parse_expression(token, n, expressions_end) + type_decay_array_to_pointer_in_place(*4p) token = n + 16 :return_no_expr out += 32 @@ -2400,7 +2402,7 @@ function type_length type += type_length(type) return type - start :type_length_not_function - fputs(2, .str_type_length_bad_type) + puts(.str_type_length_bad_type) exit(1) :str_type_length_bad_type string Bad type passed to type_length. This shouldn't happen. @@ -2606,7 +2608,7 @@ function parse_expression if c == EXPRESSION_DIV goto type_binary_usual if c == EXPRESSION_REMAINDER goto type_binary_usual_integer - fputs(2, .str_binop_this_shouldnt_happen) + puts(.str_binop_this_shouldnt_happen) exit(1) :str_binop_this_shouldnt_happen string Bad binop symbol (this shouldn't happen). @@ -2769,7 +2771,7 @@ function parse_expression if c == EXPRESSION_DEREFERENCE goto unary_dereference if c == EXPRESSION_PRE_INCREMENT goto unary_type_scalar_nopromote if c == EXPRESSION_PRE_DECREMENT goto unary_type_scalar_nopromote - fputs(2, .str_unop_this_shouldnt_happen) + puts(.str_unop_this_shouldnt_happen) exit(1) :str_unop_this_shouldnt_happen string Bad unary symbol (this shouldn't happen). 
@@ -2798,7 +2800,9 @@ function parse_expression *4out = a out += 4 p += 16 + a = out + 4 ; pointer to casted expression type out = parse_expression(p, tokens_end, out) + type_decay_array_to_pointer_in_place(*4a) return out :bad_cast token_error(tokens, .str_bad_cast) @@ -2840,7 +2844,7 @@ function parse_expression *4type = a return out :unary_bad_type - fprint_token_location(1, tokens) + print_token_location(tokens) puts(.str_unary_bad_type) print_type(a) putc(10) @@ -3244,7 +3248,7 @@ function type_sizeof if c == TYPE_ARRAY goto sizeof_array if c == TYPE_STRUCT goto sizeof_struct - fputs(2, .str_sizeof_bad) + puts(.str_sizeof_bad) exit(1) :str_sizeof_bad string type_sizeof bad type. @@ -3309,7 +3313,7 @@ function type_alignof if c == TYPE_ARRAY goto alignof_array if c == TYPE_STRUCT goto alignof_struct - fputs(2, .str_alignof_bad) + puts(.str_alignof_bad) exit(1) :str_alignof_bad string type_alignof bad type. @@ -3637,7 +3641,7 @@ function fit_to_type if c == TYPE_LONG goto fit_to_type_long if c == TYPE_UNSIGNED_LONG goto fit_to_type_ulong if c == TYPE_POINTER goto fit_to_type_ulong - fputs(2, .str_bad_fit_to_type) + puts(.str_bad_fit_to_type) exit(1) :str_bad_fit_to_type string Bad type passed to fit_to_type. 
@@ -3726,7 +3730,7 @@ function bad_types_to_operator argument type1 argument type2 - fprint_token_location(1, token) + print_token_location(token) puts(.str_bad_types_to_operator) print_type(type1) puts(.str_space_and_space) @@ -4276,7 +4280,7 @@ function print_type if c == TYPE_ARRAY goto print_type_array if c == TYPE_STRUCT goto print_type_struct if c == TYPE_FUNCTION goto print_type_function - fputs(2, .str_bad_print_type) + puts(.str_bad_print_type) putnln(type) putnln(c) putnln(types_bytes_used) diff --git a/05/preprocess.b b/05/preprocess.b index 31ab43e..9a7dd5c 100644 --- a/05/preprocess.b +++ b/05/preprocess.b @@ -603,10 +603,10 @@ function translation_phase_4 if b != 0 goto pp_directive_endif goto unrecognized_directive :pp_directive_error - fputs(2, filename) - fputc(2, ':) - fputn(2, line_number) - fputs(2, .str_directive_error) + puts(filename) + putc(':) + putn(line_number) + puts(.str_directive_error) exit(1) :str_directive_error string : #error diff --git a/05/tokenize.b b/05/tokenize.b index 841f7df..809c9ff 100644 --- a/05/tokenize.b +++ b/05/tokenize.b @@ -15,7 +15,7 @@ function file_get :file_got return p :file_uhoh - fputs(2, .str_bad_file_index) + puts(.str_bad_file_index) exit(1) :str_bad_file_index string Bad file index. This shouldn't happen. 
@@ -609,7 +609,7 @@ function print_tokens if *1p == TOKEN_STRING_LITERAL goto print_token_string_literal if *1p == TOKEN_IDENTIFIER goto print_token_identifier if *1p == TOKEN_EOF goto print_token_eof - fputs(2, .str_print_bad_token) + puts(.str_print_bad_token) exit(1) :print_token_keyword s = get_keyword_str(*1p) diff --git a/05/util.b b/05/util.b index 25ce686..42b6342 100644 --- a/05/util.b +++ b/05/util.b @@ -137,9 +137,9 @@ function max_signed function file_error argument name - fputs(2, .str_file_error) - fputs(2, name) - fputc(2, 10) + puts(.str_file_error) + puts(name) + putc(10) exit(1) :str_file_error @@ -149,8 +149,8 @@ function file_error function die argument message - fputs(2, message) - fputc(2, 10) + puts(message) + putc(10) exit(1) function ftruncate @@ -162,7 +162,7 @@ function ftruncate return :ftruncate_failed - fputs(2, .str_ftruncate_failed) + puts(.str_ftruncate_failed) exit(1) :str_ftruncate_failed string ftruncated failed. @@ -200,7 +200,7 @@ function malloc return memory + 8 :malloc_failed - fputs(2, .str_out_of_memory) + puts(.str_out_of_memory) exit(1) :str_out_of_memory @@ -768,7 +768,7 @@ function leftmost_1bit :leftmost1bit_found return i :leftmost1bit_0 - fputs(2, .str_leftmost1bit_0) + puts(.str_leftmost1bit_0) exit(1) :str_leftmost1bit_0 string 0 passed to leftmost_1bit. -- cgit v1.2.3