diff options
-rw-r--r-- | 05/codegen.b | 128 | ||||
-rw-r--r-- | 05/constants.b | 7 | ||||
-rw-r--r-- | 05/main.c | 6 | ||||
-rw-r--r-- | README.md | 6 |
4 files changed, 134 insertions, 13 deletions
diff --git a/05/codegen.b b/05/codegen.b index 2cd000c..304b612 100644 --- a/05/codegen.b +++ b/05/codegen.b @@ -1,10 +1,10 @@ ; CALLING CONVENTION: ; Here is the process for calling a function: ; - the caller pushes the arguments on to the stack, from right to left -; - the caller subtracts sizeof(return type) from rsp +; - the caller subtracts sizeof(return type) from rsp, rounded up to the nearest 8 bytes ; - the caller calls the function ; - the caller stores away the return value -; - the caller adds (sizeof(return type) + sizeof arg0 + ... + sizeof argn) to rsp +; - the caller adds (sizeof(return type) + sizeof arg0 + ... + sizeof argn) to rsp - where each sizeof is rounded up to the nearest 8 bytes ; STACK LAYOUT: ; arg n ; ... @@ -117,6 +117,45 @@ function emit_ret code_output += 1 return +function emit_mov_qword_rsp_plus_imm32_rax + argument imm32 + ; 48 89 84 24 IMM32 + *4code_output = 0x24848948 + code_output += 4 + *4code_output = imm32 + code_output += 4 + return + +function emit_mov_rax_qword_rsp_plus_imm32 + argument imm32 + ; 48 8b 84 24 IMM32 + *4code_output = 0x24848b48 + code_output += 4 + *4code_output = imm32 + code_output += 4 + return + +function emit_mov_rax_imm64 + argument imm64 + ; 48 b8 IMM64 + *2code_output = 0xb848 + code_output += 2 + *8code_output = imm64 + code_output += 8 + return + +function emit_call_rax + ; ff d0 + *2code_output = 0xd0ff + code_output += 2 + return + +function emit_syscall + ; 0f 05 + *2code_output = 0x050f + code_output += 2 + return + ; make sure you put the return value in the proper place before calling this function generate_return emit_mov_reg(REG_RSP, REG_RBP) @@ -193,7 +232,63 @@ function generate_functions byte 32 byte 0 +; emit ELF header and code. 
function generate_code + + code_output = output_file_data + emit_qword(0x00010102464c457f) ; elf identifier, 64-bit little endian, ELF version 1 + emit_qword(0) ; reserved + emit_word(2) ; executable file + emit_word(0x3e) ; architecture x86-64 + emit_dword(1) ; ELF version 1 + emit_qword(ENTRY_ADDR) ; entry point + emit_qword(0x40) ; program header table offset + emit_qword(0) ; section header table offset + emit_dword(0) ; flags + emit_word(0x40) ; size of header + emit_word(0x38) ; size of program header + emit_word(3) ; # of program headers = 3 (code, rodata, rwdata) + emit_word(0) ; size of section header + emit_word(0) ; # of section headers + emit_word(0) ; index of .shstrtab + + ; from /usr/include/elf.h: + ;#define PF_X (1 << 0) /* Segment is executable */ + ;#define PF_W (1 << 1) /* Segment is writable */ + ;#define PF_R (1 << 2) /* Segment is readable */ + + ; program header 1 (code) + emit_dword(1) ; loadable segment + emit_dword(1) ; execute only + emit_qword(ENTRY_ADDR) ; offset in file + emit_qword(ENTRY_ADDR) ; virtual address + emit_qword(0) ; physical address + emit_qword(TOTAL_CODE_SIZE) ; size in executable file + emit_qword(TOTAL_CODE_SIZE) ; size when loaded into memory + emit_qword(4096) ; alignment + + ; program header 2 (rodata) + emit_dword(1) ; loadable segment + emit_dword(4) ; read only + emit_qword(RODATA_ADDR) ; offset in file + emit_qword(RODATA_ADDR) ; virtual address + emit_qword(0) ; physical address + emit_qword(RODATA_SIZE) ; size in executable file + emit_qword(RODATA_SIZE) ; size when loaded into memory + emit_qword(4096) ; alignment + + ; program header 3 (rwdata) + emit_dword(1) ; loadable segment + emit_dword(6) ; read/write + emit_qword(RWDATA_ADDR) ; offset in file + emit_qword(RWDATA_ADDR) ; virtual address + emit_qword(0) ; physical address + emit_qword(RWDATA_SIZE) ; size in executable file + emit_qword(RWDATA_SIZE) ; size when loaded into memory + emit_qword(4096) ; alignment + + + local p_func code_output = 
output_file_data + FUNCTIONS_ADDR codegen_second_pass = 0 @@ -209,9 +304,32 @@ function generate_code ; on entry, we will have: ; argc = *rsp ; argv = rsp + 8 - - - ; @TODO + code_output = output_file_data + ENTRY_ADDR + ; add rsp, 8 + emit_add_rsp_imm32(8) + ; mov rax, rsp (set rax to argv) + emit_mov_reg(REG_RAX, REG_RSP) + ; sub rsp, 32 (undo add rsp, 8 from before and add space for argv, argc, return value) + emit_sub_rsp_imm32(32) + ; mov [rsp+16], rax (put argv in the right place) + emit_mov_qword_rsp_plus_imm32_rax(16) + ; mov rax, [rsp+24] (set rax to argc) + emit_mov_rax_qword_rsp_plus_imm32(24) + ; mov [rsp+8], rax (put argc in the right place) + emit_mov_qword_rsp_plus_imm32_rax(8) + ; mov rax, main + emit_mov_rax_imm64(main_addr) + ; call rax + emit_call_rax() + ; mov rax, [rsp] + emit_mov_rax_qword_rsp_plus_imm32(0) + ; mov rdi, rax + emit_mov_reg(REG_RDI, REG_RAX) + ; mov rax, 0x3c (SYS_exit) + emit_mov_rax_imm64(0x3c) + ; syscall + emit_syscall() + return :no_main_function die(.str_no_main_function) diff --git a/05/constants.b b/05/constants.b index c6555ee..67825a3 100644 --- a/05/constants.b +++ b/05/constants.b @@ -1,15 +1,18 @@ ; this is the format of the executables we produce: ; elf header 2MB addresses 0x000000-0x1fffff (no, it won't actually take up that much space) ; entry point 2MB addresses 0x200000-0x3fffff this is where we put the code to call main(), etc. (again, it won't actually take up that much space) -; code (functions) 4MB addresses 0x400000-0x7fffff +; functions 4MB addresses 0x400000-0x7fffff ; read-only data 4MB addresses 0x800000-0xbfffff ; read-write data 4MB addresses 0xc00000-0xffffff ; note that file offsets and runtime addresses are the same. 
-; you should be able to change these constants without breaking anything: +; you should be able to change these constants (in a way that's consistent) without breaking anything: #define ENTRY_ADDR 0x200000 #define FUNCTIONS_ADDR 0x400000 +#define TOTAL_CODE_SIZE 0x600000 #define RODATA_ADDR 0x800000 +#define RODATA_SIZE 0x400000 #define RWDATA_ADDR 0xc00000 +#define RWDATA_SIZE 0x400000 #define RWDATA_END 0x1000000 #define EXECUTABLE_SIZE 0x1000000 @@ -13,8 +13,6 @@ struct A { struct B *blah; } struct B { struct A *blah; } */ -int main(int argc, char **Argv) { - int i,j; - Argv+argc+i; - j; +int main(int argc, char **argv) { + argv+argc; } @@ -130,12 +130,14 @@ ax bx cx dx sp bp si di │ mov al, [rbx] │ 8a 03 │ load 1 byte from address rbx into al │ │ mov rax, [rbp+IMM32] │ 48 8b 85 IMM32 │ load 8 bytes from address rbp+IMM32 │ │ │ │ into rax (note: IMM32 may be negative) │ -│ lea rax, [rbp+IMM32] │ 48 8d 85 IMM32 │ set rax to rbp+IMM32 │ -│ lea rsp, [rbp+IMM32] │ 48 8d a5 IMM32 │ set rsp to rbp+IMM32 │ +│ mov rax, [rsp+IMM32] │ 48 8b 84 24 IMM32 │ load 8 bytes from address rsp+IMM32 │ +│ │ │ into rax (note: IMM32 may be negative) │ │ mov [rbp+IMM32], rax │ 48 89 85 IMM32 │ store rax in 8 bytes at rbp+IMM32 │ │ mov [rsp+IMM32], rax │ 48 89 84 24 IMM32 │ store rax in 8 bytes at rsp+IMM32 │ │ mov [rsp], rbp │ 48 89 2c 24 │ store rbp in 8 bytes at rsp │ │ mov rbp, [rsp] │ 48 8b 2c 24 │ load 8 bytes from rsp into rbp │ +│ lea rax, [rbp+IMM32] │ 48 8d 85 IMM32 │ set rax to rbp+IMM32 │ +│ lea rsp, [rbp+IMM32] │ 48 8d a5 IMM32 │ set rsp to rbp+IMM32 │ │ neg rax │ 48 f7 d8 │ set rax to -rax │ │ add rax, rbx │ 48 01 d8 │ add rbx to rax │ │ sub rax, rbx │ 48 29 d8 │ subtract rbx from rax │ |