summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorpommicket <pommicket@gmail.com>2022-02-10 16:06:17 -0500
committerpommicket <pommicket@gmail.com>2022-02-10 16:06:17 -0500
commitd8bb5b8957f705c3b0489878b19463f52ffb95ed (patch)
tree26adc60807704bf14cba7d37d74e5c896da6b8f0
parentb88de92fc7276a1548abbd8d30ed83d554db4146 (diff)
first working executable!
-rw-r--r--05/codegen.b128
-rw-r--r--05/constants.b7
-rw-r--r--05/main.c6
-rw-r--r--README.md6
4 files changed, 134 insertions, 13 deletions
diff --git a/05/codegen.b b/05/codegen.b
index 2cd000c..304b612 100644
--- a/05/codegen.b
+++ b/05/codegen.b
@@ -1,10 +1,10 @@
; CALLING CONVENTION:
; Here is the process for calling a function:
; - the caller pushes the arguments on to the stack, from right to left
-; - the caller subtracts sizeof(return type) from rsp
+; - the caller subtracts sizeof(return type) from rsp, rounded up to the nearest 8 bytes
; - the caller calls the function
; - the caller stores away the return value
-; - the caller adds (sizeof(return type) + sizeof arg0 + ... + sizeof argn) to rsp
+; - the caller adds (sizeof(return type) + sizeof arg0 + ... + sizeof argn) to rsp - where each sizeof is rounded up to the nearest 8 bytes
; STACK LAYOUT:
; arg n
; ...
@@ -117,6 +117,45 @@ function emit_ret
code_output += 1
return
+function emit_mov_qword_rsp_plus_imm32_rax
+ argument imm32
+ ; 48 89 84 24 IMM32
+ *4code_output = 0x24848948
+ code_output += 4
+ *4code_output = imm32
+ code_output += 4
+ return
+
+function emit_mov_rax_qword_rsp_plus_imm32
+ argument imm32
+ ; 48 8b 84 24 IMM32
+ *4code_output = 0x24848b48
+ code_output += 4
+ *4code_output = imm32
+ code_output += 4
+ return
+
+function emit_mov_rax_imm64
+ argument imm64
+ ; 48 b8 IMM64
+ *2code_output = 0xb848
+ code_output += 2
+ *8code_output = imm64
+ code_output += 8
+ return
+
+function emit_call_rax
+ ; ff d0
+ *2code_output = 0xd0ff
+ code_output += 2
+ return
+
+function emit_syscall
+ ; 0f 05
+ *2code_output = 0x050f
+ code_output += 2
+ return
+
; make sure you put the return value in the proper place before calling this
function generate_return
emit_mov_reg(REG_RSP, REG_RBP)
@@ -193,7 +232,63 @@ function generate_functions
byte 32
byte 0
+; emit ELF header and code.
function generate_code
+
+ code_output = output_file_data
+ emit_qword(0x00010102464c457f) ; elf identifier, 64-bit little endian, ELF version 1
+ emit_qword(0) ; reserved
+ emit_word(2) ; executable file
+ emit_word(0x3e) ; architecture x86-64
+ emit_dword(1) ; ELF version 1
+ emit_qword(ENTRY_ADDR) ; entry point
+ emit_qword(0x40) ; program header table offset
+ emit_qword(0) ; section header table offset
+ emit_dword(0) ; flags
+ emit_word(0x40) ; size of header
+ emit_word(0x38) ; size of program header
+ emit_word(3) ; # of program headers = 3 (code, rodata, rwdata)
+ emit_word(0) ; size of section header
+ emit_word(0) ; # of section headers
+ emit_word(0) ; index of .shstrtab
+
+ ; from /usr/include/elf.h:
+ ;#define PF_X (1 << 0) /* Segment is executable */
+ ;#define PF_W (1 << 1) /* Segment is writable */
+ ;#define PF_R (1 << 2) /* Segment is readable */
+
+ ; program header 1 (code)
+ emit_dword(1) ; loadable segment
+ emit_dword(1) ; execute only
+ emit_qword(ENTRY_ADDR) ; offset in file
+ emit_qword(ENTRY_ADDR) ; virtual address
+ emit_qword(0) ; physical address
+ emit_qword(TOTAL_CODE_SIZE) ; size in executable file
+ emit_qword(TOTAL_CODE_SIZE) ; size when loaded into memory
+ emit_qword(4096) ; alignment
+
+ ; program header 2 (rodata)
+ emit_dword(1) ; loadable segment
+ emit_dword(4) ; read only
+ emit_qword(RODATA_ADDR) ; offset in file
+ emit_qword(RODATA_ADDR) ; virtual address
+ emit_qword(0) ; physical address
+ emit_qword(RODATA_SIZE) ; size in executable file
+ emit_qword(RODATA_SIZE) ; size when loaded into memory
+ emit_qword(4096) ; alignment
+
+ ; program header 3 (rwdata)
+ emit_dword(1) ; loadable segment
+ emit_dword(6) ; read/write
+ emit_qword(RWDATA_ADDR) ; offset in file
+ emit_qword(RWDATA_ADDR) ; virtual address
+ emit_qword(0) ; physical address
+ emit_qword(RWDATA_SIZE) ; size in executable file
+ emit_qword(RWDATA_SIZE) ; size when loaded into memory
+ emit_qword(4096) ; alignment
+
+
+
local p_func
code_output = output_file_data + FUNCTIONS_ADDR
codegen_second_pass = 0
@@ -209,9 +304,32 @@ function generate_code
; on entry, we will have:
; argc = *rsp
; argv = rsp + 8
-
-
- ; @TODO
+ code_output = output_file_data + ENTRY_ADDR
+ ; add rsp, 8
+ emit_add_rsp_imm32(8)
+ ; mov rax, rsp (set rax to argv)
+ emit_mov_reg(REG_RAX, REG_RSP)
+ ; sub rsp, 32 (undo add rsp, 8 from before and add space for argv, argc, return value)
+ emit_sub_rsp_imm32(32)
+ ; mov [rsp+16], rax (put argv in the right place)
+ emit_mov_qword_rsp_plus_imm32_rax(16)
+ ; mov rax, [rsp+24] (set rax to argc)
+ emit_mov_rax_qword_rsp_plus_imm32(24)
+ ; mov [rsp+8], rax (put argc in the right place)
+ emit_mov_qword_rsp_plus_imm32_rax(8)
+ ; mov rax, main
+ emit_mov_rax_imm64(main_addr)
+ ; call rax
+ emit_call_rax()
+ ; mov rax, [rsp]
+ emit_mov_rax_qword_rsp_plus_imm32(0)
+ ; mov rdi, rax
+ emit_mov_reg(REG_RDI, REG_RAX)
+ ; mov rax, 0x3c (SYS_exit)
+ emit_mov_rax_imm64(0x3c)
+ ; syscall
+ emit_syscall()
+
return
:no_main_function
die(.str_no_main_function)
diff --git a/05/constants.b b/05/constants.b
index c6555ee..67825a3 100644
--- a/05/constants.b
+++ b/05/constants.b
@@ -1,15 +1,18 @@
; this is the format of the executables we produce:
; elf header 2MB addresses 0x000000-0x1fffff (no, it won't actually take up that much space)
; entry point 2MB addresses 0x200000-0x3fffff this is where we put the code to call main(), etc. (again, it won't actually take up that much space)
-; code (functions) 4MB addresses 0x400000-0x7fffff
+; functions 4MB addresses 0x400000-0x7fffff
; read-only data 4MB addresses 0x800000-0xbfffff
; read-write data 4MB addresses 0xc00000-0xffffff
; note that file offsets and runtime addresses are the same.
-; you should be able to change these constants without breaking anything:
+; you should be able to change these constants (in a way that's consistent) without breaking anything:
#define ENTRY_ADDR 0x200000
#define FUNCTIONS_ADDR 0x400000
+#define TOTAL_CODE_SIZE 0x600000
#define RODATA_ADDR 0x800000
+#define RODATA_SIZE 0x400000
#define RWDATA_ADDR 0xc00000
+#define RWDATA_SIZE 0x400000
#define RWDATA_END 0x1000000
#define EXECUTABLE_SIZE 0x1000000
diff --git a/05/main.c b/05/main.c
index 059291f..98cfb19 100644
--- a/05/main.c
+++ b/05/main.c
@@ -13,8 +13,6 @@ struct A { struct B *blah; }
struct B { struct A *blah; }
*/
-int main(int argc, char **Argv) {
- int i,j;
- Argv+argc+i;
- j;
+int main(int argc, char **argv) {
+ argv+argc;
}
diff --git a/README.md b/README.md
index 982d431..3430546 100644
--- a/README.md
+++ b/README.md
@@ -130,12 +130,14 @@ ax bx cx dx sp bp si di
│ mov al, [rbx] │ 8a 03 │ load 1 byte from address rbx into al │
│ mov rax, [rbp+IMM32] │ 48 8b 85 IMM32 │ load 8 bytes from address rbp+IMM32 │
│ │ │ into rax (note: IMM32 may be negative) │
-│ lea rax, [rbp+IMM32] │ 48 8d 85 IMM32 │ set rax to rbp+IMM32 │
-│ lea rsp, [rbp+IMM32] │ 48 8d a5 IMM32 │ set rsp to rbp+IMM32 │
+│ mov rax, [rsp+IMM32] │ 48 8b 84 24 IMM32 │ load 8 bytes from address rsp+IMM32 │
+│ │ │ into rax (note: IMM32 may be negative) │
│ mov [rbp+IMM32], rax │ 48 89 85 IMM32 │ store rax in 8 bytes at rbp+IMM32 │
│ mov [rsp+IMM32], rax │ 48 89 84 24 IMM32 │ store rax in 8 bytes at rsp+IMM32 │
│ mov [rsp], rbp │ 48 89 2c 24 │ store rbp in 8 bytes at rsp │
│ mov rbp, [rsp] │ 48 8b 2c 24 │ load 8 bytes from rsp into rbp │
+│ lea rax, [rbp+IMM32] │ 48 8d 85 IMM32 │ set rax to rbp+IMM32 │
+│ lea rsp, [rbp+IMM32] │ 48 8d a5 IMM32 │ set rsp to rbp+IMM32 │
│ neg rax │ 48 f7 d8 │ set rax to -rax │
│ add rax, rbx │ 48 01 d8 │ add rbx to rax │
│ sub rax, rbx │ 48 29 d8 │ subtract rbx from rax │