summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--05/codegen.b195
-rw-r--r--05/constants.b7
-rw-r--r--05/main.b4
-rw-r--r--05/main.c5
-rw-r--r--README.md3
5 files changed, 210 insertions, 4 deletions
diff --git a/05/codegen.b b/05/codegen.b
new file mode 100644
index 0000000..1507e6d
--- /dev/null
+++ b/05/codegen.b
@@ -0,0 +1,195 @@
+; CALLING CONVENTION:
+; arguments are pushed onto the stack by the caller, from right to left
+; caller must also reserve space on stack for return value
+; so the function puts the return value at [rbp+16] (+8 for the saved rbp, +8 for the stored return address)
+
+
+
+global code_output
+global codegen_second_pass ; = 0 on first global pass, 1 on second global pass
+global functions_addresses ; ident list of addresses
+global functions_labels ; ident list of ident lists of label addresses
+global curr_function_labels ; ident list of labels for current function (written to in 1st pass, read from in 2nd pass)
+
+; register numbers, passed as the dest/src arguments of emit_mov_reg.
+; emit_mov_reg ORs them straight into the last byte of the instruction
+; (dest in the low bits, src shifted left by 3), so these values must not be renumbered.
+#define REG_RAX 0
+#define REG_RBX 3
+#define REG_RCX 1
+#define REG_RDX 2
+#define REG_RSP 4
+#define REG_RBP 5
+#define REG_RSI 6
+#define REG_RDI 7
+
+; append one byte to the generated code:
+; store `byte` as a 1-byte value at code_output, then advance code_output by 1.
+function emit_byte
+ argument byte
+ *1code_output = byte
+ code_output += 1
+ return
+
+; append a run of bytes to the generated code:
+; memcpy is called with (code_output, bytes, count) and code_output is then advanced by count.
+; presumably memcpy copies `count` bytes from `bytes` to code_output -- it is defined outside this file.
+function emit_bytes
+ argument bytes
+ argument count
+ memcpy(code_output, bytes, count)
+ code_output += count
+ return
+
+; append a 2-byte value to the generated code:
+; store `word` as a 2-byte value at code_output, then advance code_output by 2.
+function emit_word
+ argument word
+ *2code_output = word
+ code_output += 2
+ return
+
+; append a 4-byte value to the generated code:
+; store the argument as a 4-byte value at code_output, then advance code_output by 4.
+; (the argument is named `word`, but 4 bytes are written.)
+function emit_dword
+ argument word
+ *4code_output = word
+ code_output += 4
+ return
+
+; append an 8-byte value to the generated code:
+; store the argument as an 8-byte value at code_output, then advance code_output by 8.
+; (the argument is named `word`, but 8 bytes are written.)
+function emit_qword
+ argument word
+ *8code_output = word
+ code_output += 8
+ return
+
+; emit a register-to-register mov; dest and src are REG_* numbers.
+; e.g. emit_mov_reg(REG_RAX, REG_RBX) emits mov rax, rbx
+function emit_mov_reg
+ argument dest
+ argument src
+ local modrm
+
+ ; the instruction is three bytes: 48 89 MODRM, with MODRM = 0xc0 | SRC<<3 | DEST
+ emit_word(0x8948)
+ modrm = src < 3
+ modrm |= dest
+ modrm |= 0xc0
+ emit_byte(modrm)
+ return
+
+
+; emit sub rsp, imm32
+function emit_sub_rsp_imm32
+ argument imm32
+ ; bytes are 48 81 ec, followed by the 4-byte immediate
+ emit_word(0x8148)
+ emit_byte(0xec)
+ emit_dword(imm32)
+ return
+
+; emit mov qword [rsp], rbp
+function emit_mov_qword_rsp_rbp
+ ; bytes are 48 89 2c 24, written as a single 4-byte value
+ emit_dword(0x242c8948)
+ return
+
+; emit mov rbp, qword [rsp]
+function emit_mov_rbp_qword_rsp
+ ; bytes are 48 8b 2c 24, written as a single 4-byte value
+ emit_dword(0x242c8b48)
+ return
+
+; emit add rsp, imm32
+function emit_add_rsp_imm32
+ argument imm32
+ ; bytes are 48 81 c4, followed by the 4-byte immediate
+ emit_word(0x8148)
+ emit_byte(0xc4)
+ emit_dword(imm32)
+ return
+
+; emit ret (the single byte c3)
+function emit_ret
+ emit_byte(0xc3)
+ return
+
+; emit the code that leaves a function: it undoes the prologue emitted by
+; generate_function (sub rsp, 8 / mov [rsp], rbp / mov rbp, rsp) and then returns.
+; make sure you put the return value in the proper place before calling this
+function generate_return
+ emit_mov_reg(REG_RSP, REG_RBP) ; mov rsp, rbp -- back to where rsp was at the end of the prologue
+ emit_mov_rbp_qword_rsp() ; mov rbp, [rsp] -- reload the rbp value the prologue saved
+ emit_add_rsp_imm32(8) ; add rsp, 8 -- release the 8-byte slot that held it
+ emit_ret()
+ return
+
+; placeholder for statement code generation: currently emits nothing and ignores `statement`.
+; generate_function calls this once per function, between the prologue and the implicit return.
+function generate_statement
+ argument statement
+ ; @TODO
+ return
+
+; generate the code for one function: prologue, body, then an implicit return.
+; function_name: used as the key into functions_labels
+; function_statement: passed unchanged to generate_statement
+; also sets curr_function_labels to this function's label list.
+function generate_function
+ argument function_name
+ argument function_statement
+
+ if codegen_second_pass != 0 goto genf_second_pass
+ ; first pass: create this function's label list and register it under the function's name
+ curr_function_labels = ident_list_create(4000) ; ~ 200 labels per function should be plenty
+ ident_list_add(functions_labels, function_name, curr_function_labels)
+ goto genf_cont
+ :genf_second_pass
+ ; second pass: fetch the list that was registered on the first pass
+ curr_function_labels = ident_list_lookup(functions_labels, function_name)
+ :genf_cont
+
+ ; prologue: make an 8-byte slot on the stack, save the caller's rbp in it, then point rbp at that slot
+ emit_sub_rsp_imm32(8)
+ emit_mov_qword_rsp_rbp()
+ emit_mov_reg(REG_RBP, REG_RSP)
+
+ generate_statement(function_statement)
+
+ ; implicit return at end of function
+ generate_return()
+
+ return
+
+; walk every entry of function_statements and generate code for each function.
+; on the first pass, the output offset of each function is recorded in functions_addresses;
+; on the second pass, the offset is compared with the recorded one and a mismatch is a fatal error.
+function generate_functions
+ local addr
+ local c
+ local p
+ local function_name
+
+ function_name = function_statements
+
+ :genfunctions_loop
+ ; an entry whose name begins with a 0 byte marks the end of the list
+ if *1function_name == 0 goto genfunctions_loop_end
+ addr = code_output - output_file_data ; address of this function
+ if codegen_second_pass != 0 goto genfs_check_addr
+ ; first pass; record address of function
+ ident_list_add(functions_addresses, function_name, addr)
+ goto genfs_cont
+ :genfs_check_addr
+ ; second pass; the function must land at the address recorded on the first pass
+ c = ident_list_lookup(functions_addresses, function_name)
+ if c != addr goto function_addr_mismatch
+ goto genfs_cont
+ :genfs_cont
+ ; memcchr is not used here: memchr(function_name, 0) presumably returns the address of the
+ ; name's 0 terminator (it is defined outside this file); p is then moved just past it
+ p = memchr(function_name, 0)
+ p += 1
+ ; NOTE(review): p itself (the address just past the name), not the 8-byte value stored there,
+ ; is passed as function_statement -- confirm this is intended once generate_statement is implemented
+ generate_function(function_name, p)
+ ; the next entry's name starts 8 bytes after p
+ function_name = p + 8
+ goto genfunctions_loop
+ :genfunctions_loop_end
+ return
+
+ :function_addr_mismatch
+ ; address of function on 2nd pass doesn't line up with 1st pass
+ ; print the message and the function's name to file descriptor 2, then exit with status 1
+ fputs(2, .str_function_addr_mismatch)
+ fputs(2, function_name)
+ exit(1)
+ ; message text for the error above; the bytes 32 and 0 add a trailing space and the terminator
+ :str_function_addr_mismatch
+ string Function address on first pass doesn't match 2nd pass:
+ byte 32
+ byte 0
+
+; generate the code for all functions, in two passes over the same output area.
+; both passes start writing at FUNCTIONS_ADDR within output_file_data;
+; codegen_second_pass tells generate_functions / generate_function which pass is running.
+function generate_code
+ ; first pass
+ code_output = output_file_data + FUNCTIONS_ADDR
+ codegen_second_pass = 0
+ generate_functions()
+ ; second pass: rewind to the same starting position and generate everything again
+ code_output = output_file_data + FUNCTIONS_ADDR
+ codegen_second_pass = 1
+ generate_functions()
+ ; generate code at the entry point of the executable
+ ; @TODO
+ return
diff --git a/05/constants.b b/05/constants.b
index 68e3777..b719375 100644
--- a/05/constants.b
+++ b/05/constants.b
@@ -1,10 +1,13 @@
; this is the format of the executables we produce:
-; elf header 4MB addresses 0x000000-0x400000 (no, it won't actually take up that much space)
-; code 4MB addresses 0x400000-0x7fffff
+; elf header 2MB addresses 0x000000-0x1fffff (no, it won't actually take up that much space)
+; entry point 2MB addresses 0x200000-0x3fffff this is where we put the code to call main(), etc. (again, it won't actually take up that much space)
+; code (functions) 4MB addresses 0x400000-0x7fffff
; read-only data 4MB addresses 0x800000-0xbfffff
; read-write data 4MB addresses 0xc00000-0xffffff
; note that file offsets and runtime addresses are the same.
; you should be able to change these constants without breaking anything:
+#define ENTRY_ADDR 0x200000
+#define FUNCTIONS_ADDR 0x400000
#define RODATA_ADDR 0x800000
#define RWDATA_ADDR 0xc00000
#define RWDATA_END 0x1000000
diff --git a/05/main.b b/05/main.b
index aa20095..b94dc45 100644
--- a/05/main.b
+++ b/05/main.b
@@ -81,6 +81,7 @@ global function_param_has_no_name
#include preprocess.b
#include tokenize.b
#include parse.b
+#include codegen.b
function types_init
argument _types
@@ -235,6 +236,8 @@ function main
structure_locations = ident_list_create(2000000)
global_variables = ident_list_create(400000)
function_statements = ident_list_create(800000)
+ functions_addresses = ident_list_create(800000)
+ functions_labels = ident_list_create(800000)
function_types = ident_list_create(800000)
function_stmt_data = malloc(800000) ; should be at least 40 bytes * max # of functions
@@ -285,6 +288,7 @@ function main
; NOTE: do NOT free pptokens; identifiers still reference them.
parse_tokens(tokens)
+ generate_code()
p = output_file_data + RODATA_ADDR
munmap(output_file_data, RWDATA_END)
diff --git a/05/main.c b/05/main.c
index b7e07be..56af630 100644
--- a/05/main.c
+++ b/05/main.c
@@ -1,5 +1,3 @@
-#include "tests/parse_stb_truetype.h"
-
/*
; @NONSTANDARD:
; the following does not work:
@@ -14,3 +12,6 @@ This needs to be fixed because otherwise you can't do:
struct A { struct B *blah; }
struct B { struct A *blah; }
*/
+
+/* minimal program: main takes no arguments and has an empty body */
+int main(void) {
+}
diff --git a/README.md b/README.md
index 02134d9..982d431 100644
--- a/README.md
+++ b/README.md
@@ -105,6 +105,9 @@ In the table below, `IMM64` means a 64-bit *immediate* (a constant number).
`rdx:rax` refers to the 128-bit number you get by combining `rdx` and `rax`.
```
+ax bx cx dx sp bp si di
+0 3 1 2 4 5 6 7
+
┌──────────────────────┬───────────────────┬────────────────────────────────────────┐
│ Instruction │ Encoding │ Description │
├──────────────────────┼───────────────────┼────────────────────────────────────────┤