summaryrefslogtreecommitdiff
path: root/05/codegen.b
blob: 304b6124eac19699cdc9f2e68c66d1abc72cef44 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
; CALLING CONVENTION:
;  Here is the process for calling a function:
;     - the caller pushes the arguments on to the stack, from right to left
;     - the caller subtracts sizeof(return type) from rsp, rounded up to the nearest 8 bytes
;     - the caller calls the function
;     - the caller stores away the return value
;     - the caller adds (sizeof(return type) + sizeof arg0 + ... + sizeof argn) to rsp  - where each sizeof is rounded up to the nearest 8 bytes
; STACK LAYOUT:
;    arg n
;    ...
;    arg 0
;    return value   [rbp+16]
;    return address [rbp+8]
;    old rbp        [rbp]
;    local variables



; ---- code generator state ----

; write cursor: every emit_* function stores at this address and then advances it
global code_output
; = 0 on first global pass, 1 on second global pass
global codegen_second_pass
; ident list of addresses
global functions_addresses
; ident list of ident lists of label addresses
global functions_labels
; ident list of labels for current function (written to in 1st pass, read from in 2nd pass)
global curr_function_labels

; ---- x86-64 register numbers, as used in ModRM bytes (listed in encoding order) ----
#define REG_RAX 0
#define REG_RCX 1
#define REG_RDX 2
#define REG_RBX 3
#define REG_RSP 4
#define REG_RBP 5
#define REG_RSI 6
#define REG_RDI 7

; Append a single byte at code_output and move code_output past it.
function emit_byte
	argument byte
	local cursor
	cursor = code_output
	*1cursor = byte
	code_output = cursor + 1
	return

; Copy `count` bytes starting at `bytes` to code_output,
; then move code_output past the copied data.
function emit_bytes
	argument bytes
	argument count
	local cursor
	cursor = code_output
	memcpy(cursor, bytes, count)
	code_output = cursor + count
	return
	
; Append a 2-byte value at code_output and move code_output past it.
function emit_word
	argument word
	local cursor
	cursor = code_output
	*2cursor = word
	code_output = cursor + 2
	return

; Append a 4-byte value at code_output and move code_output past it.
; (the argument is named `word`, but 4 bytes are written)
function emit_dword
	argument word
	local cursor
	cursor = code_output
	*4cursor = word
	code_output = cursor + 4
	return

; Append an 8-byte value at code_output and move code_output past it.
; (the argument is named `word`, but 8 bytes are written)
function emit_qword
	argument word
	local cursor
	cursor = code_output
	*8cursor = word
	code_output = cursor + 8
	return

; Emit a 64-bit register-to-register move.
;   e.g. emit_mov_reg(REG_RAX, REG_RBX)  emits  mov rax, rbx
; dest and src are REG_* register numbers.
; Encoding: 48 89 MODRM,  where MODRM = 0xc0 | SRC<<3 | DEST
function emit_mov_reg
	argument dest
	argument src
	local modrm
	
	; assemble the ModRM byte first (in this language, `<` is a left shift)
	modrm = src < 3
	modrm |= dest
	modrm |= 0xc0
	
	; 48 89 (stored as a little-endian word), then the ModRM byte
	emit_word(0x8948)
	emit_byte(modrm)
	return
	

; Emit  sub rsp, imm32
; Encoding: 48 81 ec IMM32
function emit_sub_rsp_imm32
	argument imm32
	emit_word(0x8148)
	emit_byte(0xec)
	emit_dword(imm32)
	return

; Emit  mov qword [rsp], rbp
; Encoding: 48 89 2c 24  (written as one little-endian dword)
function emit_mov_qword_rsp_rbp
	emit_dword(0x242c8948)
	return

; Emit  mov rbp, qword [rsp]
; Encoding: 48 8b 2c 24  (written as one little-endian dword)
function emit_mov_rbp_qword_rsp
	emit_dword(0x242c8b48)
	return

; Emit  add rsp, imm32
; Encoding: 48 81 c4 IMM32
function emit_add_rsp_imm32
	argument imm32
	emit_word(0x8148)
	emit_byte(0xc4)
	emit_dword(imm32)
	return

; Emit  ret
; Encoding: c3
function emit_ret
	emit_byte(0xc3)
	return

; Emit  mov qword [rsp+imm32], rax
; Encoding: 48 89 84 24 IMM32
function emit_mov_qword_rsp_plus_imm32_rax
	argument imm32
	; opcode bytes 48 89 84 24, written as one little-endian dword
	emit_dword(0x24848948)
	; 32-bit displacement
	emit_dword(imm32)
	return

; Emit  mov rax, qword [rsp+imm32]
; Encoding: 48 8b 84 24 IMM32
function emit_mov_rax_qword_rsp_plus_imm32
	argument imm32
	; opcode bytes 48 8b 84 24, written as one little-endian dword
	emit_dword(0x24848b48)
	; 32-bit displacement
	emit_dword(imm32)
	return

; Emit  mov rax, imm64
; Encoding: 48 b8 IMM64
function emit_mov_rax_imm64
	argument imm64
	emit_word(0xb848)
	emit_qword(imm64)
	return

; Emit  call rax
; Encoding: ff d0
function emit_call_rax
	emit_word(0xd0ff)
	return

; Emit  syscall
; Encoding: 0f 05
function emit_syscall
	emit_word(0x050f)
	return

; Emit the function epilogue; it undoes the prologue written by generate_function.
; The return value must already have been stored in its slot before this is called.
function generate_return
	; mov rsp, rbp           (discard everything below the saved rbp)
	emit_mov_reg(REG_RSP, REG_RBP)
	; mov rbp, qword [rsp]   (48 8b 2c 24: restore the caller's rbp)
	emit_dword(0x242c8b48)
	; add rsp, 8             (step over the saved rbp slot)
	emit_add_rsp_imm32(8)
	; ret                    (c3)
	emit_byte(0xc3)
	return

; Generate code for one statement.
; Not implemented yet: the argument is ignored and nothing is emitted.
function generate_statement
	argument statement
	; @TODO
	return

; Generate the code for a single function: prologue, body, and an implicit return.
;   function_name       name used as the key into functions_labels
;   function_statement  passed straight on to generate_statement
; Also sets curr_function_labels: a fresh list on the first pass,
; the list recorded during the first pass on the second pass.
function generate_function
	argument function_name
	argument function_statement
	; currently unused
	local out0
	
	if codegen_second_pass == 0 goto genf_first_pass
		; second pass: reuse the label list recorded on the first pass
		curr_function_labels = ident_list_lookup(functions_labels, function_name)
		goto genf_cont
	:genf_first_pass
		; first pass: start a new label list and register it under this function's name
		curr_function_labels = ident_list_create(4000) ; ~ 200 labels per function should be plenty
		ident_list_add(functions_labels, function_name, curr_function_labels)
	:genf_cont
	
	; prologue:
	;   sub rsp, 8
	;   mov qword [rsp], rbp
	;   mov rbp, rsp
	emit_sub_rsp_imm32(8)
	emit_mov_qword_rsp_rbp()
	emit_mov_reg(REG_RBP, REG_RSP)
	
	; function body
	generate_statement(function_statement)
	
	; falling off the end of the function acts as a return
	generate_return()
	
	return

; Generate code for every entry in function_statements.
; The list is walked by hand; as this loop reads it, each entry is
;    NUL-terminated name, then an 8-byte value
; and an empty name (a 0 byte) ends the list.
; On the first pass, each function's address (its offset from output_file_data)
; is recorded in functions_addresses. On the second pass, the address is checked
; against the recorded one, and a mismatch is a fatal error.
function generate_functions
	local addr
	local c
	local p
	local function_name
	local statement
	
	function_name = function_statements
	
	:genfunctions_loop
		if *1function_name == 0 goto genfunctions_loop_end
		addr = code_output - output_file_data ; address of this function
		if codegen_second_pass != 0 goto genfs_check_addr
			; first pass; record address of function
			ident_list_add(functions_addresses, function_name, addr)
			goto genfs_cont
		:genfs_check_addr
			; second pass; the function must land exactly where it did on the first pass
			c = ident_list_lookup(functions_addresses, function_name)
			if c != addr goto function_addr_mismatch
		:genfs_cont
		; p = address of the 8-byte value that follows the name's NUL terminator
		p = memchr(function_name, 0)
		p += 1
		; hand generate_function the value stored in the entry, not the address of its slot
		statement = *8p
		generate_function(function_name, statement)
		; step over the 8-byte value to reach the next entry's name
		function_name = p + 8
		goto genfunctions_loop
	:genfunctions_loop_end
	return
	
	:function_addr_mismatch
		; address of function on 2nd pass doesn't line up with 1st pass
		fputs(2, .str_function_addr_mismatch)
		fputs(2, function_name)
		exit(1)
	:str_function_addr_mismatch
		string Function address on first pass doesn't match 2nd pass:
		byte 32
		byte 0

; emit ELF header and code.
; Steps:
;   1. write the ELF64 file header and three program headers at the start of output_file_data
;   2. generate all functions twice, starting at FUNCTIONS_ADDR both times
;      (first pass records addresses and labels, second pass checks/uses them)
;   3. write a small startup stub at ENTRY_ADDR which calls main and then
;      exits with main's return value
; Dies with an error if no address was recorded for main.
function generate_code
	
	code_output = output_file_data
	emit_qword(0x00010102464c457f) ; elf identifier, 64-bit little endian, ELF version 1
	emit_qword(0) ; reserved
	emit_word(2) ; executable file
	emit_word(0x3e) ; architecture x86-64
	emit_dword(1) ; ELF version 1
	emit_qword(ENTRY_ADDR) ; entry point
	emit_qword(0x40) ; program header table offset
	emit_qword(0) ; section header table offset
	emit_dword(0) ; flags
	emit_word(0x40) ; size of header
	emit_word(0x38) ; size of program header
	emit_word(3) ; # of program headers = 3 (code, rodata, rwdata)
	emit_word(0) ; size of section header
	emit_word(0) ; # of section headers
	emit_word(0) ; index of .shstrtab

	; from /usr/include/elf.h:
	;#define PF_X		(1 << 0)	/* Segment is executable */
	;#define PF_W		(1 << 1)	/* Segment is writable */
	;#define PF_R		(1 << 2)	/* Segment is readable */
	
	; NOTE: every segment uses the same value for its file offset and its
	; virtual address, so an offset from output_file_data is also the
	; address at run time.
	
	; program header 1 (code)
	emit_dword(1) ; loadable segment
	emit_dword(1) ; execute only
	emit_qword(ENTRY_ADDR) ; offset in file
	emit_qword(ENTRY_ADDR) ; virtual address
	emit_qword(0) ; physical address
	emit_qword(TOTAL_CODE_SIZE) ; size in executable file
	emit_qword(TOTAL_CODE_SIZE) ; size when loaded into memory
	emit_qword(4096) ; alignment
	
	; program header 2 (rodata)
	emit_dword(1) ; loadable segment
	emit_dword(4) ; read only
	emit_qword(RODATA_ADDR) ; offset in file
	emit_qword(RODATA_ADDR) ; virtual address
	emit_qword(0) ; physical address
	emit_qword(RODATA_SIZE) ; size in executable file
	emit_qword(RODATA_SIZE) ; size when loaded into memory
	emit_qword(4096) ; alignment
	
	; program header 3 (rwdata)
	emit_dword(1) ; loadable segment
	emit_dword(6) ; read/write
	emit_qword(RWDATA_ADDR) ; offset in file
	emit_qword(RWDATA_ADDR) ; virtual address
	emit_qword(0) ; physical address
	emit_qword(RWDATA_SIZE) ; size in executable file
	emit_qword(RWDATA_SIZE) ; size when loaded into memory
	emit_qword(4096) ; alignment
	
	
	
	; first pass: records function addresses and creates the per-function label lists
	code_output = output_file_data + FUNCTIONS_ADDR
	codegen_second_pass = 0
	generate_functions()
	; second pass: regenerate from the same starting point, now that addresses are known
	code_output = output_file_data + FUNCTIONS_ADDR
	codegen_second_pass = 1
	generate_functions()
	; generate code at the entry point of the executable
	local main_addr
	main_addr = ident_list_lookup(functions_addresses, .str_main)
	if main_addr == 0 goto no_main_function
	
	; on entry, we will have:
	;   argc = *rsp
	;   argv = rsp + 8
	; just before the call to main, the stack will be:
	;   [rsp+24]  argc, where the kernel put it (rsp on entry)
	;   [rsp+16]  argv  (argument 1 of main)
	;   [rsp+8]   argc  (argument 0 of main)
	;   [rsp]     space for main's return value
	code_output = output_file_data + ENTRY_ADDR
	; add rsp, 8
	emit_add_rsp_imm32(8)
	; mov rax, rsp  (set rax to argv)
	emit_mov_reg(REG_RAX, REG_RSP)
	; sub rsp, 32  (undo add rsp, 8 from before and add space for argv, argc, return value)
	emit_sub_rsp_imm32(32)
	; mov [rsp+16], rax  (put argv in the right place)
	emit_mov_qword_rsp_plus_imm32_rax(16)
	; mov rax, [rsp+24]  (set rax to argc)
	emit_mov_rax_qword_rsp_plus_imm32(24)
	; mov [rsp+8], rax   (put argc in the right place)
	emit_mov_qword_rsp_plus_imm32_rax(8)
	; mov rax, main
	emit_mov_rax_imm64(main_addr)
	; call rax
	emit_call_rax()
	; mov rax, [rsp]  (main's return value)
	emit_mov_rax_qword_rsp_plus_imm32(0)
	; mov rdi, rax  (exit code = main's return value)
	emit_mov_reg(REG_RDI, REG_RAX)
	; mov rax, 0x3c (SYS_exit)
	emit_mov_rax_imm64(0x3c)
	; syscall
	emit_syscall()
		
	return
	:no_main_function
	die(.str_no_main_function)
	:str_no_main_function
		string Error: No main function.
		byte 0