From 5d6b490cce1a99a2541d1fcee101df4331d4d86a Mon Sep 17 00:00:00 2001
From: pommicket
Date: Fri, 7 Jan 2022 23:32:27 -0500
Subject: start C compiler

---
 05/.gitignore   |   1 +
 05/Makefile     |  11 ++
 05/constants.b  |  32 +++++
 05/main.b       |  51 ++++++++
 05/main.c       |   6 +
 05/preprocess.b |  75 ++++++++++++
 05/util.b       | 357 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 533 insertions(+)
 create mode 100644 05/.gitignore
 create mode 100644 05/Makefile
 create mode 100644 05/constants.b
 create mode 100644 05/main.b
 create mode 100644 05/main.c
 create mode 100644 05/preprocess.b
 create mode 100644 05/util.b

(limited to '05')

diff --git a/05/.gitignore b/05/.gitignore
new file mode 100644
index 0000000..f4c3e60
--- /dev/null
+++ b/05/.gitignore
@@ -0,0 +1 @@
+in04
diff --git a/05/Makefile b/05/Makefile
new file mode 100644
index 0000000..7242404
--- /dev/null
+++ b/05/Makefile
@@ -0,0 +1,11 @@
+all: out04
+in04: *.b ../04a/out04
+	../04a/out04 main.b in04
+out04: in04 ../04/out03
+	../04/out03 in04 out04
+%.html: %.md ../markdown
+	../markdown $<
+%.out: %.c
+	./out04 $< $@
+clean:
+	rm -f out* README.html *.out
diff --git a/05/constants.b b/05/constants.b
new file mode 100644
index 0000000..691fe65
--- /dev/null
+++ b/05/constants.b
@@ -0,0 +1,32 @@
+; #define KEYWORD_AUTO 101 (auto only exists in C for legacy reasons and doesn't appear in TCC's source code)
+#define KEYWORD_DOUBLE 102
+#define KEYWORD_INT 103
+#define KEYWORD_STRUCT 104
+#define KEYWORD_BREAK 105
+#define KEYWORD_ELSE 106
+#define KEYWORD_LONG 107
+#define KEYWORD_SWITCH 108
+#define KEYWORD_CASE 109
+#define KEYWORD_ENUM 110
+#define KEYWORD_REGISTER 111
+#define KEYWORD_TYPEDEF 112
+#define KEYWORD_CHAR 113
+#define KEYWORD_EXTERN 114
+#define KEYWORD_RETURN 115
+#define KEYWORD_UNION 116
+; #define KEYWORD_CONST 117 (we can just #define const)
+#define KEYWORD_FLOAT 118
+#define KEYWORD_SHORT 119
+#define KEYWORD_UNSIGNED 120
+#define KEYWORD_CONTINUE 121
+#define KEYWORD_FOR 122
+; #define KEYWORD_SIGNED 123 (again, just #define signed)
+#define KEYWORD_VOID 124
+#define KEYWORD_DEFAULT 125
+#define KEYWORD_GOTO 126
+#define KEYWORD_SIZEOF 127
+; #define KEYWORD_VOLATILE 128 (just #define volatile if need be)
+#define KEYWORD_DO 129
+#define KEYWORD_IF 130
+#define KEYWORD_STATIC 131
+#define KEYWORD_WHILE 132
diff --git a/05/main.b b/05/main.b
new file mode 100644
index 0000000..6239f71
--- /dev/null
+++ b/05/main.b
@@ -0,0 +1,51 @@
+; add 24 + 16 = 40 to the stack pointer to put argc, argv in the right place
+byte 0x48
+byte 0x81
+byte 0xc4
+byte 40
+byte 0
+byte 0
+byte 0
+goto main
+
+#include util.b
+#include constants.b
+#include preprocess.b
+
+; entry point: with no arguments the input is main.c and the output is a.out; otherwise exactly two arguments (input file, output file) are required
+; so far only the input is used: it is passed to split_into_preprocessing_tokens, and output_filename is set but not read yet
+function main
+	argument argv2
+	argument argv1
+	argument argv0
+	argument argc
+	local input_filename
+	local output_filename
+
+	input_filename = .str_default_input_filename
+	output_filename = .str_default_output_filename
+	if argc == 1 goto have_filenames
+	if argc != 3 goto usage_error
+	input_filename = argv1
+	output_filename = argv2
+	:have_filenames
+	split_into_preprocessing_tokens(input_filename)
+	exit(0)
+
+:usage_error
+	fputs(2, .str_usage_error)
+	exit(1)
+
+:str_usage_error
+	string Please either specify no arguments or an input and output file.
+; end the message with a newline and a null terminator; without the terminator fputs would keep printing into the next string (main.c)
+	byte 10
+	byte 0
+
+:str_default_input_filename
+	string main.c
+	byte 0
+
+:str_default_output_filename
+	string a.out
+	byte 0
diff --git a/05/main.c b/05/main.c
new file mode 100644
index 0000000..fedd283
--- /dev/null
+++ b/05/main.c
@@ -0,0 +1,6 @@
+test\
+ing/*
+I am */testing
+that this is working
+hello \
+there.
diff --git a/05/preprocess.b b/05/preprocess.b
new file mode 100644
index 0000000..36fcbd2
--- /dev/null
+++ b/05/preprocess.b
@@ -0,0 +1,79 @@
+; returns a string of null character-separated preprocessing tokens
+; this corresponds to translation phases 1-3 in the C89 standard
+; NOTE: so far only backslash-newline removal (phase 2) is implemented, and the result is only printed to standard output
+function split_into_preprocessing_tokens
+	argument filename
+	local fd
+	local file_contents
+	local pptokens
+	local p
+	local c
+	local in
+	local out
+	local n
+
+	fd = open_r(filename)
+	file_contents = malloc(2000000)
+	pptokens = malloc(2000000)
+	p = file_contents
+	; read the file up to 4096 bytes at a time until the read (syscall 0) returns 0 for end of file
+	; NOTE(review): nothing here stops a file larger than the 2000000-byte buffer from overrunning it
+	:pptokens_read_loop
+	n = syscall(0, fd, p, 4096)
+	if n == 0 goto pptokens_read_loop_end
+	p += n
+	goto pptokens_read_loop
+	:pptokens_read_loop_end
+
+	; okay we read the file. first, delete every backslash-newline sequence (phase 2)
+	local newlines ; we add more newlines to keep line numbers right
+	newlines = 1
+	in = file_contents
+	out = file_contents
+	:backslashnewline_loop
+	c = *1in
+	if c == 0 goto backslashnewline_loop_end
+	if c == 10 goto proper_newline_loop
+	if c != '\ goto not_backslashnewline
+	p = in + 1
+	c = *1p
+	if c != 10 goto not_backslashnewline
+	in += 2 ; skip backslash and newline
+	newlines += 1 ; add one additional newline the next time around to compensate
+	goto backslashnewline_loop
+	:not_backslashnewline
+	*1out = *1in
+	out += 1
+	in += 1
+	goto backslashnewline_loop
+	:proper_newline_loop
+	if newlines == 0 goto proper_newline_loop_end
+	; output a newline
+	*1out = 10
+	out += 1
+	newlines -= 1
+	goto proper_newline_loop
+	:proper_newline_loop_end
+	newlines = 1
+	in += 1
+	goto backslashnewline_loop
+	:backslashnewline_loop_end
+	*1out = 0
+
+	in = file_contents
+
+	fputs(1, file_contents)
+
+	free(file_contents)
+	close(fd)
+	return
+
+	:unterminated_comment
+	fputs(2, .str_unterminated_comment)
+	fputs(2, filename)
+	fputc(2, 10)
+	exit(1)
+	:str_unterminated_comment
+	string Unterminated comment in file
+	byte 32
+	byte 0
diff --git a/05/util.b b/05/util.b
new file mode 100644
index 0000000..13fed4d
--- 
/dev/null
+++ b/05/util.b
@@ -0,0 +1,357 @@
+; print "Error opening file: " followed by name and a newline to file descriptor 2, then exit with status 1
+function file_error
+	argument name
+	fputs(2, .str_file_error)
+	fputs(2, name)
+	fputc(2, 10)
+	exit(1)
+
+:str_file_error
+	string Error opening file:
+	byte 32
+	byte 0
+; allocate size bytes with syscall 9 (mmap on x86-64 Linux); the total mapping size is stored in the 8 bytes before the returned pointer. prints a message and exits on failure
+function malloc
+	argument size
+	local total_size
+	local memory
+	total_size = size + 8
+	memory = syscall(9, 0, total_size, 3, 0x22, -1, 0)
+	if memory ] 0xffffffffffff0000 goto malloc_failed
+	*8memory = total_size
+	return memory + 8
+
+:malloc_failed
+	fputs(2, .str_out_of_memory)
+	exit(1)
+
+:str_out_of_memory
+	string Out of memory.
+	byte 10
+	byte 0
+; release memory returned by malloc: reads the size stored 8 bytes before it and passes that whole region to syscall 11 (munmap on x86-64 Linux)
+function free
+	argument memory
+	local psize
+	local size
+	psize = memory - 8
+	size = *8psize
+	syscall(11, psize, size)
+	return
+
+; returns a pointer to a null-terminated string containing the number given, in decimal. the string lives in the global itos_string buffer, so the next call overwrites it
+function itos
+	global 32 itos_string
+	argument x
+	local c
+	local p
+	p = &itos_string
+	p += 30 ; digits are written backwards starting at offset 30; offset 31 is never written
+	:itos_loop
+	c = x % 10
+	c += '0
+	*1p = c
+	x /= 10
+	if x == 0 goto itos_loop_end
+	p -= 1
+	goto itos_loop
+	:itos_loop_end
+	return p
+
+
+; returns the decimal number at the start of the given string (0 if it does not start with a digit)
+function stoi
+	argument s
+	local p
+	local n
+	local c
+	n = 0
+	p = s
+	:stoi_loop
+	c = *1p
+	if c < '0 goto stoi_loop_end
+	if c > '9 goto stoi_loop_end
+	n *= 10
+	n += c - '0
+	p += 1
+	goto stoi_loop
+	:stoi_loop_end
+	return n
+; returns a pointer to the first byte equal to c at or after mem. NOTE: unlike C's memchr there is no length argument, so the search only stops when a match is found
+function memchr
+	argument mem
+	argument c
+	local p
+	local a
+	p = mem
+	:memchr_loop
+	a = *1p
+	if a == c goto memchr_loop_end
+	p += 1
+	goto memchr_loop
+	:memchr_loop_end
+	return p
+; returns the number of bytes in s before the first null byte
+function strlen
+	argument s
+	local c
+	local p
+	p = s
+	:strlen_loop
+	c = *1p
+	if c == 0 goto strlen_loop_end
+	p += 1
+	goto strlen_loop
+	:strlen_loop_end
+	return p - s
+; copies the null-terminated string src, including its null byte, to dest. returns a pointer to the null byte written at the end of dest (not dest itself)
+function strcpy
+	argument dest
+	argument src
+	local p
+	local q
+	local c
+	p = dest
+	q = src
+	:strcpy_loop
+	c = *1q
+	*1p = c
+	if c == 0 goto strcpy_loop_end
+	p += 1
+	q += 1
+	goto strcpy_loop
+	:strcpy_loop_end
+	return p
+; returns 1 if s starts with prefix and 0 otherwise, by jumping to the shared return_1 / return_0 labels
+function str_startswith
+	argument s
+	argument prefix
+	local p
+	local q
+	local c1
+	local c2
+	p = s
+	q = prefix
+	:str_startswith_loop
+	c1 = *1p
+	c2 = *1q
+	if c2 == 0 goto return_1
+	if c1 != c2 goto return_0
+	p += 1
+	q += 1
+	goto str_startswith_loop
+; write the null-terminated string s to file descriptor fd using syscall 1; no newline is added
+function fputs
+	argument fd
+	argument s
+	local length
+	length = strlen(s)
+	syscall(1, fd, s, length)
+	return
+; write the null-terminated string s to file descriptor 1; unlike C's puts, no newline is added
+function puts
+	argument s
+	fputs(1, s)
+	return
+; write the number n to file descriptor fd, formatted by itos
+function fputn
+	argument fd
+	argument n
+	local s
+	s = itos(n)
+	fputs(fd, s)
+	return
+; write one byte to file descriptor fd, taken from the address of c (its low byte on x86-64)
+function fputc
+	argument fd
+	argument c
+	local p
+	p = &c
+	syscall(1, fd, p, 1)
+	return
+; write one byte to file descriptor 1 (see fputc)
+function putc
+	argument c
+	fputc(1, c)
+	return
+
+; read one byte from fd and return it. returns 0 at end of file (c is preset to 0 and stays 0 when nothing is read)
+function fgetc
+	argument fd
+	local c
+	local p
+	c = 0
+	p = &c
+	syscall(0, fd, p, 1)
+	return c
+
+; read a line from fd as a null-terminated string (the newline is not stored, and at most size-1 bytes are kept)
+; returns 0 at end of file, 1 otherwise
+function fgets
+	argument fd
+	argument buf
+	argument size
+	local p
+	local end
+	local c
+	p = buf
+	end = buf + size
+
+	:fgets_loop
+	c = fgetc(fd)
+	if c == 0 goto fgets_eof
+	if c == 10 goto fgets_eol
+	*1p = c
+	p += 1
+	if p == end goto fgets_eob
+	goto fgets_loop
+
+	:fgets_eol ; end of line
+	*1p = 0
+	return 1
+	:fgets_eof ; end of file
+	*1p = 0
+	return 0
+	:fgets_eob ; end of buffer
+	p -= 1 ; step back so the terminator fits; the last byte read is overwritten and lost
+	*1p = 0
+	return 1
+
+; open the given file for reading. returns the file descriptor; on failure the error is reported through file_error, which exits
+function open_r
+	argument filename
+	local fd
+	fd = syscall(2, filename, 0)
+	if fd < 0 goto open_r_error
+	return fd
+	:open_r_error
+	file_error(filename)
+	return -1
+
+; open the given file for writing with the given mode (flags 0x241 are O_WRONLY|O_CREAT|O_TRUNC on Linux). failures are reported through file_error, which exits
+function open_w
+	argument filename
+	argument mode
+	local fd
+	fd = syscall(2, filename, 0x241, mode)
+	if fd < 0 goto open_w_error
+	return fd
+	:open_w_error
+	file_error(filename)
+	return -1
+; close the file descriptor fd (syscall 3)
+function close
+	argument fd
+	syscall(3, fd)
+	return
+; returns 1 if c is between 'A and 'Z inclusive, 0 otherwise
+function isupper
+	argument c
+	if c < 'A goto return_0
+	if c <= 'Z goto return_1
+	goto return_0
+; end the process with the given status code (syscall 0x3c); note that this function has no return statement of its own
+function exit
+	argument status_code
+	syscall(0x3c, status_code)
+; shared exits: functions such as str_startswith and isupper jump to these labels to return 0 or 1
+:return_0
+	return 0
+:return_1
+	return 1
+; syscall(number, up to six arguments): copies the values at rbp-16 .. rbp-56 into rdi, rsi, rdx, r10, r8, r9 and the value at rbp-8 (presumably the first argument, the syscall number) into rax, then executes the syscall instruction
+function syscall
+	; I've done some testing, and this should be okay even if
+	; rbp-56 goes beyond the end of the stack.
+	; mov rax, [rbp-16]
+	byte 0x48
+	byte 0x8b
+	byte 0x85
+	byte 0xf0
+	byte 0xff
+	byte 0xff
+	byte 0xff
+	; mov rdi, rax
+	byte 0x48
+	byte 0x89
+	byte 0xc7
+
+	; mov rax, [rbp-24]
+	byte 0x48
+	byte 0x8b
+	byte 0x85
+	byte 0xe8
+	byte 0xff
+	byte 0xff
+	byte 0xff
+	; mov rsi, rax
+	byte 0x48
+	byte 0x89
+	byte 0xc6
+
+	; mov rax, [rbp-32]
+	byte 0x48
+	byte 0x8b
+	byte 0x85
+	byte 0xe0
+	byte 0xff
+	byte 0xff
+	byte 0xff
+	; mov rdx, rax
+	byte 0x48
+	byte 0x89
+	byte 0xc2
+
+	; mov rax, [rbp-40]
+	byte 0x48
+	byte 0x8b
+	byte 0x85
+	byte 0xd8
+	byte 0xff
+	byte 0xff
+	byte 0xff
+	; mov r10, rax
+	byte 0x49
+	byte 0x89
+	byte 0xc2
+
+	; mov rax, [rbp-48]
+	byte 0x48
+	byte 0x8b
+	byte 0x85
+	byte 0xd0
+	byte 0xff
+	byte 0xff
+	byte 0xff
+	; mov r8, rax
+	byte 0x49
+	byte 0x89
+	byte 0xc0
+
+	; mov rax, [rbp-56]
+	byte 0x48
+	byte 0x8b
+	byte 0x85
+	byte 0xc8
+	byte 0xff
+	byte 0xff
+	byte 0xff
+	; mov r9, rax
+	byte 0x49
+	byte 0x89
+	byte 0xc1
+
+	; mov rax, [rbp-8]
+	byte 0x48
+	byte 0x8b
+	byte 0x85
+	byte 0xf8
+	byte 0xff
+	byte 0xff
+	byte 0xff
+
+	; syscall
+	byte 0x0f
+	byte 0x05
+
+	return
+-- 
+cgit v1.2.3