summaryrefslogtreecommitdiff
path: root/05
diff options
context:
space:
mode:
authorpommicket <pommicket@gmail.com>2022-01-07 23:32:27 -0500
committerpommicket <pommicket@gmail.com>2022-01-07 23:32:27 -0500
commit5d6b490cce1a99a2541d1fcee101df4331d4d86a (patch)
tree902295cbd43a7a32e179412d98f75dd2705fd6db /05
parent262824b21491446bb20acba8be1054207b5f50f0 (diff)
start C compiler
Diffstat (limited to '05')
-rw-r--r--05/.gitignore1
-rw-r--r--05/Makefile11
-rw-r--r--05/constants.b32
-rw-r--r--05/main.b46
-rw-r--r--05/main.c6
-rw-r--r--05/preprocess.b75
-rw-r--r--05/util.b357
7 files changed, 528 insertions, 0 deletions
diff --git a/05/.gitignore b/05/.gitignore
new file mode 100644
index 0000000..f4c3e60
--- /dev/null
+++ b/05/.gitignore
@@ -0,0 +1 @@
+in04
diff --git a/05/Makefile b/05/Makefile
new file mode 100644
index 0000000..7242404
--- /dev/null
+++ b/05/Makefile
@@ -0,0 +1,11 @@
+# Build chain: ../04a/out04 turns main.b (and the other *.b files) into in04,
+# then ../04/out03 turns in04 into the out04 executable.
+.PHONY: all clean
+all: out04
+in04: *.b ../04a/out04
+ ../04a/out04 main.b in04
+out04: in04 ../04/out03
+ ../04/out03 in04 out04
+%.html: %.md ../markdown
+ ../markdown $<
+# out04 is listed as a prerequisite so it is (re)built before being run;
+# $< is still the .c file because it is the first prerequisite.
+%.out: %.c out04
+ ./out04 $< $@
+# in04 is a generated intermediate file too, so remove it as well
+clean:
+ rm -f in04 out* README.html *.out
diff --git a/05/constants.b b/05/constants.b
new file mode 100644
index 0000000..691fe65
--- /dev/null
+++ b/05/constants.b
@@ -0,0 +1,32 @@
+; numeric ids for the C keywords, numbered consecutively from 101 to 132.
+; keywords that are deliberately not given an id are kept below as commented-out
+; entries (with the reason), so their numbers stay reserved and the numbering has gaps.
+; #define KEYWORD_AUTO 101 (auto only exists in C for legacy reasons and doesn't appear in TCC's source code)
+#define KEYWORD_DOUBLE 102
+#define KEYWORD_INT 103
+#define KEYWORD_STRUCT 104
+#define KEYWORD_BREAK 105
+#define KEYWORD_ELSE 106
+#define KEYWORD_LONG 107
+#define KEYWORD_SWITCH 108
+#define KEYWORD_CASE 109
+#define KEYWORD_ENUM 110
+#define KEYWORD_REGISTER 111
+#define KEYWORD_TYPEDEF 112
+#define KEYWORD_CHAR 113
+#define KEYWORD_EXTERN 114
+#define KEYWORD_RETURN 115
+#define KEYWORD_UNION 116
+; #define KEYWORD_CONST 117 (we can just #define const)
+#define KEYWORD_FLOAT 118
+#define KEYWORD_SHORT 119
+#define KEYWORD_UNSIGNED 120
+#define KEYWORD_CONTINUE 121
+#define KEYWORD_FOR 122
+; #define KEYWORD_SIGNED 123 (again, just #define signed)
+#define KEYWORD_VOID 124
+#define KEYWORD_DEFAULT 125
+#define KEYWORD_GOTO 126
+#define KEYWORD_SIZEOF 127
+; #define KEYWORD_VOLATILE 128 (just #define volatile if need be)
+#define KEYWORD_DO 129
+#define KEYWORD_IF 130
+#define KEYWORD_STATIC 131
+#define KEYWORD_WHILE 132
diff --git a/05/main.b b/05/main.b
new file mode 100644
index 0000000..6239f71
--- /dev/null
+++ b/05/main.b
@@ -0,0 +1,46 @@
+; startup stub, placed ahead of the #includes: adjust the stack pointer, then jump to main.
+; add 24 + 16 = 40 to the stack pointer to put argc, argv in the right place
+; the seven bytes below are the x86-64 instruction `add rsp, 40`:
+; 48 = REX.W prefix, 81 c4 = add rsp with a 32-bit immediate, then 40 as a little-endian 32-bit value
+byte 0x48
+byte 0x81
+byte 0xc4
+byte 40
+byte 0
+byte 0
+byte 0
+goto main
+
+#include util.b
+#include constants.b
+#include preprocess.b
+
+; program entry point (reached by the `goto main` at the top of this file).
+; with no command-line arguments (argc == 1) the default file names below are used;
+; with exactly two arguments (argc == 3) they are the input and output file names.
+; any other argument count prints a usage message to standard error and exits with status 1.
+; NOTE: output_filename is stored but not used by anything yet.
+function main
+ argument argv2
+ argument argv1
+ argument argv0
+ argument argc
+ local input_filename
+ local output_filename
+
+ input_filename = .str_default_input_filename
+ output_filename = .str_default_output_filename
+ if argc == 1 goto have_filenames
+ if argc != 3 goto usage_error
+ input_filename = argv1
+ output_filename = argv2
+ :have_filenames
+ split_into_preprocessing_tokens(input_filename)
+ exit(0)
+
+:usage_error
+ fputs(2, .str_usage_error)
+ exit(1)
+
+; `string` does not add a terminator (every other string in this program is followed by
+; an explicit `byte 0`), so without the two bytes below fputs would keep printing into
+; the "main.c" data that follows. the byte 10 ends the message with a newline.
+:str_usage_error
+ string Please either specify no arguments or an input and output file.
+ byte 10
+ byte 0
+
+:str_default_input_filename
+ string main.c
+ byte 0
+
+:str_default_output_filename
+ string a.out
+ byte 0
diff --git a/05/main.c b/05/main.c
new file mode 100644
index 0000000..fedd283
--- /dev/null
+++ b/05/main.c
@@ -0,0 +1,6 @@
+test\
+ing/*
+I am */testing
+that this is working
+hello \
+there.
diff --git a/05/preprocess.b b/05/preprocess.b
new file mode 100644
index 0000000..36fcbd2
--- /dev/null
+++ b/05/preprocess.b
@@ -0,0 +1,75 @@
+; returns a string of null character-separated preprocessing tokens
+; this corresponds to translation phases 1-3 in the C89 standard
+; NOTE: work in progress. at the moment the whole file is read into memory, every
+; backslash-newline pair is removed (phase 2), the result is written to standard output,
+; and nothing is returned. pptokens is allocated but not used yet.
+; exits with status 1 if the file cannot be opened or does not fit in the 2000000-byte buffer.
+function split_into_preprocessing_tokens
+ argument filename
+ local fd
+ local file_contents
+ local pptokens
+ local p
+ local c
+ local in
+ local out
+ local n
+ local buf_end
+ local space
+
+ fd = open_r(filename)
+ file_contents = malloc(2000000)
+ pptokens = malloc(2000000)
+ ; the last byte of the buffer is reserved for the null terminator
+ buf_end = file_contents + 1999999
+ p = file_contents
+ :pptokens_read_loop
+ ; never ask read for more than what is left in the buffer
+ space = buf_end - p
+ if space == 0 goto pptokens_file_too_large
+ n = syscall(0, fd, p, space)
+ ; 0 means end of file; a negative value is an error code. stop in both cases
+ ; so that p never moves backwards.
+ if n <= 0 goto pptokens_read_loop_end
+ p += n
+ ; keep reading until end of file (a single read may return only part of the file)
+ goto pptokens_read_loop
+ :pptokens_read_loop_end
+ ; terminate the contents explicitly; the loops below stop at the first zero byte
+ *1p = 0
+
+ ; okay we read the file. first, delete every backslash-newline sequence (phase 2)
+ ; this is done in place: out never gets ahead of in, because every newline that is
+ ; added replaces a backslash-newline pair that was skipped earlier.
+ local newlines ; we add more newlines to keep line numbers right
+ newlines = 1
+ in = file_contents
+ out = file_contents
+ :backslashnewline_loop
+ c = *1in
+ if c == 0 goto backslashnewline_loop_end
+ if c == 10 goto proper_newline_loop
+ if c != '\ goto not_backslashnewline
+ p = in + 1
+ c = *1p
+ if c != 10 goto not_backslashnewline
+ in += 2 ; skip backslash and newline
+ newlines += 1 ; add one additional newline the next time around to compensate
+ goto backslashnewline_loop
+ :not_backslashnewline
+ *1out = *1in
+ out += 1
+ in += 1
+ goto backslashnewline_loop
+ :proper_newline_loop
+ ; a real newline: emit it, plus one newline for each backslash-newline removed since the last one
+ if newlines == 0 goto proper_newline_loop_end
+ ; output a newline
+ *1out = 10
+ out += 1
+ newlines -= 1
+ goto proper_newline_loop
+ :proper_newline_loop_end
+ newlines = 1
+ in += 1
+ goto backslashnewline_loop
+ :backslashnewline_loop_end
+ *1out = 0
+
+ in = file_contents
+
+ fputs(1, file_contents)
+
+ free(file_contents)
+ close(fd)
+ return
+
+ :unterminated_comment
+ fputs(2, .str_unterminated_comment)
+ fputs(2, filename)
+ fputc(2, 10)
+ exit(1)
+ :str_unterminated_comment
+ string Unterminated comment in file
+ byte 32
+ byte 0
+
+ :pptokens_file_too_large
+ fputs(2, .str_file_too_large)
+ fputs(2, filename)
+ fputc(2, 10)
+ exit(1)
+ :str_file_too_large
+ string File too large:
+ byte 32
+ byte 0
diff --git a/05/util.b b/05/util.b
new file mode 100644
index 0000000..13fed4d
--- /dev/null
+++ b/05/util.b
@@ -0,0 +1,357 @@
+
+; report that a file could not be opened: writes "Error opening file: ", then name,
+; then a newline (character 10) to file descriptor 2, and exits with status 1.
+; name must be a null-terminated string (it is printed with fputs).
+function file_error
+ argument name
+ fputs(2, .str_file_error)
+ fputs(2, name)
+ fputc(2, 10)
+ exit(1)
+
+; the message is followed by a space (byte 32) and the null terminator
+:str_file_error
+ string Error opening file:
+ byte 32
+ byte 0
+
+; allocate size bytes and return a pointer to them.
+; each allocation is its own memory mapping: Linux syscall 9 (mmap) is called with
+; address 0, length size + 8, protection 3 (read | write), flags 0x22 (private | anonymous),
+; fd -1 and offset 0. the first 8 bytes of the mapping store the total mapped size
+; so that free can unmap it; the returned pointer is just past those 8 bytes.
+; if the mapping fails, prints "Out of memory." to file descriptor 2 and exits with status 1.
+function malloc
+ argument size
+ local total_size
+ local memory
+ total_size = size + 8
+ memory = syscall(9, 0, total_size, 3, 0x22, -1, 0)
+ ; unsigned comparison: a result this close to the top of the address space is treated as an error code
+ if memory ] 0xffffffffffff0000 goto malloc_failed
+ *8memory = total_size
+ return memory + 8
+
+:malloc_failed
+ fputs(2, .str_out_of_memory)
+ exit(1)
+
+:str_out_of_memory
+ string Out of memory.
+ byte 10
+ byte 0
+
+; release a block that was returned by malloc.
+; malloc stores the total mapped size in the 8 bytes just before the pointer it returns;
+; that size is read back here and the whole mapping is removed with Linux syscall 11 (munmap).
+; a null pointer is ignored, like free(NULL) in C.
+function free
+ argument memory
+ local psize
+ local size
+ ; without this guard, free(0) would read the size from address -8
+ if memory == 0 goto free_done
+ psize = memory - 8
+ size = *8psize
+ syscall(11, psize, size)
+ :free_done
+ return
+
+; returns a pointer to a null-terminated string containing the number given
+; the decimal digits are written from right to left into the 32-byte global itos_string,
+; starting at offset 30; the returned pointer is to the leftmost digit written.
+; the buffer is shared between calls, so the result is only valid until the next call to itos.
+; NOTE(review): offset 31 is never written here, so the terminator relies on the global
+; starting out as zero -- confirm.
+; NOTE(review): no '-' sign is ever written; presumably only intended for non-negative x.
+function itos
+ global 32 itos_string
+ argument x
+ local c
+ local p
+ p = &itos_string
+ p += 30
+ :itos_loop
+ c = x % 10
+ c += '0
+ *1p = c
+ x /= 10
+ if x == 0 goto itos_loop_end
+ p -= 1
+ goto itos_loop
+ :itos_loop_end
+ return p
+
+
+; parses the run of decimal digits at the beginning of s and returns its value.
+; parsing stops at the first character outside '0'..'9'; if s does not start with a
+; digit the result is 0.
+function stoi
+ argument s
+ local value
+ local cursor
+ local digit
+ cursor = s
+ value = 0
+ :stoi_loop
+ digit = *1cursor
+ if digit < '0 goto stoi_loop_end
+ if digit > '9 goto stoi_loop_end
+ ; turn the character into its numeric value, then append it to the number
+ digit -= '0
+ value *= 10
+ value += digit
+ cursor += 1
+ goto stoi_loop
+ :stoi_loop_end
+ return value
+
+; returns a pointer to the first byte at or after mem that is equal to c.
+; unlike memchr in C there is no length argument: the search only ends when a match is found.
+function memchr
+ argument mem
+ argument c
+ local cursor
+ local current
+ cursor = mem
+ :memchr_loop
+ current = *1cursor
+ if current == c goto memchr_loop_end
+ cursor += 1
+ goto memchr_loop
+ :memchr_loop_end
+ return cursor
+
+; returns the number of bytes in s before its null terminator.
+function strlen
+ argument s
+ local cursor
+ local current
+ cursor = s
+ :strlen_loop
+ current = *1cursor
+ if current == 0 goto strlen_loop_end
+ cursor += 1
+ goto strlen_loop
+ :strlen_loop_end
+ ; cursor now points at the terminator, so the distance from s is the length
+ return cursor - s
+
+; copy the null-terminated string src (including its terminator) to dest.
+; note the return value: it is a pointer to the terminator just written in dest
+; (the end of the copied string), not dest itself as with strcpy in C.
+function strcpy
+ argument dest
+ argument src
+ local p
+ local q
+ local c
+ p = dest
+ q = src
+ :strcpy_loop
+ c = *1q
+ *1p = c
+ ; the terminator is copied before this check, so dest always ends up terminated
+ if c == 0 goto strcpy_loop_end
+ p += 1
+ q += 1
+ goto strcpy_loop
+ :strcpy_loop_end
+ return p
+
+; returns 1 if the string s begins with the string prefix, and 0 otherwise.
+; the function has no return statement of its own: it leaves by jumping to the
+; return_0 / return_1 labels defined after the exit function.
+function str_startswith
+ argument s
+ argument prefix
+ local p
+ local q
+ local c1
+ local c2
+ p = s
+ q = prefix
+ :str_startswith_loop
+ c1 = *1p
+ c2 = *1q
+ ; reaching the end of prefix means every character of it matched
+ if c2 == 0 goto return_1
+ ; this also covers s ending first: its terminator differs from the non-zero c2
+ if c1 != c2 goto return_0
+ p += 1
+ q += 1
+ goto str_startswith_loop
+
+; write the null-terminated string s (without its terminator) to file descriptor fd,
+; using Linux syscall 1 (write). no newline is added, and the result of the write is not checked.
+function fputs
+ argument fd
+ argument s
+ local length
+ length = strlen(s)
+ syscall(1, fd, s, length)
+ return
+
+; write the null-terminated string s to file descriptor 1.
+; unlike puts in C, no newline is added (this just calls fputs).
+function puts
+ argument s
+ fputs(1, s)
+ return
+
+; write the number n in decimal to file descriptor fd (converted with itos, written with fputs).
+function fputn
+ argument fd
+ argument n
+ local s
+ s = itos(n)
+ fputs(fd, s)
+ return
+
+; write the single character c to file descriptor fd.
+; the byte is written straight from the memory holding the argument c: exactly one byte,
+; starting at the address of c, is passed to Linux syscall 1 (write).
+function fputc
+ argument fd
+ argument c
+ local p
+ p = &c
+ syscall(1, fd, p, 1)
+ return
+
+; write the single character c to file descriptor 1 (this just calls fputc).
+function putc
+ argument c
+ fputc(1, c)
+ return
+
+; read one byte from file descriptor fd and return it.
+; returns 0 at end of file
+; c is set to 0 before the read, so 0 is also what comes back whenever the read stores
+; nothing; a zero byte in the file therefore cannot be told apart from end of file.
+function fgetc
+ argument fd
+ local c
+ local p
+ c = 0
+ p = &c
+ syscall(0, fd, p, 1)
+ return c
+
+; read a line from fd as a null-terminated string
+; returns 0 at end of file, 1 otherwise
+; the line is stored in buf, which holds size bytes; the newline itself is consumed but not stored.
+; characters are read one at a time with fgetc.
+; NOTE(review): when the buffer fills up before a newline is seen, the last character that
+; was stored is overwritten by the terminator, so that one character is lost (it has already
+; been read from fd). callers should pass a buffer longer than any expected line.
+function fgets
+ argument fd
+ argument buf
+ argument size
+ local p
+ local end
+ local c
+ p = buf
+ end = buf + size
+
+ :fgets_loop
+ c = fgetc(fd)
+ if c == 0 goto fgets_eof
+ if c == 10 goto fgets_eol
+ *1p = c
+ p += 1
+ if p == end goto fgets_eob
+ goto fgets_loop
+
+ :fgets_eol ; end of line
+ *1p = 0
+ return 1
+ :fgets_eof ; end of file
+ *1p = 0
+ return 0
+ :fgets_eob ; end of buffer
+ ; step back one byte so that the terminator stays inside the buffer
+ p -= 1
+ *1p = 0
+ return 1
+
+; open the given file for reading
+; uses Linux syscall 2 (open) with flags 0 (read-only) and returns the file descriptor.
+; if the file cannot be opened, file_error prints a message and exits the program.
+function open_r
+ argument filename
+ local fd
+ fd = syscall(2, filename, 0)
+ ; a negative result is an error code
+ if fd < 0 goto open_r_error
+ return fd
+ :open_r_error
+ file_error(filename)
+ ; not reached in practice: file_error ends by calling exit
+ return -1
+
+; open the given file for writing with the given mode
+; uses Linux syscall 2 (open) with flags 0x241 (write-only | create | truncate); mode is
+; passed as the permission argument. returns the file descriptor.
+; if the file cannot be opened, file_error prints a message and exits the program.
+function open_w
+ argument filename
+ argument mode
+ local fd
+ fd = syscall(2, filename, 0x241, mode)
+ ; a negative result is an error code
+ if fd < 0 goto open_w_error
+ return fd
+ :open_w_error
+ file_error(filename)
+ ; not reached in practice: file_error ends by calling exit
+ return -1
+
+; close the file descriptor fd (Linux syscall 3). the result is not checked.
+function close
+ argument fd
+ syscall(3, fd)
+ return
+
+; returns 1 if c is one of the characters 'A' through 'Z', and 0 otherwise.
+; the result is produced by jumping to the shared return_0 / return_1 labels.
+function isupper
+ argument c
+ ; anything above 'Z' is out of range; of what is left, everything from 'A' up qualifies
+ if c > 'Z goto return_0
+ if c >= 'A goto return_1
+ goto return_0
+
+; end the program with the given status code (Linux syscall 0x3c = 60, exit).
+; there is no return statement because the system call does not come back.
+function exit
+ argument status_code
+ syscall(0x3c, status_code)
+
+; shared exit points: other functions (isupper, str_startswith) jump here
+; to return the constant 0 or 1.
+:return_0
+ return 0
+:return_1
+ return 1
+
+; perform a raw Linux system call: syscall(number, arg1, ..., arg6), with unused trailing
+; arguments simply left out by the caller.
+; written directly in x86-64 machine code. the values are read from fixed stack slots:
+; [rbp-8] is the system call number and [rbp-16] .. [rbp-56] are its arguments, which are
+; loaded into rdi, rsi, rdx, r10, r8 and r9 in that order. the number is loaded into rax
+; last, because rax is also used as the scratch register for every other load.
+; slots for arguments the caller did not pass are read anyway (see the comment below).
+; callers use the value of syscall(...) as the system call's result, so the final bare
+; `return` evidently leaves rax as the kernel set it.
+function syscall
+ ; I've done some testing, and this should be okay even if
+ ; rbp-56 goes beyond the end of the stack.
+ ; mov rax, [rbp-16]
+ byte 0x48
+ byte 0x8b
+ byte 0x85
+ byte 0xf0
+ byte 0xff
+ byte 0xff
+ byte 0xff
+ ; mov rdi, rax
+ byte 0x48
+ byte 0x89
+ byte 0xc7
+
+ ; mov rax, [rbp-24]
+ byte 0x48
+ byte 0x8b
+ byte 0x85
+ byte 0xe8
+ byte 0xff
+ byte 0xff
+ byte 0xff
+ ; mov rsi, rax
+ byte 0x48
+ byte 0x89
+ byte 0xc6
+
+ ; mov rax, [rbp-32]
+ byte 0x48
+ byte 0x8b
+ byte 0x85
+ byte 0xe0
+ byte 0xff
+ byte 0xff
+ byte 0xff
+ ; mov rdx, rax
+ byte 0x48
+ byte 0x89
+ byte 0xc2
+
+ ; mov rax, [rbp-40]
+ byte 0x48
+ byte 0x8b
+ byte 0x85
+ byte 0xd8
+ byte 0xff
+ byte 0xff
+ byte 0xff
+ ; mov r10, rax
+ byte 0x49
+ byte 0x89
+ byte 0xc2
+
+ ; mov rax, [rbp-48]
+ byte 0x48
+ byte 0x8b
+ byte 0x85
+ byte 0xd0
+ byte 0xff
+ byte 0xff
+ byte 0xff
+ ; mov r8, rax
+ byte 0x49
+ byte 0x89
+ byte 0xc0
+
+ ; mov rax, [rbp-56]
+ byte 0x48
+ byte 0x8b
+ byte 0x85
+ byte 0xc8
+ byte 0xff
+ byte 0xff
+ byte 0xff
+ ; mov r9, rax
+ byte 0x49
+ byte 0x89
+ byte 0xc1
+
+ ; mov rax, [rbp-8]
+ byte 0x48
+ byte 0x8b
+ byte 0x85
+ byte 0xf8
+ byte 0xff
+ byte 0xff
+ byte 0xff
+
+ ; syscall
+ byte 0x0f
+ byte 0x05
+
+ return