From 519069a89df7f2f704b9ba7052fc80660817115f Mon Sep 17 00:00:00 2001 From: pommicket Date: Fri, 7 Jan 2022 11:07:06 -0500 Subject: rename 04b => 04, better 04 README --- 03/README.md | 2 +- 04/Makefile | 11 + 04/README.md | 271 ++++++ 04/guessing_game | 242 +++++ 04/in03 | 2532 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 04/in04 | 133 +++ 04b/Makefile | 11 - 04b/README.md | 240 ----- 04b/guessing_game | 238 ----- 04b/in03 | 2532 ----------------------------------------------------- 04b/in04b | 133 --- Makefile | 4 +- README.md | 4 +- 13 files changed, 3194 insertions(+), 3159 deletions(-) create mode 100644 04/Makefile create mode 100644 04/README.md create mode 100644 04/guessing_game create mode 100644 04/in03 create mode 100644 04/in04 delete mode 100644 04b/Makefile delete mode 100644 04b/README.md delete mode 100644 04b/guessing_game delete mode 100644 04b/in03 delete mode 100644 04b/in04b diff --git a/03/README.md b/03/README.md index b446574..686b48c 100644 --- a/03/README.md +++ b/03/README.md @@ -165,4 +165,4 @@ you need to make sure you store away any information you'll need after the funct And the language definitely won't be as nice to use as something with real variables. But overall, I'm very happy with this compiler, especially considering it's written in a language with 2-letter label names. -With that, let's move on to the [next stage](../04a/README.md). +With that, let's move on to the [next stage](../04/README.md). 
diff --git a/04/Makefile b/04/Makefile new file mode 100644 index 0000000..ae9568c --- /dev/null +++ b/04/Makefile @@ -0,0 +1,11 @@ +all: out03 guessing_game.out out04 README.html +out03: in03 ../03/out02 + ../03/out02 +%.html: %.md ../markdown + ../markdown $< +out04: in04 out03 + ./out03 +%.out: % out03 + ./out03 $< $@ +clean: + rm -f out* README.html *.out diff --git a/04/README.md b/04/README.md new file mode 100644 index 0000000..6f638b6 --- /dev/null +++ b/04/README.md @@ -0,0 +1,271 @@ +# stage 04 + +As usual, the source for this compiler is `in03`, an input to the [previous compiler](../03/README.md). +`in04` contains a hello world program written in the stage 4 language. +Here is the core of the program: + +```main() + +function main + puts(.str_hello_world) + putc(10) ; newline + syscall(0x3c, 0) + +:str_hello_world + string Hello, world! + byte 0 + +function strlen + argument s + local c + local p + p = s + :strlen_loop + c = *1p + if c == 0 goto strlen_loop_end + p += 1 + goto strlen_loop + :strlen_loop_end + return p - s + +function putc + argument c + local p + p = &c + syscall(1, 1, p, 1) + return + +function puts + argument s + local len + len = strlen(s) + syscall(1, 1, s, len) + return +``` + +It's so simple compared to previous languages! +Importantly, functions now have arguments and return values. +Rather than mess around with registers, we can now +declare local (and global) variables, and use them directly. +These variables will be placed on the +stack. Since arguments are also placed on the stack, +by implementing local variables we get arguments for free. There is no difference +between the `local` and `argument` keywords in this language other than spelling. +In fact, the number of arguments to a function call is not checked against +how many arguments the function has. 
This does make it easy to screw things up by calling a function +with the wrong number of arguments, but it also means that we can provide a variable number of arguments +to the `syscall` function. Speaking of which, if you look at the bottom of `in04`, you'll see: + +``` +function syscall + ... + byte 0x48 + byte 0x8b + byte 0x85 + byte 0xf0 + byte 0xff + byte 0xff + byte 0xff + ... +``` + +Originally I was going to make `syscall` a built-in feature of the language, but then I realized that wasn't +necessary. +Instead, `syscall` is a function written manually in machine language. +We can take a look at its decompilation to make things clearer: + +``` +(...function prologue...) +mov rax,[rbp-0x10] +mov rdi,rax +mov rax,[rbp-0x18] +mov rsi,rax +mov rax,[rbp-0x20] +mov rdx,rax +mov rax,[rbp-0x28] +mov r10,rax +mov rax,[rbp-0x30] +mov r8,rax +mov rax,[rbp-0x38] +mov r9,rax +mov rax,[rbp-0x8] +syscall +(...function epilogue...) +``` + +This just sets `rax`, `rdi`, `rsi`, etc. to the arguments the function was called with, +and then does a syscall. + +## functions and local variables + +In this language, function arguments are placed onto the stack from left to right +and all arguments and local variables are 8 bytes. +As a reminder, +the stack is just an area of memory which is automatically extended downwards (on x86-64, at least). +So, how do we keep track of the location of local variables in the stack? We could do something like +this: + +``` +sub rsp, 24 ; make room for 3 variables +mov [rsp], 10 ; variable1 = 10 +mov [rsp+8], 20 ; variable2 = 20 +mov [rsp+16], 30 ; variable3 = 30 +; ... +add rsp, 24 ; reset rsp +``` + +But now suppose that in the middle of the `; ...` code we want another local variable: +``` +sub rsp, 8 ; make room for another variable +``` +well, since we've changed `rsp`, `variable1` is now at `rsp+8` instead of `rsp`, +`variable2` is at `rsp+16` instead of `rsp+8`, and +`variable3` is at `rsp+24` instead of `rsp+16`. 
+Also, we had better make sure we increment `rsp` by `32` now instead of `24` +to put it back in the right place. +It would be annoying (but by no means impossible) to keep track of all this. +We could just declare all local variables at the start of the function, +but that makes the language more annoying to use. + +Instead, we can use the `rbp` register to keep track of what `rsp` was +at the start of the function: + +``` +; save old value of rbp +sub rsp, 8 +mov [rsp], rbp +; set rbp to initial value of rsp +mov rbp, rsp + +lea rsp, [rbp-8] ; add variable1 (this instruction sets rsp to rbp-8) +mov [rbp-8], 10 ; variable1 = 10 +lea rsp, [rbp-16] ; add variable2 +mov [rbp-16], 20 ; variable2 = 20 +lea rsp, [rbp-24] ; add variable3 +mov [rbp-24], 30 ; variable3 = 30 +; Note that variable1's address is still rbp-8; adding more variables didn't affect it. +; ... + +; restore old values of rbp and rsp +mov rsp, rbp +mov rbp, [rsp] +add rsp, 8 +``` + +This is actually the intended use of `rbp` (it *p*oints to the *b*ase of the stack frame). +Note that setting `rsp` very specifically rather than just doing `sub rsp, 8` is important: +if we skip over some code with a local variable declaration, or execute a local declaration twice, +we want `rsp` to be in the right place. +The first three and last three instructions above are called the function *prologue* and *epilogue*. +They are the same for all functions; a prologue is generated at the start of every function, +and an epilogue is generated for every return statement. +The return value is placed in `rax`. + +## global variables + +Global variables are much simpler than local ones. The variable `:static_memory_end` in the compiler +keeps track of where to put the next global variable in memory. It is initialized at address `0x500000`, +which gives us 1MB for code (and strings). When a global variable is added, `:static_memory_end` is increased +by its size. 
+ +## misc improvements + +- Errors now give you the line number in decimal instead of hexadecimal. +- You get an error if you declare a label (or a variable) twice. +- Conditional jumping is much nicer: e.g. `if x == 3 goto some_label` +- Comments can now appear on lines with code. +- You don't need a `d` prefix for decimal numbers. +- You can control the input and output filenames with command-line arguments (by default, `in04` and `out04` are used). + +## language description + +Comments begin with `;`. + +To make the compiler simpler, this language doesn't support fancy +expressions like `2 * (3 + 5) / 6`. There is a limited set of possible +expressions, specifically there are *terms* and *r-values*. + +But first, each program is made up of a series of statements, and +each statement is one of the following: +- `global {name}` or `global {size} {name}` - declare a global variable with the given size, or 8 bytes if none is provided. +- `local {name}` - declare a local variable +- `argument {name}` - declare a function argument. this is functionally equivalent to `local`, so it just exists for readability. +- `function {name}` - declare a function +- `:{name}` - declare a label +- `goto {label}` - jump to the specified label +- `if {term} {operator} {term} goto {label}` - +conditionally jump to the specified label. `{operator}` should be one of +`==`, `<`, `>`, `>=`, `<=`, `!=`, `[`, `]`, `[=`, `]=` +(the last four do unsigned comparisons). +- `{lvalue} = {rvalue}` - set `lvalue` to `rvalue` +- `{lvalue} += {rvalue}` - add `rvalue` to `lvalue` +- `{lvalue} -= {rvalue}` - etc. 
+- `{lvalue} *= {rvalue}` +- `{lvalue} /= {rvalue}` +- `{lvalue} %= {rvalue}` +- `{lvalue} &= {rvalue}` +- `{lvalue} |= {rvalue}` +- `{lvalue} ^= {rvalue}` +- `{lvalue} <= {rvalue}` - left shift `lvalue` by `rvalue` +- `{lvalue} >= {rvalue}` - right shift `lvalue` by `rvalue` (unsigned) +- `{function}({term}, {term}, ...)` - function call, ignoring the return value +- `return {rvalue}` +- `string {str}` - places a literal string in the code +- `byte {number}` - places a literal byte in the code + +Now let's get down into the weeds: + +A *number* is one of: +- `{decimal number}` - e.g. `108` +- `0x{hexadecimal number}` - e.g. `0x2f` for 47 +- `'{character}` - e.g. `'a` for 97 (the character code for `a`) + +A *term* is one of: +- `{variable name}` - the value of a (local or global) variable +- `.{label name}` - the address of a label +- `{number}` + +An *l-value* is the left-hand side of an assignment expression, +and it is one of: +- `{variable}` +- `*1{variable}` - dereference 1 byte +- `*2{variable}` - dereference 2 bytes +- `*4{variable}` - dereference 4 bytes +- `*8{variable}` - dereference 8 bytes + +An *r-value* is an expression, which can be more complicated than a term. +r-values are one of: +- `{term}` +- `&{variable}` - address of variable +- `*1{variable}` / `*2{variable}` / `*4{variable}` / `*8{variable}` - dereference 1, 2, 4, or 8 bytes +- `~{term}` - bitwise not +- `{function}({term}, {term}, ...)` +- `{term} + {term}` +- `{term} - {term}` +- `{term} * {term}` +- `{term} / {term}` +- `{term} % {term}` +- `{term} & {term}` +- `{term} | {term}` +- `{term} ^ {term}` +- `{term} < {term}` - left shift +- `{term} > {term}` - right shift (unsigned) + +That's quite a lot of stuff, and it makes for a pretty powerful +language, all things considered. To test out the language, +in addition to the hello world program, I also wrote a little +guessing game, which you can find in the file `guessing_game`. +It ended up being quite nice to write! 
+ +## limitations + +Variables in this language do not have types. This makes it very easy to make mistakes like +treating numbers as pointers or vice versa. + +A big annoyance with this language is the lack of local label names. Due to the limited nature +of branching in this language (`if ... goto ...` stands in for `if`, `else if`, `while`, etc.), +you need to use a lot of labels, and that means their names can get quite long. But at least unlike +the 03 language, you'll get an error if you use the same label name twice! + +Overall, though, this language ended up being surprisingly powerful. With any luck, stage `05` will +finally be a C compiler... But first, it's time to make [something that's not a compiler](../04a/README.html). diff --git a/04/guessing_game b/04/guessing_game new file mode 100644 index 0000000..449ded8 --- /dev/null +++ b/04/guessing_game @@ -0,0 +1,242 @@ +global 0x1000 exit_code +global y +y = 4 +exit_code = main() +exit(exit_code) + +function main + local secret_number + local guess + global 32 input_line + local p_line + p_line = &input_line + secret_number = getrand(100) + puts(.str_intro) + + :guess_loop + puts(.str_guess) + syscall(0, 0, p_line, 30) + guess = stoi(p_line) + if guess < secret_number goto too_low + if guess > secret_number goto too_high + puts(.str_got_it) + return 0 + :too_low + puts(.str_too_low) + goto guess_loop + :too_high + puts(.str_too_high) + goto guess_loop + +:str_intro + string I'm thinking of a number. + byte 10 + byte 0 + +:str_guess + string Guess what it is: + byte 32 + byte 0 + +:str_got_it + string You got it! + byte 10 + byte 0 + +:str_too_low + string Too low! + byte 10 + byte 0 + +:str_too_high + string Too high! 
+ byte 10 + byte 0 + +; get a "random" number from 0 to x using the system clock +function getrand + argument x + global 16 getrand_time + local ptime + local n + + ptime = &getrand_time + syscall(228, 0, ptime) ; clock_gettime(CLOCK_REALTIME, ptime) + ptime += 8 ; nanoseconds at offset 8 in struct timespec + n = *4ptime + n %= x + return n + +; returns a pointer to a null-terminated string containing the number given +function itos + global 32 itos_string + argument x + local c + local p + p = &itos_string + p += 30 + :itos_loop + c = x % 10 + c += '0 + *1p = c + x /= 10 + if x == 0 goto itos_loop_end + p -= 1 + goto itos_loop + :itos_loop_end + return p + + +; returns the number at the start of the given string +function stoi + argument s + local p + local n + local c + n = 0 + p = s + :stoi_loop + c = *1p + if c < '0 goto stoi_loop_end + if c > '9 goto stoi_loop_end + n *= 10 + n += c - '0 + p += 1 + goto stoi_loop + :stoi_loop_end + return n + + +function strlen + argument s + local c + local p + p = s + :strlen_loop + c = *1p + if c == 0 goto strlen_loop_end + p += 1 + goto strlen_loop + :strlen_loop_end + return p - s + +function fputs + argument fd + argument s + local length + length = strlen(s) + syscall(1, fd, s, length) + return + +function puts + argument s + fputs(1, s) + return + +function fputn + argument fd + argument n + local s + s = itos(n) + fputs(fd, s) + return + +function exit + argument status_code + syscall(0x3c, status_code) + +function syscall + ; I've done some testing, and this should be okay even if + ; rbp-56 goes beyond the end of the stack. 
+ ; mov rax, [rbp-16] + byte 0x48 + byte 0x8b + byte 0x85 + byte 0xf0 + byte 0xff + byte 0xff + byte 0xff + ; mov rdi, rax + byte 0x48 + byte 0x89 + byte 0xc7 + + ; mov rax, [rbp-24] + byte 0x48 + byte 0x8b + byte 0x85 + byte 0xe8 + byte 0xff + byte 0xff + byte 0xff + ; mov rsi, rax + byte 0x48 + byte 0x89 + byte 0xc6 + + ; mov rax, [rbp-32] + byte 0x48 + byte 0x8b + byte 0x85 + byte 0xe0 + byte 0xff + byte 0xff + byte 0xff + ; mov rdx, rax + byte 0x48 + byte 0x89 + byte 0xc2 + + ; mov rax, [rbp-40] + byte 0x48 + byte 0x8b + byte 0x85 + byte 0xd8 + byte 0xff + byte 0xff + byte 0xff + ; mov r10, rax + byte 0x49 + byte 0x89 + byte 0xc2 + + ; mov rax, [rbp-48] + byte 0x48 + byte 0x8b + byte 0x85 + byte 0xd0 + byte 0xff + byte 0xff + byte 0xff + ; mov r8, rax + byte 0x49 + byte 0x89 + byte 0xc0 + + ; mov rax, [rbp-56] + byte 0x48 + byte 0x8b + byte 0x85 + byte 0xc8 + byte 0xff + byte 0xff + byte 0xff + ; mov r9, rax + byte 0x49 + byte 0x89 + byte 0xc1 + + ; mov rax, [rbp-8] + byte 0x48 + byte 0x8b + byte 0x85 + byte 0xf8 + byte 0xff + byte 0xff + byte 0xff + + ; syscall + byte 0x0f + byte 0x05 + + return diff --git a/04/in03 b/04/in03 new file mode 100644 index 0000000..c2f45ef --- /dev/null +++ b/04/in03 @@ -0,0 +1,2532 @@ +; initialize global_variables_end +C=:global_variables_end +D=:global_variables +8C=D +; initialize static_memory_end +C=:static_memory_end +; 0x100000 = 1MB for code +D=x500000 +8C=D +; initialize labels_end +C=:labels_end +D=:labels +8C=D + +I=8S +A=d2 +?I>A:argv_file_names + ; use default input/output filenames + ; open input file + J=:input_filename + I=d0 + syscall x2 + J=A + ?J<0:input_file_error + ; open output file + J=:output_filename + I=x241 + D=x1ed + syscall x2 + J=A + ?J<0:output_file_error + !:second_pass_starting_point +:argv_file_names + ; open input file + J=S + ; argv[1] is at *(rsp+16) + J+=d16 + J=8J + I=d0 + syscall x2 + J=A + ?J<0:input_file_error + ; open output file + J=S + ; argv[2] is at *(rsp+24) + J+=d24 + J=8J + I=x241 
+ D=x1ed + syscall x2 + J=A + ?J<0:output_file_error + + +:second_pass_starting_point +; write ELF header +J=d4 +I=:ELF_header +D=x78 +syscall x1 + +:read_line +; increment line number +D=:line_number +C=8D +C+=d1 +8D=C + +; use rbp to store line pointer +R=:line +:read_line_loop + ; read 1 byte into rbp + J=d3 + I=R + D=d1 + syscall x0 + D=A + ?D=0:eof + + ; check if the character was a newline: + C=1R + D=xa + ?C=D:read_line_loop_end + ; check if the character was a tab: + D=x9 + ; if so, don't increment rbp + ?C=D:read_line_loop + ; check if the character was a semicolon: + D='; + ; if so, it's a comment + ?C=D:handle_comment + + R+=d1 + !:read_line_loop + + :handle_comment + ; read out rest of line from file + J=d3 + I=R + D=d1 + syscall x0 + D=A + ?D=0:eof + C=1R + D=xa + ; if we didn't reach the end of the line, keep going + ?C!D:handle_comment + + !:read_line_loop_end +:read_line_loop_end + +; remove whitespace (specifically, ' ' characters) at end of line +I=R +:remove_terminal_whitespace_loop + I-=d1 + C=1I + D=x20 + ?C!D:remove_terminal_whitespace_loop_end + ; replace ' ' with a newline + D=xa + 1I=D + !:remove_terminal_whitespace_loop +:remove_terminal_whitespace_loop_end + +; check if this is a blank line +C=:line +D=1C +C=xa +?C=D:read_line + +C=': +?C=D:handle_label_definition + +I=:line +J=:"global" +C=x20 +call :string= +D=A +?D!0:handle_global + +I=:line +J=:"local" +C=x20 +call :string= +D=A +?D!0:handle_local +; arguments are treated the same as local variables +I=:line +J=:"argument" +C=x20 +call :string= +D=A +?D!0:handle_local + +I=:line +J=:"return" +C=x20 +call :string= +D=A +?D!0:handle_return + +I=:line +J=:"byte" +C=x20 +call :string= +D=A +?D!0:handle_byte + +I=:line +J=:"string" +C=x20 +call :string= +D=A +?D!0:handle_string + +I=:line +J=:"goto" +C=x20 +call :string= +D=A +?D!0:handle_goto + +I=:line +J=:"if" +C=x20 +call :string= +D=A +?D!0:handle_if + +I=:line +J=:"function" +call :string= +D=A +?D!0:handle_function + + +; set 
delimiter to newline +C=xa + +I=:line +J=:"return\n" +call :string= +D=A +?D!0:handle_return + +; check if this is an assignment +I=:line +:assignment_check_loop + C=1I + D=xa + ?C=D:assignment_check_loop_end + D='= + ?C=D:handle_assignment + I+=d1 + !:assignment_check_loop +:assignment_check_loop_end + +; check if this is a function call (where we discard the return value) +I=:line +; (check for an opening bracket not preceded by a space) +:call_check_loop + C=1I + D=x20 + ?C=D:call_check_loop_end + D=xa + ?C=D:call_check_loop_end + D='( + ?C=D:handle_call + I+=d1 + !:call_check_loop +:call_check_loop_end + +!:bad_statement + +!:read_line + +:eof + C=:second_pass + D=1C + ?D!0:exit_success + ; set 2nd pass to 1 + 1C=d1 + ; make sure output file is large enough for static memory + ; we'll use the ftruncate syscall to set the size of the file + J=d4 + I=:static_memory_end + I=8I + I-=x400000 + syscall x4d + ; seek both files back to start + J=d3 + I=d0 + D=d0 + syscall x8 + J=d4 + I=d0 + D=d0 + syscall x8 + ; set line number to 0 + C=:line_number + 8C=0 + + !:second_pass_starting_point + +:exit_success + J=d0 + syscall x3c + +align +:local_variable_name + reserve d8 + +:handle_byte + I=:line + ; 5 = length of "byte " + I+=d5 + call :read_number + ; make sure byte is 0-255 + C=A + D=xff + ?CaD:bad_byte + ; write byte + I=:byte + 1I=C + J=d4 + D=d1 + syscall x1 + !:read_line +:byte + reserve d1 + +:handle_string + I=:line + ; 7 = length of "string " + I+=d7 + J=I + ; find end of string + :string_loop + C=1J + D=xa + ?C=D:string_loop_end + J+=d1 + !:string_loop + :string_loop_end + ; get length of string + D=J + D-=I + ; output fd + J=d4 + syscall x1 + !:read_line + +:handle_call + J=I + ; just use the rvalue function call code + C=:rvalue + D=:line + 8C=D + I=:line + call :rvalue_function_call + !:read_line + +:handle_local + ; skip ' ' + I+=d1 + + ; store away pointer to variable name + C=:local_variable_name + 8C=I + + ; check if already defined + J=:local_variables 
+ call :ident_lookup + C=A + ?C!0:local_redeclaration + + C=:local_variable_name + I=8C + J=:local_variables_end + J=8J + call :ident_copy + + ; increase stack_end, store it in J + C=:stack_end + D=4C + D+=d8 + 4C=D + 4J=D + J+=d4 + ; store null terminator + 1J=0 + + ; update :local_variables_end + I=:local_variables_end + 8I=J + + ; set rsp appropriately + C=:rbp_offset + J=d0 + J-=D + 4C=J + + J=d4 + I=:lea_rsp_[rbp_offset] + D=d7 + syscall x1 + + + ; read the next line + !:read_line + +:lea_rsp_[rbp_offset] + x48 + x8d + xa5 +:rbp_offset + reserve d4 + +align +:global_start + reserve d8 +:global_variable_name + reserve d8 +:global_variable_size + reserve d8 +:handle_global + ; ignore if this is the second pass + C=:second_pass + C=1C + ?C!0:read_line + + ; skip ' ' + I+=d1 + + C=1I + D='9 + ?C>D:global_default_size + ; read specific size of global + call :read_number + D=A + C=:global_variable_size + 8C=D + ; check and skip space after number + C=1I + D=x20 + ?C!D:bad_number + I+=d1 + !:global_cont + :global_default_size + ; default size = 8 + C=:global_variable_size + D=d8 + 8C=D + :global_cont + + ; store away pointer to variable name + C=:global_variable_name + 8C=I + + ; check if already defined + J=:global_variables + call :ident_lookup + C=A + ?C!0:global_redeclaration + + C=:global_variable_name + I=8C + + J=:global_variables_end + J=8J + call :ident_copy + ; store address + D=:static_memory_end + C=4D + 4J=C + J+=d4 + ; increase static_memory_end by size + D=:global_variable_size + D=8D + C+=D + D=:static_memory_end + 4D=C + ; store null terminator + 1J=0 + ; update :global_variables_end + I=:global_variables_end + 8I=J + ; go read the next line + !:read_line + +:handle_function + I=:line + ; length of "function " + I+=d9 + ; make function name a label + call :add_label + + ; emit prologue + J=d4 + I=:function_prologue + D=d14 + syscall x1 + + ; reset local variable table + D=:local_variables + 1D=0 + C=:local_variables_end + 8C=D + + ; reset stack_end + 
D=:stack_end + 4D=0 + + ; go read the next line + !:read_line + +:function_prologue + ; sub rsp, 8 + x48 + x81 + xec + x08 + x00 + x00 + x00 + ; mov [rsp], rbp + x48 + x89 + x2c + x24 + ; mov rbp, rsp + R=S + ; total length: 7 + 4 + 3 = 14 bytes + +:function_epilogue + ; mov rsp, rbp + S=R + ; mov rbp, [rsp] + x48 + x8b + x2c + x24 + ; add rsp, 8 + x48 + x81 + xc4 + x08 + x00 + x00 + x00 + ; ret + return + ; total length = 15 bytes + +:handle_label_definition + I=:line + I+=d1 + call :add_label + !:read_line + +align +:label_name + reserve d8 +; add the label in rsi to the label list (with the current pc address) +:add_label + ; ignore if this is the second pass + C=:second_pass + C=1C + ?C!0:return_0 + + C=:label_name + 8C=I + + ; make sure label only has identifier characters + :label_checking_loop + C=1I + D=xa + ?C=D:label_checking_loop_end + I+=d1 + B=C + call :isident + D=A + ?D!0:label_checking_loop + !:bad_label + :label_checking_loop_end + + C=:label_name + I=8C + J=:labels + call :ident_lookup + C=A + ?C!0:label_redefinition + + J=:labels_end + J=8J + C=:label_name + I=8C + call :ident_copy + R=J + + ; figure out where in the file we are (using lseek) + J=d4 + I=d0 + D=d1 + syscall x8 + C=A + C+=x400000 + J=R + ; store address + 4J=C + J+=d4 + + ; update labels_end + C=:labels_end + 8C=J + + return + +:handle_goto + J=d4 + I=:jmp_prefix + D=d1 + syscall x1 + I=:line + ; 5 = length of "goto " + I+=d5 + call :emit_label_jump_address + !:read_line +:jmp_prefix + xe9 + +:handle_if + I=:line + I+=d3 + ; skip term 1 + call :go_to_space + I+=d1 + ; skip operator + call :go_to_space + I+=d1 + ; put second operand in rsi + call :set_rax_to_term + call :set_rsi_to_rax + + + I=:line + ; length of "if " + I+=d3 + ; put first operand in rax + call :set_rax_to_term + ; put second operand in rbx + call :set_rbx_to_rsi + ; emit cmp rax, rbx + J=d4 + I=:cmp_rax_rbx + D=d3 + syscall x1 + + I=:line + I+=d3 + call :go_to_space + I+=d1 + R=I + C=x20 + + I=R + J=:"==" + call 
:string= + I=A + ?I!0:write_je + + I=R + J=:"!=" + call :string= + I=A + ?I!0:write_jne + + I=R + J=:">" + call :string= + I=A + ?I!0:write_jg + + I=R + J=:"<" + call :string= + I=A + ?I!0:write_jl + + I=R + J=:">=" + call :string= + I=A + ?I!0:write_jge + + I=R + J=:"<=" + call :string= + I=A + ?I!0:write_jle + + I=R + J=:"]" + call :string= + I=A + ?I!0:write_ja + + I=R + J=:"[" + call :string= + I=A + ?I!0:write_jb + + I=R + J=:"]=" + call :string= + I=A + ?I!0:write_jae + + I=R + J=:"[=" + call :string= + I=A + ?I!0:write_jbe + + !:bad_jump + + :write_je + J=d4 + I=:je_prefix + D=d2 + syscall x1 + !:if_continue + + :write_jne + J=d4 + I=:jne_prefix + D=d2 + syscall x1 + !:if_continue + + :write_jl + J=d4 + I=:jl_prefix + D=d2 + syscall x1 + !:if_continue + + :write_jg + J=d4 + I=:jg_prefix + D=d2 + syscall x1 + !:if_continue + + :write_jle + J=d4 + I=:jle_prefix + D=d2 + syscall x1 + !:if_continue + + :write_jge + J=d4 + I=:jge_prefix + D=d2 + syscall x1 + !:if_continue + + :write_jb + J=d4 + I=:jb_prefix + D=d2 + syscall x1 + !:if_continue + + :write_ja + J=d4 + I=:ja_prefix + D=d2 + syscall x1 + !:if_continue + + :write_jbe + J=d4 + I=:jbe_prefix + D=d2 + syscall x1 + !:if_continue + + :write_jae + J=d4 + I=:jae_prefix + D=d2 + syscall x1 + !:if_continue + +:if_continue + I=:line + I+=d3 + ; skip term 1 + call :go_to_space + I+=d1 + ; skip operator + call :go_to_space + I+=d1 + ; skip term 2 + call :go_to_space + I+=d1 + J=:"goto" + C=x20 + call :string= + C=A + ; make sure word after term 2 is "goto" + ?C=0:bad_jump + I+=d1 + call :emit_label_jump_address + !:read_line + +:je_prefix + x0f + x84 +:jne_prefix + x0f + x85 +:jl_prefix + x0f + x8c +:jg_prefix + x0f + x8f +:jle_prefix + x0f + x8e +:jge_prefix + x0f + x8d +:jb_prefix + x0f + x82 +:ja_prefix + x0f + x87 +:jbe_prefix + x0f + x86 +:jae_prefix + x0f + x83 + +:cmp_rax_rbx + x48 + x39 + xd8 + +align +:reladdr + reserve d4 + +; emit relative address (for jumping) of label in rsi +:emit_label_jump_address 
+ ; address doesn't matter for first pass + C=:second_pass + C=1C + ?C=0:jump_ignore_address + ; look up label; store address in rbp + J=:labels + call :ident_lookup + C=A + ?C=0:bad_label + R=4C +:jump_ignore_address + + ; first, figure out current address + J=d4 + I=d0 + D=d1 + syscall x8 + C=A + ; add an additional 4 because the relative address is 4 bytes long + C+=x400004 + + ; compute relative address + D=d0 + D-=C + D+=R + ; store in :reladdr + C=:reladdr + 4C=D + ; output + J=d4 + I=:reladdr + D=d4 + syscall x1 + return + +align +:assignment_type + reserve d8 +:handle_assignment + I-=d1 + C=:assignment_type + 8C=I + + I+=d2 + C=1I + D=x20 + ; check for space after = + ?C!D:bad_assignment + I+=d1 + + ; set rdi to right-hand side of assignment + call :set_rax_to_rvalue + call :set_rdi_to_rax + + J=:assignment_type + J=8J + C=1J + ; put newline after lvalue to make parsing easier + D=xa + 1J=D + D=x20 + ?C=D:handle_assignment_cont + J-=d1 + D=xa + 1J=D + :handle_assignment_cont + D=x20 + ?C=D:handle_plain_assignment + D='+ + ?C=D:handle_+= + D='- + ?C=D:handle_-= + D='* + ?C=D:handle_*= + D='/ + ?C=D:handle_/= + D='% + ?C=D:handle_%= + D='& + ?C=D:handle_&= + D='| + ?C=D:handle_|= + D='^ + ?C=D:handle_^= + D='< + ?C=D:handle_<= + D='> + ?C=D:handle_>= + + !:bad_assignment + +:handle_plain_assignment + I=:line + call :set_lvalue_to_rax + !:read_line + +:handle_+= + I=:line + call :set_rax_to_rvalue + call :set_rbx_to_rdi + call :emit_add_rax_rbx + I=:line + call :set_lvalue_to_rax + !:read_line + +:handle_-= + I=:line + call :set_rax_to_rvalue + call :set_rbx_to_rdi + call :emit_sub_rax_rbx + I=:line + call :set_lvalue_to_rax + !:read_line + +:handle_*= + I=:line + call :set_rax_to_rvalue + call :set_rbx_to_rdi + call :emit_imul_rbx + I=:line + call :set_lvalue_to_rax + !:read_line + +:handle_/= + I=:line + call :set_rax_to_rvalue + call :set_rbx_to_rdi + call :emit_zero_rdx_idiv_rbx + I=:line + call :set_lvalue_to_rax + !:read_line + +:handle_%= + I=:line + 
call :set_rax_to_rvalue + call :set_rbx_to_rdi + call :emit_zero_rdx_idiv_rbx + call :set_rax_to_rdx + I=:line + call :set_lvalue_to_rax + !:read_line + +:handle_&= + I=:line + call :set_rax_to_rvalue + call :set_rbx_to_rdi + call :emit_and_rax_rbx + I=:line + call :set_lvalue_to_rax + !:read_line + +:handle_|= + I=:line + call :set_rax_to_rvalue + call :set_rbx_to_rdi + call :emit_or_rax_rbx + I=:line + call :set_lvalue_to_rax + !:read_line + +:handle_^= + I=:line + call :set_rax_to_rvalue + call :set_rbx_to_rdi + call :emit_xor_rax_rbx + I=:line + call :set_lvalue_to_rax + !:read_line + +:handle_<= + I=:line + call :set_rax_to_rvalue + call :set_rcx_to_rdi + call :emit_shl_rax_cl + I=:line + call :set_lvalue_to_rax + !:read_line + +:handle_>= + I=:line + call :set_rax_to_rvalue + call :set_rcx_to_rdi + call :emit_shr_rax_cl + I=:line + call :set_lvalue_to_rax + !:read_line + +align +:lvalue + reserve d8 + +; set the lvalue in rsi to +:set_lvalue_to_rax + C=:lvalue + 8C=I + + ; first, store away value in + R=I + call :set_rdi_to_rax + I=R + + C=:lvalue + I=8C + C=1I + D='* + + ?C=D:lvalue_deref + ; not a dereference; just a variable + C=:lvalue + I=8C + call :set_rax_to_address_of_variable + call :set_rbx_to_rax + call :set_rax_to_rdi + call :set_[rbx]_to_rax + return + :lvalue_deref + C=:lvalue + I=8C + I+=d2 + call :set_rax_to_address_of_variable + call :set_rbx_to_rax + call :set_rax_to_[rbx] + call :set_rbx_to_rax + call :set_rax_to_rdi + + C=:lvalue + I=8C + I+=d1 + C=1I + + D='1 + ?C=D:lvalue_deref1 + D='2 + ?C=D:lvalue_deref2 + D='4 + ?C=D:lvalue_deref4 + D='8 + ?C=D:lvalue_deref8 + !:bad_assignment + :lvalue_deref1 + !:set_[rbx]_to_al + :lvalue_deref2 + !:set_[rbx]_to_ax + :lvalue_deref4 + !:set_[rbx]_to_eax + :lvalue_deref8 + !:set_[rbx]_to_rax + +:handle_return + I=:line + ; skip "return" + I+=d6 + C=1I + D=xa + ?C=D:no_return_value + + ; skip ' ' after return + I+=d1 + + call :set_rax_to_rvalue + + :no_return_value + J=d4 + I=:function_epilogue + D=d15 
+ syscall x1 + + ; go read the next line + !:read_line + +:mov_rsp_rbp + S=R + +:ret + return + +; copy the newline-terminated identifier from rsi to rdi +:ident_copy + C=1I + B=C + call :isident + D=A + ?D=0:bad_identifier + + :ident_loop + C=1I + 1J=C + I+=d1 + J+=d1 + D=xa + ?C=D:ident_loop_end + B=C + call :isident + D=A + ?D=0:bad_identifier + !:ident_loop + :ident_loop_end + return + +align +:ident_lookup_i + reserve d8 + +; look up identifier rsi in list rdi +; returns address of whatever's right after the identifier in the list, or 0 if not found +:ident_lookup + C=:ident_lookup_i + 8C=I + + :ident_lookup_loop + ; check if reached the end of the table + C=1J + ?C=0:return_0 + I=:ident_lookup_i + I=8I + call :ident= + C=A + ; move past terminator of identifier in table + :ident_finish_loop + D=1J + J+=d1 + A=xa + ?D!A:ident_finish_loop + ; check if this was it + ?C!0:return_J + ; nope. keep going + ; skip over address: + J+=d4 + !:ident_lookup_loop + +; can the character in rbx appear in an identifier? +:isident + A='0 + ?BA:return_1 + A='_ + ?B=A:return_1 + !:return_0 + +; set to the term in rsi +:set_rax_to_term + R=I + + C=1I + D='' + ?C=D:term_number + D='. + ?C=D:term_label + D=d58 + ?C to the variable in rsi +:set_rax_to_variable + ; variable + call :set_rax_to_address_of_variable + call :set_rbx_to_rax + call :set_rax_to_[rbx] + return + +:term_label + C=:second_pass + C=1C + ; skip looking up label on first pass; just use whatever's in rsi + ?C=0:set_rax_to_immediate + ; move past . 
+ I+=d1 + J=:labels + call :ident_lookup + C=A + ?C=0:bad_label + ; set rax to label value + I=4C + !:set_rax_to_immediate + +align +:rvalue + reserve d8 + +; set to the rvalue in rsi +:set_rax_to_rvalue + ; store pointer to rvalue + C=:rvalue + 8C=I + + C=1I + D='& + ?C=D:rvalue_addressof + + D='~ + ?C=D:rvalue_bitwise_not + + D='* + ?C=D:rvalue_dereference + + J=I + :rvalue_loop + C=1J + D='( + ?C=D:rvalue_function_call + D=x20 + ?C=D:rvalue_binary_op + D=xa + ; no space or opening bracket; this must be a term + ?C=D:set_rax_to_term + J+=d1 + !:rvalue_loop + +align +:rvalue_function_arg + reserve d8 +:rvalue_function_arg_offset + reserve d4 + +:rvalue_function_call + I=J + I+=d1 + C=1I + D=') + ?C=D:function_call_no_arguments + + C=:rvalue_function_arg_offset + ; set arg offset to -16 (to skip over stack space for return address and rbp) + D=xfffffffffffffff0 + 4C=D + + :rvalue_function_loop + C=:rvalue_function_arg + 8C=I + ; set to argument + call :set_rax_to_term + ; set <[rsp-arg_offset]> to rax + ; first, output prefix + J=d4 + I=:mov_[rsp_offset]_rax_prefix + D=d4 + syscall x1 + ; now decrement offset, and output it + I=:rvalue_function_arg_offset + C=4I + C-=d8 + 4I=C + J=d4 + D=d4 + syscall x1 + + C=:rvalue_function_arg + I=8C + ; skip over argument + :rvalue_function_arg_loop + C=1I + D=', + ?C=D:rvalue_function_next_arg + D=') + ?C=D:rvalue_function_loop_end + D=xa + ; no closing bracket + ?C=D:bad_call + I+=d1 + !:rvalue_function_arg_loop + :rvalue_function_next_arg + ; skip comma + I+=d1 + C=1I + D=x20 + ; make sure there's a space after the comma + ?C!D:bad_call + ; skip space + I+=d1 + + ; handle the next argument + !:rvalue_function_loop + :rvalue_function_loop_end + :function_call_no_arguments + + I+=d1 + C=1I + D=xa + ; make sure there's nothing after the closing bracket + ?C!D:bad_term + + C=:second_pass + C=1C + ?C=0:ignore_function_address + ; look up function name + I=:rvalue + I=8I + J=:labels + call :ident_lookup + C=A + ?C=0:bad_function + 
; read address
+	I=4C
+	:ignore_function_address
+	call :set_rax_to_immediate
+	; write call rax
+	J=d4
+	I=:call_rax
+	D=d2
+	syscall x1
+	; we're done!
+
+	return
+
+; first four bytes of `mov [rsp+disp32], rax`; the 4-byte displacement is written separately
+:mov_[rsp_offset]_rax_prefix
+	x48
+	x89
+	x84
+	x24
+
+; machine code for `call rax`
+:call_rax
+	xff
+	xd0
+
+; operator character of the binary operation being compiled
+:binary_op
+	reserve d1
+; emit code for `term OP term`.
+; on entry rdi points at the space after the first term and :rvalue at the first term.
+; the second operand is evaluated first and parked in rsi, then the first operand
+; is evaluated into rax and the handler for OP combines the two.
+:rvalue_binary_op
+	; move past ' '
+	J+=d1
+	; store binary op
+	D=1J
+	C=:binary_op
+	1C=D
+
+	; make sure space follows operator
+	J+=d1
+	C=1J
+	D=x20
+	?C!D:bad_term
+	; set rsi to second operand
+	J+=d1
+	I=J
+	call :set_rax_to_term
+	call :set_rsi_to_rax
+
+	; now set rax to first operand
+	I=:rvalue
+	I=8I
+	call :set_rax_to_term
+
+	; and combine
+	C=:binary_op
+	C=1C
+
+	D='+
+	?C=D:rvalue_add
+
+	D='-
+	?C=D:rvalue_sub
+
+	D='*
+	?C=D:rvalue_mul
+
+	D='/
+	?C=D:rvalue_div
+
+	D='%
+	?C=D:rvalue_rem
+
+	D='&
+	?C=D:rvalue_and
+
+	D='|
+	?C=D:rvalue_or
+
+	D='^
+	?C=D:rvalue_xor
+
+	D='<
+	?C=D:rvalue_shl
+
+	D='>
+	?C=D:rvalue_shr
+
+	!:bad_term
+
+; operator handlers: each emits a move of the second operand (rsi) into rbx,
+; or into rcx for the shifts, followed by the operation on rax
+:rvalue_add
+	call :set_rbx_to_rsi
+	!:emit_add_rax_rbx
+
+:rvalue_sub
+	call :set_rbx_to_rsi
+	!:emit_sub_rax_rbx
+
+:rvalue_mul
+	call :set_rbx_to_rsi
+	!:emit_imul_rbx
+
+; NOTE(review): the division helper emits `xor edx, edx` and then the signed `idiv rbx`,
+; i.e. rdx is zeroed rather than sign-extended from rax, so a negative dividend would
+; not be divided as a signed value - confirm this is intended
+:rvalue_div
+	call :set_rbx_to_rsi
+	!:emit_zero_rdx_idiv_rbx
+
+; remainder: same division, then the generated code copies rdx into rax
+:rvalue_rem
+	call :set_rbx_to_rsi
+	call :emit_zero_rdx_idiv_rbx
+	call :set_rax_to_rdx
+	return
+
+:rvalue_and
+	call :set_rbx_to_rsi
+	!:emit_and_rax_rbx
+
+:rvalue_or
+	call :set_rbx_to_rsi
+	!:emit_or_rax_rbx
+
+:rvalue_xor
+	call :set_rbx_to_rsi
+	!:emit_xor_rax_rbx
+
+:rvalue_shl
+	call :set_rcx_to_rsi
+	!:emit_shl_rax_cl
+
+:rvalue_shr
+	call :set_rcx_to_rsi
+	!:emit_shr_rax_cl
+
+; &variable: skip the '&' and emit code that puts the variable's address in rax
+:rvalue_addressof
+	I+=d1
+	!:set_rax_to_address_of_variable
+
+; ~term: skip the '~', emit the term, then emit `not rax`
+:rvalue_bitwise_not
+	I+=d1
+	call :set_rax_to_term
+	J=d4
+	I=:not_rax
+	D=d3
+	syscall x1
+	return
+; machine code for `not rax`
+:not_rax
+	x48
+	xf7
+	xd0
+
+; size character ('1', '2', '4' or '8') of the dereference being compiled
+:rvalue_dereference_size
+	reserve d1
+
+; *Nvariable: emit code that loads the variable into rax, moves it to rbx,
+; zeroes rax, and then loads N bytes from [rbx]
+:rvalue_dereference
+	; skip the '*' and remember the size character
+	I+=d1
+	D=1I
+	C=:rvalue_dereference_size
+	1C=D
+	I+=d1
+	call :set_rax_to_variable
+	call :set_rbx_to_rax
+	call :zero_rax
+	C=:rvalue_dereference_size
+	C=1C
+
+	
D='1
+	?C=D:set_al_to_[rbx]
+	D='2
+	?C=D:set_ax_to_[rbx]
+	D='4
+	?C=D:set_eax_to_[rbx]
+	D='8
+	?C=D:set_rax_to_[rbx]
+
+	!:bad_term
+
+
+; emit code that sets rax to the address of the variable named at rsi.
+; locals are addressed relative to rbp (lea); globals have an absolute address (mov immediate).
+:set_rax_to_address_of_variable
+	J=:local_variables
+	call :ident_lookup
+	C=A
+	?C=0:try_global
+	; it's a local variable
+	; read the 4-byte offset that the lookup result (rcx) points at
+	D=4C
+	; put negated offset in rbp
+	R=d0
+	R-=D
+
+	; lea rax, [rbp+
+	J=d4
+	I=:lea_rax_rbp_offset_prefix
+	D=d3
+	syscall x1
+
+	; offset]
+	; (the 32-bit displacement is staged in :imm64 and written from there)
+	J=d4
+	I=:imm64
+	4I=R
+	D=d4
+	syscall x1
+
+	return
+	:try_global
+	J=:global_variables
+	call :ident_lookup
+	C=A
+	?C=0:bad_variable
+	; it's a global variable
+	; get its address
+	C=4C
+
+	; put address in rax
+	I=C
+	!:set_rax_to_immediate
+
+; 1 if the number being read had a leading '-', otherwise 0
+:number_is_negative
+	reserve d1
+
+; emit code that sets rax to the number literal at rsi
+:term_number
+	call :read_number
+	I=A
+	!:set_rax_to_immediate
+
+; set rax to the number in the string at rsi
+; (this is the compiler's own rax: the literal is parsed, nothing is emitted)
+:read_number
+	C=1I
+	D=''
+	?C=D:read_char
+	D='-
+	; set rdx to 0 if number is positive, 1 if negative
+	?C=D:read_number_negative
+	D=d0
+	!:read_number_cont
+	:read_number_negative
+	D=d1
+	I+=d1
+	:read_number_cont
+	; store away negativity
+	C=:number_is_negative
+	1C=D
+	; check if number starts with 0-9
+	C=1I
+	D='9
+	?C>D:bad_number
+	D='0
+	; NOTE(review): source text appears to be missing between the line above and the
+	; line below. the next statement has no comparison operator, and the targets
+	; read_char and hex_number_loop used in this routine are not defined in the
+	; surviving text. restore this stretch from the original 04/in03 before using it.
+	?CD:hex_number_loop_end
+	; one of the digits a-f
+	; (adding xffffffffffffffa9 subtracts 87, mapping 'a' to 10)
+	D=xffffffffffffffa9
+	!:hex_number_digit
+	:hex_number_0123456789
+	; (adding xffffffffffffffd0 subtracts 48, mapping '0' to 0)
+	D=xffffffffffffffd0
+	:hex_number_digit
+	C+=D
+	; shift left by 4
+	R<=d4
+	; add digit
+	R+=C
+	I+=d1
+	!:hex_number_loop
+	:hex_number_loop_end
+	!:read_number_output
+
+:read_number_output
+	; first, make sure number is followed by space/newline/appropriate punctuation
+	C=1I
+	D=x20
+	?C=D:read_number_valid
+	D=',
+	?C=D:read_number_valid
+	D=')
+	?C=D:read_number_valid
+	D=xa
+	?C=D:read_number_valid
+	!:bad_number
+:read_number_valid
+	; we now have the *unsigned* number in rbp.
take the sign into consideration
+	C=:number_is_negative
+	D=1C
+	?D=0:number_not_negative
+	; R = -R
+	C=R
+	R=d0
+	R-=C
+	:number_not_negative
+	; finally, return
+	A=R
+	return
+
+
+
+; emit `mov rax, imm64`, where the immediate is the value in rsi.
+:set_rax_to_immediate
+	; stage the value in :imm64 so it can be written out as 8 bytes
+	C=:imm64
+	8C=I
+
+	; write prefix
+	J=d4
+	D=d2
+	I=:mov_rax_imm64_prefix
+	syscall x1
+
+	; write immediate
+	J=d4
+	D=d8
+	I=:imm64
+	syscall x1
+	return
+
+; the routines from here on each write one fixed instruction to the output file (fd 4):
+; rsi = address of the instruction bytes, rdx = their length.
+; where the bytes are spelled as a stage-03 instruction (e.g. B=A), that instruction's
+; own encoding is what gets written.
+
+; emit `xor eax, eax`
+:zero_rax
+	J=d4
+	I=:xor_eax_eax
+	D=d2
+	syscall x1
+	return
+:xor_eax_eax
+	x31
+	xc0
+
+; emit `xor edx, edx`
+:zero_rdx
+	J=d4
+	I=:xor_edx_edx
+	D=d2
+	syscall x1
+	return
+:xor_edx_edx
+	x31
+	xd2
+
+; register-to-register moves; set_X_to_Y emits `mov X, Y`
+:set_rbx_to_rax
+	J=d4
+	I=:mov_rbx_rax
+	D=d3
+	syscall x1
+	return
+:mov_rbx_rax
+	B=A
+
+:set_rbx_to_rsi
+	J=d4
+	I=:mov_rbx_rsi
+	D=d3
+	syscall x1
+	return
+:mov_rbx_rsi
+	B=I
+
+:set_rbx_to_rdi
+	J=d4
+	I=:mov_rbx_rdi
+	D=d3
+	syscall x1
+	return
+:mov_rbx_rdi
+	B=J
+
+:set_rcx_to_rsi
+	J=d4
+	I=:mov_rcx_rsi
+	D=d3
+	syscall x1
+	return
+:mov_rcx_rsi
+	C=I
+
+:set_rcx_to_rdi
+	J=d4
+	I=:mov_rcx_rdi
+	D=d3
+	syscall x1
+	return
+:mov_rcx_rdi
+	C=J
+
+:set_rax_to_rdx
+	J=d4
+	I=:mov_rax_rdx
+	D=d3
+	syscall x1
+	return
+:mov_rax_rdx
+	A=D
+
+:set_rax_to_rdi
+	J=d4
+	I=:mov_rax_rdi
+	D=d3
+	syscall x1
+	return
+:mov_rax_rdi
+	A=J
+
+:set_rsi_to_rax
+	J=d4
+	I=:mov_rsi_rax
+	D=d3
+	syscall x1
+	return
+:mov_rsi_rax
+	I=A
+
+:set_rdi_to_rax
+	J=d4
+	I=:mov_rdi_rax
+	D=d3
+	syscall x1
+	return
+:mov_rdi_rax
+	J=A
+
+; loads from the address in rbx: 8, 4, 2 and 1 bytes into rax, eax, ax and al
+:set_rax_to_[rbx]
+	J=d4
+	I=:mov_rax_[rbx]
+	D=d3
+	syscall x1
+	return
+:mov_rax_[rbx]
+	x48
+	x8b
+	x03
+
+:set_eax_to_[rbx]
+	J=d4
+	I=:mov_eax_[rbx]
+	D=d2
+	syscall x1
+	return
+:mov_eax_[rbx]
+	x8b
+	x03
+
+:set_ax_to_[rbx]
+	J=d4
+	I=:mov_ax_[rbx]
+	D=d3
+	syscall x1
+	return
+:mov_ax_[rbx]
+	x66
+	x8b
+	x03
+
+:set_al_to_[rbx]
+	J=d4
+	I=:mov_al_[rbx]
+	D=d2
+	syscall x1
+	return
+:mov_al_[rbx]
+	x8a
+	x03
+
+
+; stores to the address in rbx: 8, 4, 2 and 1 bytes from rax, eax, ax and al
+:set_[rbx]_to_rax
+	J=d4
+	I=:mov_[rbx]_rax
+	D=d3
+	syscall x1
+	return
+:mov_[rbx]_rax
+	x48
+	x89
+	x03
+
+:set_[rbx]_to_eax
+	J=d4
+	I=:mov_[rbx]_eax
+	D=d2
+	syscall x1
+	return
+:mov_[rbx]_eax
+	x89
+	x03
+
+:set_[rbx]_to_ax
+	J=d4
+	I=:mov_[rbx]_ax
+	D=d3
+	syscall x1
+	return
+:mov_[rbx]_ax
+	x66
+	x89
+	x03
+
+:set_[rbx]_to_al
+	J=d4
+	I=:mov_[rbx]_al
+	D=d2
+	syscall x1
+	return
+:mov_[rbx]_al
+	x88
+	x03
+
+
+; first two bytes of `mov rax, imm64`; the 8-byte immediate is written separately
+:mov_rax_imm64_prefix
+	x48
+	xb8
+
+; arithmetic/logic emitters: each writes one 3-byte instruction to the output file (fd 4).
+; the instruction combines rax with rbx (or shifts rax by cl) and leaves the result in rax.
+:emit_add_rax_rbx
+	J=d4
+	I=:add_rax_rbx
+	D=d3
+	syscall x1
+	return
+:add_rax_rbx
+	x48
+	x01
+	xd8
+
+:emit_sub_rax_rbx
+	J=d4
+	I=:sub_rax_rbx
+	D=d3
+	syscall x1
+	return
+:sub_rax_rbx
+	x48
+	x29
+	xd8
+
+:emit_and_rax_rbx
+	J=d4
+	I=:and_rax_rbx
+	D=d3
+	syscall x1
+	return
+:and_rax_rbx
+	x48
+	x21
+	xd8
+
+:emit_or_rax_rbx
+	J=d4
+	I=:or_rax_rbx
+	D=d3
+	syscall x1
+	return
+:or_rax_rbx
+	x48
+	x09
+	xd8
+
+:emit_xor_rax_rbx
+	J=d4
+	I=:xor_rax_rbx
+	D=d3
+	syscall x1
+	return
+:xor_rax_rbx
+	x48
+	x31
+	xd8
+
+:emit_shl_rax_cl
+	J=d4
+	I=:shl_rax_cl
+	D=d3
+	syscall x1
+	return
+:shl_rax_cl
+	x48
+	xd3
+	xe0
+
+; xe8 selects shr, the logical (zero-filling) right shift
+:emit_shr_rax_cl
+	J=d4
+	I=:shr_rax_cl
+	D=d3
+	syscall x1
+	return
+:shr_rax_cl
+	x48
+	xd3
+	xe8
+
+; one-operand imul: multiplies rax by rbx
+:emit_imul_rbx
+	J=d4
+	I=:imul_rbx
+	D=d3
+	syscall x1
+	return
+:imul_rbx
+	x48
+	xf7
+	xeb
+
+; emit `xor edx, edx` followed by `idiv rbx` (quotient in rax, remainder in rdx).
+; NOTE(review): idiv is the signed divide, but rdx is zeroed here rather than
+; sign-extended from rax - confirm negative dividends are not expected.
+:emit_zero_rdx_idiv_rbx
+	call :zero_rdx
+	J=d4
+	I=:idiv_rbx
+	D=d3
+	syscall x1
+	return
+:idiv_rbx
+	x48
+	xf7
+	xfb
+
+align
+; 8-byte staging buffer for immediates and displacements about to be written out
+:imm64
+	reserve d8
+
+; prefix for lea rax, [rbp+IMM32]
+:lea_rax_rbp_offset_prefix
+	x48
+	x8d
+	x85
+
+; null-terminated default file names
+:input_filename
+	str in04
+	x0
+
+:output_filename
+	str out04
+	x0
+
+; error entry points: each loads its message into rbx and jumps to a common reporter.
+; the two file errors go to general_error; the rest go to program_error.
+:input_file_error
+	B=:input_file_error_message
+	!:general_error
+
+:input_file_error_message
+	str Couldn't open input file.
+	xa
+	x0
+
+:output_file_error
+	B=:output_file_error_message
+	!:general_error
+
+:output_file_error_message
+	str Couldn't open output file.
+	xa
+	x0
+
+:bad_identifier
+	B=:bad_identifier_error_message
+	!:program_error
+
+:bad_identifier_error_message
+	str Bad identifier.
+	xa
+	x0
+
+:bad_label
+	B=:bad_label_error_message
+	!:program_error
+
+:bad_label_error_message
+	str Bad label.
+	xa
+	x0
+
+; error entry points for problems in the program being compiled.
+; each one loads the address of its message into rbx and jumps to program_error.
+; every message is the text followed by a newline (xa) and a terminating 0 byte.
+:bad_variable
+	B=:bad_variable_error_message
+	!:program_error
+
+:bad_variable_error_message
+	str No such variable.
+	xa
+	x0
+
+:bad_function
+	B=:bad_function_error_message
+	!:program_error
+
+:bad_function_error_message
+	str No such function.
+	xa
+	x0
+
+:bad_byte
+	B=:bad_byte_error_message
+	!:program_error
+
+:bad_byte_error_message
+	str Byte not in range 0-255.
+	xa
+	x0
+
+:bad_number
+	B=:bad_number_error_message
+	!:program_error
+
+:bad_number_error_message
+	str Bad number.
+	xa
+	x0
+
+:bad_assignment
+	B=:bad_assignment_error_message
+	!:program_error
+
+:bad_assignment_error_message
+	str Bad assignment.
+	xa
+	x0
+
+:bad_term
+	B=:bad_term_error_message
+	!:program_error
+
+:bad_term_error_message
+	str Bad term.
+	xa
+	x0
+
+:bad_statement
+	B=:bad_statement_error_message
+	!:program_error
+
+:bad_statement_error_message
+	str Bad statement.
+	xa
+	x0
+
+:bad_jump
+	B=:bad_jump_error_message
+	!:program_error
+
+:bad_jump_error_message
+	str Bad jump.
+	xa
+	x0
+
+:bad_call
+	B=:bad_call_error_message
+	!:program_error
+
+:bad_call_error_message
+	str Bad function call.
+	xa
+	x0
+
+:label_redefinition
+	B=:label_redefinition_error_message
+	!:program_error
+
+:label_redefinition_error_message
+	str Label redefinition.
+	xa
+	x0
+
+:global_redeclaration
+	B=:global_redeclaration_error_message
+	!:program_error
+
+:global_redeclaration_error_message
+	str Global variable declared twice.
+	xa
+	x0
+
+:local_redeclaration
+	B=:local_redeclaration_error_message
+	!:program_error
+
+:local_redeclaration_error_message
+	str Local variable declared twice.
+	xa
+	x0
+
+; write the message in rbx to stderr, then exit with status 1
+:general_error
+	call :eputs
+	J=d1
+	syscall x3c
+
+; write "Line N: " (N = current value of :line_number) and then the message in rbx
+; to stderr, then exit with status 1
+:program_error
+	; keep the message pointer in rbp while rbx is reused for the pieces printed first
+	R=B
+
+	B=:"Line"
+	call :eputs
+
+	D=:line_number
+	D=8D
+	B=D
+	call :eputn
+
+	B=:line_number_separator
+	call :eputs
+
+	B=R
+	call :eputs
+	J=d1
+	syscall x3c
+
+:"Line"
+	str Line
+	x20
+	x0
+
+:line_number_separator
+	str :
+	x20
+	x0
+
+; set rax to the length of the null-terminated string in rbx
+; (uses rcx, rdx and rsi as scratch)
+:strlen
+	I=B
+	D=B
+	:strlen_loop
+	C=1I
+	?C=0:strlen_ret
+	I+=d1
+	!:strlen_loop
+	:strlen_ret
+	I-=D
+	A=I
+	return
+
+; check if strings in rdi and rsi are equal, up to terminator in rcx
+; returns 1 in rax if the strings match up to and including the terminator, otherwise 0
+:string=
+	D=1I
+	A=1J
+	?D!A:return_0
+	?D=C:return_1
+	I+=d1
+	J+=d1
+	!:string=
+
+; check if strings in rdi and rsi are equal, up to the first non-identifier character
+; returns 1 in rax if both identifiers end at the same point with no differing character, otherwise 0
+:ident=
+	D=1I
+	B=D
+	call :isident
+	; I ended
+	?A=0:ident=_I_end
+
+	D=1J
+	B=D
+	call :isident
+	; J ended, but I didn't
+	?A=0:return_0
+
+	; we haven't reached the end of either
+	D=1I
+	A=1J
+	?D!A:return_0
+	I+=d1
+	J+=d1
+	!:ident=
+:ident=_I_end
+	D=1J
+	B=D
+	call :isident
+	; check if J also ended
+	?A=0:return_1
+	; J didn't end
+	!:return_0
+
+; shared tails: set rax to a small constant (or to rdi) and return.
+; they are reached by (conditional) jumps from routines that were themselves called.
+:return_0
+	A=d0
+	return
+:return_1
+	A=d1
+	return
+:return_2
+	A=d2
+	return
+:return_3
+	A=d3
+	return
+:return_4
+	A=d4
+	return
+:return_5
+	A=d5
+	return
+:return_6
+	A=d6
+	return
+:return_7
+	A=d7
+	return
+:return_8
+	A=d8
+	return
+:return_J
+	A=J
+	return
+
+; write the character in rbx to the file in rdi.
+; the byte is staged in memory just below rsp and written from there
+:fputc
+	C=B
+	I=S
+	I-=d1
+	1I=C
+	D=d1
+	syscall x1
+	return
+
+; write the string in rbx to stderr
+:eputs
+	; rdi holds the string pointer across the call to strlen
+	J=B
+	call :strlen
+	D=A
+	I=J
+	J=d2
+	syscall x1
+	return
+
+; write rbx in decimal to stderr
+; digits are produced least-significant first and stored downwards from just below rsp
+:eputn
+	I=B
+	J=S
+	J-=d1
+	:eputn_loop
+	D=d0
+	; divide by 10
+	B=d10
+	A=I
+	div
+	; quotient is new number
+	I=A
+	; add remainder to string
+	D+='0
+	1J=D
+	J-=d1
+	?I!0:eputn_loop
+	; rdi has moved one below the first digit; step back, then length = rsp - rdi
+	J+=d1
+	D=S
+	D-=J
+	I=J
+	J=d2
+	syscall x1
+	return
+
+; copy rdx bytes from rsi to rdi. 
+; this copies from the left: if you're doing an overlapped copy, rsi should be greater than rdi
+:memcpy
+	?D=0:return_0
+	A=1I
+	1J=A
+	I+=d1
+	J+=d1
+	D-=d1
+	!:memcpy
+
+; copy bytes from rsi to rdi, up to and including the first byte equal to cl
+:memccpy
+	D=1I
+	1J=D
+	I+=d1
+	J+=d1
+	?D!C:memccpy
+	return
+
+; advance rsi to the next space or newline character
+:go_to_space
+	C=1I
+	D=xa
+	?C=D:return_0
+	D=x20
+	?C=D:return_0
+	I+=d1
+	!:go_to_space
+
+; keyword and operator strings. each is followed by a delimiter byte:
+; a space (x20), or a newline (xa) for "return\n"
+:"global"
+	str global
+	x20
+:"argument"
+	str argument
+	x20
+:"local"
+	str local
+	x20
+:"return"
+	str return
+	x20
+:"return\n"
+	str return
+	xa
+:"byte"
+	str byte
+	x20
+:"string"
+	str string
+	x20
+:"goto"
+	str goto
+	x20
+:"if"
+	str if
+	x20
+:"function"
+	str function
+	x20
+:"=="
+	str ==
+	x20
+:"!="
+	str !=
+	x20
+:">"
+	str >
+	x20
+:"<"
+	str <
+	x20
+:"<="
+	str <=
+	x20
+:">="
+	str >=
+	x20
+:"["
+	str [
+	x20
+:"]"
+	str ]
+	x20
+:"[="
+	str [=
+	x20
+:"]="
+	str ]=
+	x20
+
+:zero
+	x0
+
+; put a 0 byte before the line (this is important for removing whitespace at the end of the line,
+; specifically, we don't want this to be a space character)
+x0
+; buffer for the source line currently being processed
+:line
+	reserve d1000
+
+align
+; bookkeeping slots (8 bytes each), followed by the tables they describe
+:global_variables_end
+	reserve d8
+:static_memory_end
+	reserve d8
+:local_variables_end
+	reserve d8
+:stack_end
+	reserve d8
+:labels_end
+	reserve d8
+:line_number
+	reserve d8
+:global_variables
+	reserve d50000
+:local_variables
+	reserve d20000
+:labels
+	reserve d200000
+; nonzero while the second pass over the input is running
+:second_pass
+	reserve d1
+
+; ELF64 file header followed by one program header (x78 bytes in total)
+:ELF_header
+; e_ident: magic, 64-bit class, little-endian, version 1, then 9 bytes of padding
+x7f
+x45
+x4c
+x46
+x02
+x01
+x01
+
+reserve d9
+
+; e_type = 2 (executable)
+x02
+x00
+
+; e_machine = x3e (x86-64)
+x3e
+x00
+
+; e_version = 1
+x01
+x00
+x00
+x00
+
+; e_entry = x400078
+x78
+x00
+x40
+x00
+x00
+x00
+x00
+x00
+
+; e_phoff = x40
+x40
+x00
+x00
+x00
+x00
+x00
+x00
+x00
+
+; e_shoff (8 bytes) and e_flags (4 bytes), left as reserved space
+reserve d12
+
+; e_ehsize = x40, e_phentsize = x38, e_phnum = 1, e_shentsize = e_shnum = e_shstrndx = 0
+x40
+x00
+x38
+x00
+x01
+x00
+x00
+x00
+x00
+x00
+x00
+x00
+
+; program header: p_type = 1 (loadable segment)
+x01
+x00
+x00
+x00
+
+; p_flags = 7 (read, write, execute)
+x07
+x00
+x00
+x00
+
+; p_offset = x78
+x78
+x00
+x00
+x00
+x00
+x00
+x00
+x00
+
+; p_vaddr = x400078
+x78
+x00
+x40
+x00
+x00
+x00
+x00
+x00
+
+; p_paddr, left as reserved space
+reserve d8
+
+; p_filesz = x200000
+x00
+x00
+x20
+x00
+x00
+x00
+x00
+x00
+
+; p_memsz = x200000
+x00
+x00
+x20
+x00
+x00
+x00
+x00
+x00
+
+; p_align = x1000
+x00
+x10
+x00
+x00
+x00
+x00
+x00
+x00
+
+; NOTE: we shouldn't end the file with a reserve; we don't handle that properly
diff --git a/04/in04 b/04/in04
new file mode 100644
index 0000000..2b85900
--- /dev/null
+++ b/04/in04
@@ -0,0 +1,133 @@
+main()
+
+function main
+	puts(.str_hello_world)
+	putc(10) ; newline
+	syscall(0x3c, 0) ; system call 0x3c (exit) with status 0
+
+:str_hello_world
+	string Hello, world!
+	byte 0
+
+function strlen
+	argument s
+	local c
+	local p
+	p = s
+	:strlen_loop
+	c = *1p ; read one byte from the address in p
+	if c == 0 goto strlen_loop_end
+	p += 1
+	goto strlen_loop
+	:strlen_loop_end
+	return p - s ; number of bytes before the terminating 0
+
+function putc
+	argument c
+	local p
+	p = &c ; the write call needs an address, so take the address of the argument
+	syscall(1, 1, p, 1) ; system call 1 (write): 1 byte from p to file descriptor 1
+	return
+
+function puts
+	argument s
+	local len
+	len = strlen(s)
+	syscall(1, 1, s, len) ; system call 1 (write): len bytes from s to file descriptor 1
+	return
+
+function syscall
+	; I've done some testing, and this should be okay even if
+	; rbp-56 goes beyond the end of the stack.
+	; mov rax, [rbp-16]   (second argument)
+	byte 0x48
+	byte 0x8b
+	byte 0x85
+	byte 0xf0
+	byte 0xff
+	byte 0xff
+	byte 0xff
+	; mov rdi, rax
+	byte 0x48
+	byte 0x89
+	byte 0xc7
+
+	; mov rax, [rbp-24]   (third argument)
+	byte 0x48
+	byte 0x8b
+	byte 0x85
+	byte 0xe8
+	byte 0xff
+	byte 0xff
+	byte 0xff
+	; mov rsi, rax
+	byte 0x48
+	byte 0x89
+	byte 0xc6
+
+	; mov rax, [rbp-32]   (fourth argument)
+	byte 0x48
+	byte 0x8b
+	byte 0x85
+	byte 0xe0
+	byte 0xff
+	byte 0xff
+	byte 0xff
+	; mov rdx, rax
+	byte 0x48
+	byte 0x89
+	byte 0xc2
+
+	; mov rax, [rbp-40]   (fifth argument)
+	byte 0x48
+	byte 0x8b
+	byte 0x85
+	byte 0xd8
+	byte 0xff
+	byte 0xff
+	byte 0xff
+	; mov r10, rax
+	byte 0x49
+	byte 0x89
+	byte 0xc2
+
+	; mov rax, [rbp-48]   (sixth argument)
+	byte 0x48
+	byte 0x8b
+	byte 0x85
+	byte 0xd0
+	byte 0xff
+	byte 0xff
+	byte 0xff
+	; mov r8, rax
+	byte 0x49
+	byte 0x89
+	byte 0xc0
+
+	; mov rax, [rbp-56]   (seventh argument)
+	byte 0x48
+	byte 0x8b
+	byte 0x85
+	byte 0xc8
+	byte 0xff
+	byte 0xff
+	byte 0xff
+	; mov r9, rax
+	byte 0x49
+	byte 0x89
+	byte 0xc1
+
+	; mov rax, [rbp-8]   (first argument: the system call number, loaded last because rax was the scratch register above)
+	byte 0x48
+	byte 0x8b
+	byte 0x85
+	byte 0xf8
+	byte 0xff
+	byte 0xff
+	byte 0xff
+
+	; syscall
+	byte 0x0f
+	byte 0x05
+
+	return
diff --git a/04b/Makefile b/04b/Makefile
deleted file mode 
100644 index ef72181..0000000 --- a/04b/Makefile +++ /dev/null @@ -1,11 +0,0 @@ -all: out03 guessing_game.out out04b README.html -out03: in03 ../03/out02 - ../03/out02 -%.html: %.md ../markdown - ../markdown $< -out04b: in04b out03 - ./out03 -%.out: % out03 - ./out03 $< $@ -clean: - rm -f out* README.html *.out diff --git a/04b/README.md b/04b/README.md deleted file mode 100644 index f131943..0000000 --- a/04b/README.md +++ /dev/null @@ -1,240 +0,0 @@ -# stage 04 - -As usual, the source for this compiler is `in03`, an input to the [previous compiler](../03/README.md). -`in04b` contains a hello world program written in the stage 4 language. -Here is the core of the program: - -``` -main() - -function main - puts(.str_hello_world) - putc(10) ; newline - syscall(0x3c, 0) -``` - -As you can see, we can now pass arguments to functions. And let's take a look at `putc`: - -``` -function putc - argument c - local p - p = &c - syscall(1, 1, p, 1) - return -``` - -It's so simple compared to previous languages! Rather than mess around with registers, we can now -declare local (and global) variables, and use them directly. These variables will be placed on the -stack. Since arguments are also placed on the stack, -by implementing local variables we get arguments for free. There is no difference -between the `local` and `argument` keywords in this language other than spelling. -In fact, the number of agruments to a function call is not checked against -how many arguments the function has. This does make it easy to screw things up by calling a function -with the wrong number of arguments, but it also means that we can provide a variable number of arguments -to the `syscall` function. Speaking of which, if you look at the bottom of `in04b`, you'll see: - -``` -function syscall - ... - byte 0x48 - byte 0x8b - byte 0x85 - byte 0xf0 - byte 0xff - byte 0xff - byte 0xff - ... 
-``` - -Originally I was going to make `syscall` a built-in feature of the language, but then I realized that wasn't -necessary. -Instead, `syscall` is a function written manually in machine language. -We can take a look at its decompilation to make things clearer: - -``` -mov rax,[rbp-0x10] -mov rdi,rax -mov rax,[rbp-0x18] -mov rsi,rax -mov rax,[rbp-0x20] -mov rdx,rax -mov rax,[rbp-0x28] -mov r10,rax -mov rax,[rbp-0x30] -mov r8,rax -mov rax,[rbp-0x38] -mov r9,rax -mov rax,[rbp-0x8] -syscall -``` - -This just sets `rax`, `rdi`, `rsi`, etc. to the arguments the function was called with, -and then does a syscall. - -## functions and local variables - -In this language, function arguments are placed onto the stack from left to right -and all arguments and local variables are 8 bytes. -As a reminder, -the stack is just an area of memory which is automatically extended downwards (on x86-64, at least). -So, how do we keep track of the location of local variables in the stack? We could do something like -this: - -``` -sub rsp, 24 ; make room for 3 variables -mov [rsp], 10 ; variable1 = 10 -mov [rsp+8], 20 ; variable2 = 20 -mov [rsp+16], 30 ; variable3 = 30 -; ... -add rsp, 24 ; reset rsp -``` - -But now suppose that in the middle of the `; ...` code we want another local variable: -``` -sub rsp, 8 ; make room for another variable -``` -well, since we've changed `rsp`, `variable1` is now at `rsp+8` instead of `rsp`, -`variable2` is at `rsp+16` instead of `rsp+8`, and -`variable3` is at `rsp+24` instead of `rsp+16`. -Also, we had better make sure we increment `rsp` by `32` now instead of `24` -to put it back in the right place. -It would be annoying (but by no means impossible) to keep track of all this. -We could just declare all local variables at the start of the function, -but that makes the language more annoying to use. 
- -Instead, we can use the `rbp` register to keep track of what `rsp` was -at the start of the function: - -``` -; save old value of rbp -sub rsp, 8 -mov [rsp], rbp -; set rbp to initial value of rsp -mov rbp, rsp - -lea rsp, [rbp-8] ; add variable1 (this instruction sets rsp to rbp-8) -mov [rbp-8], 10 ; variable1 = 10 -lea rsp, [rbp-16] ; add variable2 -mov [rbp-16], 20 ; variable2 = 20 -lea rsp, [rbp-24] ; add variable3 -mov [rbp-24], 30 ; variable3 = 30 -; Note that variable1's address is still rbp-8; adding more variables didn't affect it. -; ... - -; restore old values of rbp and rsp -mov rsp, rbp -mov rbp, [rsp] -add rsp, 8 -``` - -This is actually the intended use of `rbp` (it *p*oints to the *b*ase of the stack frame). -Note that setting `rsp` very specifically rather than just doing `sub rsp, 8` is important: -if we skip over some code with a local variable declaration, or execute a local declaration twice, -we want `rsp` to be in the right place. -The first three and last three instructions above are called the function *prologue* and *epilogue*. -They are all the same for all functions; a prologue is generated at the start of every function, -and an epilogue is generated for every return statement. -The return value is placed in `rax`. - -## global variables - -Global variables are much simpler than local ones. The variable `:static_memory_end` in the compiler -keeps track of where to put the next global variable in memory. It is initialized at address `0x440000`, -which gives us 256KB for code (and strings). When a global variable is added, `:static_memory_end` is increased -by its size. - -## language description - -Comments begin with `;` and may be put at the end of lines -with or without code. -Blank lines are ignored. - -To make the compiler simpler, this language doesn't support fancy -expressions like `2 * (3 + 5) / 6`. There is a limited set of possible -expressions, specifically there are *terms* and *r-values*. 
- -But first, each program is made up of a series of statements, and -each statement is one of the following: -- `global {name}` or `global {size} {name}` - declare a global variable with the given size, or 8 bytes if none is provided. -- `local {name}` - declare a local variable -- `argument {name}` - declare a function argument. this is functionally equivalent to `local`, so it just exists for readability. -- `function {name}` - declare a function -- `:{name}` - declare a label -- `goto {label}` - jump to the specified label -- `if {term} {operator} {term} goto {label}` - -conditionally jump to the specified label. `{operator}` should be one of -`==`, `<`, `>`, `>=`, `<=`, `!=`, `[`, `]`, `[=`, `]=` -(the last four do unsigned comparisons). -- `{lvalue} = {rvalue}` - set `lvalue` to `rvalue` -- `{lvalue} += {rvalue}` - add `rvalue` to `lvalue` -- `{lvalue} -= {rvalue}` - etc. -- `{lvalue} *= {rvalue}` -- `{lvalue} /= {rvalue}` -- `{lvalue} %= {rvalue}` -- `{lvalue} &= {rvalue}` -- `{lvalue} |= {rvalue}` -- `{lvalue} ^= {rvalue}` -- `{lvalue} <= {rvalue}` - left shift `lvalue` by `rvalue` -- `{lvalue} >= {rvalue}` - right shift `lvalue` by `rvalue` -- `{function}({term}, {term}, ...)` - function call, ignoring the return value -- `return {rvalue}` -- `string {str}` - places a literal string in the code -- `byte {number}` - places a literal byte in the code - -Now let's get down into the weeds: - -A a *number* is one of: -- `{decimal number}` - e.g. `108` (note: there's no `d` prefix anymore) -- `0x{hexadecimal number}` - e.g. `0x2f` for 47 -- `'{character}` - e.g. 
`'a` for 97 (the character code for `a`) - -A *term* is one of: -- `{variable name}` - the value of a (local or global) variable -- `.{label name}` - the address of a label -- `{number}` - -An *lvalue* is the left-hand side of an assignment expression, -and it is one of: -- `{variable}` -- `*1{variable}` - dereference 1 byte -- `*2{variable}` - dereference 2 bytes -- `*4{variable}` - dereference 4 bytes -- `*8{variable}` - dereference 8 bytes - -An *rvalue* is an expression, which can be more complicated than a term. -rvalues are one of: -- `{term}` -- `&{variable}` - address of variable -- `*1{variable}` / `*2{variable}` / `*4{variable}` / `*8{variable}` - dereference 1, 2, 4, or 8 bytes -- `~{term}` - bitwise not -- `{function}({term}, {term}, ...)` -- `{term} + {term}` -- `{term} - {term}` -- `{term} * {term}` -- `{term} / {term}` -- `{term} % {term}` -- `{term} & {term}` -- `{term} | {term}` -- `{term} ^ {term}` -- `{term} < {term}` - left shift -- `{term} > {term}` - right shift - -That's quite a lot of stuff, and it makes for a pretty powerful -language, all things considered. To test out the language, -in addition to the hello world program, I also wrote a little -guessing game, which you can find in the file `guessing_game`. -It ended up being quite nice to write! - -## limitations - -Variables in this language do not have types. This makes it very easy to make mistakes like -treating numbers as pointers or vice versa. - -A big annoyance with this language is the lack of local label names. Due to the limited nature -of branching in this language (`if ... goto ...` stands in for `if`, `else if`, `while`, etc.), -you need to use a lot of labels, and that means their names can get quite long. But at least unlike -the 03 language, you'll get an error if you use the same label name twice! - -Overall, though, this language ended up being surprisingly powerful. With any luck, the next stage will -finally be a C compiler... 
diff --git a/04b/guessing_game b/04b/guessing_game deleted file mode 100644 index 415851d..0000000 --- a/04b/guessing_game +++ /dev/null @@ -1,238 +0,0 @@ -global 0x1000 exit_code -global y -y = 4 -exit_code = main() -exit(exit_code) - -function main - local secret_number - local guess - global 32 input_line - local p_line - p_line = &input_line - secret_number = getrand(100) - fputs(1, .str_intro) - - :guess_loop - fputs(1, .str_guess) - syscall(0, 0, p_line, 30) - guess = stoi(p_line) - if guess < secret_number goto too_low - if guess > secret_number goto too_high - fputs(1, .str_got_it) - return 0 - :too_low - fputs(1, .str_too_low) - goto guess_loop - :too_high - fputs(1, .str_too_high) - goto guess_loop - -:str_intro - string I'm thinking of a number. - byte 10 - byte 0 - -:str_guess - string Guess what it is: - byte 32 - byte 0 - -:str_got_it - string You got it! - byte 10 - byte 0 - -:str_too_low - string Too low! - byte 10 - byte 0 - -:str_too_high - string Too high! - byte 10 - byte 0 - -; get a "random" number from 0 to x using the system clock -function getrand - argument x - global 16 getrand_time - local ptime - local n - - ptime = &getrand_time - syscall(228, 1, ptime) - ptime += 8 ; nanoseconds at offset 8 in struct timespec - n = *4ptime - n %= x - return n - -; returns a pointer to a null-terminated string containing the number given -function itos - global 32 itos_string - argument x - local c - local p - p = &itos_string - p += 30 - :itos_loop - c = x % 10 - c += '0 - *1p = c - x /= 10 - if x == 0 goto itos_loop_end - p -= 1 - goto itos_loop - :itos_loop_end - return p - - -; returns the number at the start of the given string -function stoi - argument s - local p - local n - local c - n = 0 - p = s - :stoi_loop - c = *1p - if c < '0 goto stoi_loop_end - if c > '9 goto stoi_loop_end - n *= 10 - n += c - '0 - p += 1 - goto stoi_loop - :stoi_loop_end - return n - - -function strlen - argument s - local c - local p - p = s - :strlen_loop - c = *1p - 
if c == 0 goto strlen_loop_end - p += 1 - goto strlen_loop - :strlen_loop_end - return p - s - -function fputs - argument fd - argument s - local length - length = strlen(s) - syscall(1, fd, s, length) - return - - -function fputn - argument fd - argument n - local s - s = itos(n) - fputs(fd, s) - return - -function exit - argument status_code - syscall(0x3c, status_code) - -function syscall - ; I've done some testing, and this should be okay even if - ; rbp-56 goes beyond the end of the stack. - ; mov rax, [rbp-16] - byte 0x48 - byte 0x8b - byte 0x85 - byte 0xf0 - byte 0xff - byte 0xff - byte 0xff - ; mov rdi, rax - byte 0x48 - byte 0x89 - byte 0xc7 - - ; mov rax, [rbp-24] - byte 0x48 - byte 0x8b - byte 0x85 - byte 0xe8 - byte 0xff - byte 0xff - byte 0xff - ; mov rsi, rax - byte 0x48 - byte 0x89 - byte 0xc6 - - ; mov rax, [rbp-32] - byte 0x48 - byte 0x8b - byte 0x85 - byte 0xe0 - byte 0xff - byte 0xff - byte 0xff - ; mov rdx, rax - byte 0x48 - byte 0x89 - byte 0xc2 - - ; mov rax, [rbp-40] - byte 0x48 - byte 0x8b - byte 0x85 - byte 0xd8 - byte 0xff - byte 0xff - byte 0xff - ; mov r10, rax - byte 0x49 - byte 0x89 - byte 0xc2 - - ; mov rax, [rbp-48] - byte 0x48 - byte 0x8b - byte 0x85 - byte 0xd0 - byte 0xff - byte 0xff - byte 0xff - ; mov r8, rax - byte 0x49 - byte 0x89 - byte 0xc0 - - ; mov rax, [rbp-56] - byte 0x48 - byte 0x8b - byte 0x85 - byte 0xc8 - byte 0xff - byte 0xff - byte 0xff - ; mov r9, rax - byte 0x49 - byte 0x89 - byte 0xc1 - - ; mov rax, [rbp-8] - byte 0x48 - byte 0x8b - byte 0x85 - byte 0xf8 - byte 0xff - byte 0xff - byte 0xff - - ; syscall - byte 0x0f - byte 0x05 - - return diff --git a/04b/in03 b/04b/in03 deleted file mode 100644 index 8fb9ade..0000000 --- a/04b/in03 +++ /dev/null @@ -1,2532 +0,0 @@ -; initialize global_variables_end -C=:global_variables_end -D=:global_variables -8C=D -; initialize static_memory_end -C=:static_memory_end -; 0x80000 = 512KB for code -D=x480000 -8C=D -; initialize labels_end -C=:labels_end -D=:labels -8C=D - -I=8S 
-A=d2 -?I>A:argv_file_names - ; use default input/output filenames - ; open input file - J=:input_filename - I=d0 - syscall x2 - J=A - ?J<0:input_file_error - ; open output file - J=:output_filename - I=x241 - D=x1ed - syscall x2 - J=A - ?J<0:output_file_error - !:second_pass_starting_point -:argv_file_names - ; open input file - J=S - ; argv[1] is at *(rsp+16) - J+=d16 - J=8J - I=d0 - syscall x2 - J=A - ?J<0:input_file_error - ; open output file - J=S - ; argv[2] is at *(rsp+24) - J+=d24 - J=8J - I=x241 - D=x1ed - syscall x2 - J=A - ?J<0:output_file_error - - -:second_pass_starting_point -; write ELF header -J=d4 -I=:ELF_header -D=x78 -syscall x1 - -:read_line -; increment line number -D=:line_number -C=8D -C+=d1 -8D=C - -; use rbp to store line pointer -R=:line -:read_line_loop - ; read 1 byte into rbp - J=d3 - I=R - D=d1 - syscall x0 - D=A - ?D=0:eof - - ; check if the character was a newline: - C=1R - D=xa - ?C=D:read_line_loop_end - ; check if the character was a tab: - D=x9 - ; if so, don't increment rbp - ?C=D:read_line_loop - ; check if the character was a semicolon: - D='; - ; if so, it's a comment - ?C=D:handle_comment - - R+=d1 - !:read_line_loop - - :handle_comment - ; read out rest of line from file - J=d3 - I=R - D=d1 - syscall x0 - D=A - ?D=0:eof - C=1R - D=xa - ; if we didn't reach the end of the line, keep going - ?C!D:handle_comment - - !:read_line_loop_end -:read_line_loop_end - -; remove whitespace (specifically, ' ' characters) at end of line -I=R -:remove_terminal_whitespace_loop - I-=d1 - C=1I - D=x20 - ?C!D:remove_terminal_whitespace_loop_end - ; replace ' ' with a newline - D=xa - 1I=D - !:remove_terminal_whitespace_loop -:remove_terminal_whitespace_loop_end - -; check if this is a blank line -C=:line -D=1C -C=xa -?C=D:read_line - -C=': -?C=D:handle_label_definition - -I=:line -J=:"global" -C=x20 -call :string= -D=A -?D!0:handle_global - -I=:line -J=:"local" -C=x20 -call :string= -D=A -?D!0:handle_local -; arguments are treated the same as 
local variables -I=:line -J=:"argument" -C=x20 -call :string= -D=A -?D!0:handle_local - -I=:line -J=:"return" -C=x20 -call :string= -D=A -?D!0:handle_return - -I=:line -J=:"byte" -C=x20 -call :string= -D=A -?D!0:handle_byte - -I=:line -J=:"string" -C=x20 -call :string= -D=A -?D!0:handle_string - -I=:line -J=:"goto" -C=x20 -call :string= -D=A -?D!0:handle_goto - -I=:line -J=:"if" -C=x20 -call :string= -D=A -?D!0:handle_if - -I=:line -J=:"function" -call :string= -D=A -?D!0:handle_function - - -; set delimiter to newline -C=xa - -I=:line -J=:"return\n" -call :string= -D=A -?D!0:handle_return - -; check if this is an assignment -I=:line -:assignment_check_loop - C=1I - D=xa - ?C=D:assignment_check_loop_end - D='= - ?C=D:handle_assignment - I+=d1 - !:assignment_check_loop -:assignment_check_loop_end - -; check if this is a function call (where we discard the return value) -I=:line -; (check for an opening bracket not preceded by a space) -:call_check_loop - C=1I - D=x20 - ?C=D:call_check_loop_end - D=xa - ?C=D:call_check_loop_end - D='( - ?C=D:handle_call - I+=d1 - !:call_check_loop -:call_check_loop_end - -!:bad_statement - -!:read_line - -:eof - C=:second_pass - D=1C - ?D!0:exit_success - ; set 2nd pass to 1 - 1C=d1 - ; make sure output file is large enough for static memory - ; we'll use the ftruncate syscall to set the size of the file - J=d4 - I=:static_memory_end - I=8I - I-=x400000 - syscall x4d - ; seek both files back to start - J=d3 - I=d0 - D=d0 - syscall x8 - J=d4 - I=d0 - D=d0 - syscall x8 - ; set line number to 0 - C=:line_number - 8C=0 - - !:second_pass_starting_point - -:exit_success - J=d0 - syscall x3c - -align -:local_variable_name - reserve d8 - -:handle_byte - I=:line - ; 5 = length of "byte " - I+=d5 - call :read_number - ; make sure byte is 0-255 - C=A - D=xff - ?CaD:bad_byte - ; write byte - I=:byte - 1I=C - J=d4 - D=d1 - syscall x1 - !:read_line -:byte - reserve d1 - -:handle_string - I=:line - ; 7 = length of "string " - I+=d7 - J=I - ; find 
end of string - :string_loop - C=1J - D=xa - ?C=D:string_loop_end - J+=d1 - !:string_loop - :string_loop_end - ; get length of string - D=J - D-=I - ; output fd - J=d4 - syscall x1 - !:read_line - -:handle_call - J=I - ; just use the rvalue function call code - C=:rvalue - D=:line - 8C=D - I=:line - call :rvalue_function_call - !:read_line - -:handle_local - ; skip ' ' - I+=d1 - - ; store away pointer to variable name - C=:local_variable_name - 8C=I - - ; check if already defined - J=:local_variables - call :ident_lookup - C=A - ?C!0:local_redeclaration - - C=:local_variable_name - I=8C - J=:local_variables_end - J=8J - call :ident_copy - - ; increase stack_end, store it in J - C=:stack_end - D=4C - D+=d8 - 4C=D - 4J=D - J+=d4 - ; store null terminator - 1J=0 - - ; update :local_variables_end - I=:local_variables_end - 8I=J - - ; set rsp appropriately - C=:rbp_offset - J=d0 - J-=D - 4C=J - - J=d4 - I=:lea_rsp_[rbp_offset] - D=d7 - syscall x1 - - - ; read the next line - !:read_line - -:lea_rsp_[rbp_offset] - x48 - x8d - xa5 -:rbp_offset - reserve d4 - -align -:global_start - reserve d8 -:global_variable_name - reserve d8 -:global_variable_size - reserve d8 -:handle_global - ; ignore if this is the second pass - C=:second_pass - C=1C - ?C!0:read_line - - ; skip ' ' - I+=d1 - - C=1I - D='9 - ?C>D:global_default_size - ; read specific size of global - call :read_number - D=A - C=:global_variable_size - 8C=D - ; check and skip space after number - C=1I - D=x20 - ?C!D:bad_number - I+=d1 - !:global_cont - :global_default_size - ; default size = 8 - C=:global_variable_size - D=d8 - 8C=D - :global_cont - - ; store away pointer to variable name - C=:global_variable_name - 8C=I - - ; check if already defined - J=:global_variables - call :ident_lookup - C=A - ?C!0:global_redeclaration - - C=:global_variable_name - I=8C - - J=:global_variables_end - J=8J - call :ident_copy - ; store address - D=:static_memory_end - C=4D - 4J=C - J+=d4 - ; increase static_memory_end by size - 
D=:global_variable_size - D=8D - C+=D - D=:static_memory_end - 4D=C - ; store null terminator - 1J=0 - ; update :global_variables_end - I=:global_variables_end - 8I=J - ; go read the next line - !:read_line - -:handle_function - I=:line - ; length of "function " - I+=d9 - ; make function name a label - call :add_label - - ; emit prologue - J=d4 - I=:function_prologue - D=d14 - syscall x1 - - ; reset local variable table - D=:local_variables - 1D=0 - C=:local_variables_end - 8C=D - - ; reset stack_end - D=:stack_end - 4D=0 - - ; go read the next line - !:read_line - -:function_prologue - ; sub rsp, 8 - x48 - x81 - xec - x08 - x00 - x00 - x00 - ; mov [rsp], rbp - x48 - x89 - x2c - x24 - ; mov rbp, rsp - R=S - ; total length: 7 + 4 + 3 = 14 bytes - -:function_epilogue - ; mov rsp, rbp - S=R - ; mov rbp, [rsp] - x48 - x8b - x2c - x24 - ; add rsp, 8 - x48 - x81 - xc4 - x08 - x00 - x00 - x00 - ; ret - return - ; total length = 15 bytes - -:handle_label_definition - I=:line - I+=d1 - call :add_label - !:read_line - -align -:label_name - reserve d8 -; add the label in rsi to the label list (with the current pc address) -:add_label - ; ignore if this is the second pass - C=:second_pass - C=1C - ?C!0:return_0 - - C=:label_name - 8C=I - - ; make sure label only has identifier characters - :label_checking_loop - C=1I - D=xa - ?C=D:label_checking_loop_end - I+=d1 - B=C - call :isident - D=A - ?D!0:label_checking_loop - !:bad_label - :label_checking_loop_end - - C=:label_name - I=8C - J=:labels - call :ident_lookup - C=A - ?C!0:label_redefinition - - J=:labels_end - J=8J - C=:label_name - I=8C - call :ident_copy - R=J - - ; figure out where in the file we are (using lseek) - J=d4 - I=d0 - D=d1 - syscall x8 - C=A - C+=x400000 - J=R - ; store address - 4J=C - J+=d4 - - ; update labels_end - C=:labels_end - 8C=J - - return - -:handle_goto - J=d4 - I=:jmp_prefix - D=d1 - syscall x1 - I=:line - ; 5 = length of "goto " - I+=d5 - call :emit_label_jump_address - !:read_line -:jmp_prefix 
- xe9 - -:handle_if - I=:line - I+=d3 - ; skip term 1 - call :go_to_space - I+=d1 - ; skip operator - call :go_to_space - I+=d1 - ; put second operand in rsi - call :set_rax_to_term - call :set_rsi_to_rax - - - I=:line - ; length of "if " - I+=d3 - ; put first operand in rax - call :set_rax_to_term - ; put second operand in rbx - call :set_rbx_to_rsi - ; emit cmp rax, rbx - J=d4 - I=:cmp_rax_rbx - D=d3 - syscall x1 - - I=:line - I+=d3 - call :go_to_space - I+=d1 - R=I - C=x20 - - I=R - J=:"==" - call :string= - I=A - ?I!0:write_je - - I=R - J=:"!=" - call :string= - I=A - ?I!0:write_jne - - I=R - J=:">" - call :string= - I=A - ?I!0:write_jg - - I=R - J=:"<" - call :string= - I=A - ?I!0:write_jl - - I=R - J=:">=" - call :string= - I=A - ?I!0:write_jge - - I=R - J=:"<=" - call :string= - I=A - ?I!0:write_jle - - I=R - J=:"]" - call :string= - I=A - ?I!0:write_ja - - I=R - J=:"[" - call :string= - I=A - ?I!0:write_jb - - I=R - J=:"]=" - call :string= - I=A - ?I!0:write_jae - - I=R - J=:"[=" - call :string= - I=A - ?I!0:write_jbe - - !:bad_jump - - :write_je - J=d4 - I=:je_prefix - D=d2 - syscall x1 - !:if_continue - - :write_jne - J=d4 - I=:jne_prefix - D=d2 - syscall x1 - !:if_continue - - :write_jl - J=d4 - I=:jl_prefix - D=d2 - syscall x1 - !:if_continue - - :write_jg - J=d4 - I=:jg_prefix - D=d2 - syscall x1 - !:if_continue - - :write_jle - J=d4 - I=:jle_prefix - D=d2 - syscall x1 - !:if_continue - - :write_jge - J=d4 - I=:jge_prefix - D=d2 - syscall x1 - !:if_continue - - :write_jb - J=d4 - I=:jb_prefix - D=d2 - syscall x1 - !:if_continue - - :write_ja - J=d4 - I=:ja_prefix - D=d2 - syscall x1 - !:if_continue - - :write_jbe - J=d4 - I=:jbe_prefix - D=d2 - syscall x1 - !:if_continue - - :write_jae - J=d4 - I=:jae_prefix - D=d2 - syscall x1 - !:if_continue - -:if_continue - I=:line - I+=d3 - ; skip term 1 - call :go_to_space - I+=d1 - ; skip operator - call :go_to_space - I+=d1 - ; skip term 2 - call :go_to_space - I+=d1 - J=:"goto" - C=x20 - call :string= - C=A - 
; make sure word after term 2 is "goto" - ?C=0:bad_jump - I+=d1 - call :emit_label_jump_address - !:read_line - -:je_prefix - x0f - x84 -:jne_prefix - x0f - x85 -:jl_prefix - x0f - x8c -:jg_prefix - x0f - x8f -:jle_prefix - x0f - x8e -:jge_prefix - x0f - x8d -:jb_prefix - x0f - x82 -:ja_prefix - x0f - x87 -:jbe_prefix - x0f - x86 -:jae_prefix - x0f - x83 - -:cmp_rax_rbx - x48 - x39 - xd8 - -align -:reladdr - reserve d4 - -; emit relative address (for jumping) of label in rsi -:emit_label_jump_address - ; address doesn't matter for first pass - C=:second_pass - C=1C - ?C=0:jump_ignore_address - ; look up label; store address in rbp - J=:labels - call :ident_lookup - C=A - ?C=0:bad_label - R=4C -:jump_ignore_address - - ; first, figure out current address - J=d4 - I=d0 - D=d1 - syscall x8 - C=A - ; add an additional 4 because the relative address is 4 bytes long - C+=x400004 - - ; compute relative address - D=d0 - D-=C - D+=R - ; store in :reladdr - C=:reladdr - 4C=D - ; output - J=d4 - I=:reladdr - D=d4 - syscall x1 - return - -align -:assignment_type - reserve d8 -:handle_assignment - I-=d1 - C=:assignment_type - 8C=I - - I+=d2 - C=1I - D=x20 - ; check for space after = - ?C!D:bad_assignment - I+=d1 - - ; set rdi to right-hand side of assignment - call :set_rax_to_rvalue - call :set_rdi_to_rax - - J=:assignment_type - J=8J - C=1J - ; put newline after lvalue to make parsing easier - D=xa - 1J=D - D=x20 - ?C=D:handle_assignment_cont - J-=d1 - D=xa - 1J=D - :handle_assignment_cont - D=x20 - ?C=D:handle_plain_assignment - D='+ - ?C=D:handle_+= - D='- - ?C=D:handle_-= - D='* - ?C=D:handle_*= - D='/ - ?C=D:handle_/= - D='% - ?C=D:handle_%= - D='& - ?C=D:handle_&= - D='| - ?C=D:handle_|= - D='^ - ?C=D:handle_^= - D='< - ?C=D:handle_<= - D='> - ?C=D:handle_>= - - !:bad_assignment - -:handle_plain_assignment - I=:line - call :set_lvalue_to_rax - !:read_line - -:handle_+= - I=:line - call :set_rax_to_rvalue - call :set_rbx_to_rdi - call :emit_add_rax_rbx - I=:line - call 
:set_lvalue_to_rax - !:read_line - -:handle_-= - I=:line - call :set_rax_to_rvalue - call :set_rbx_to_rdi - call :emit_sub_rax_rbx - I=:line - call :set_lvalue_to_rax - !:read_line - -:handle_*= - I=:line - call :set_rax_to_rvalue - call :set_rbx_to_rdi - call :emit_imul_rbx - I=:line - call :set_lvalue_to_rax - !:read_line - -:handle_/= - I=:line - call :set_rax_to_rvalue - call :set_rbx_to_rdi - call :emit_zero_rdx_idiv_rbx - I=:line - call :set_lvalue_to_rax - !:read_line - -:handle_%= - I=:line - call :set_rax_to_rvalue - call :set_rbx_to_rdi - call :emit_zero_rdx_idiv_rbx - call :set_rax_to_rdx - I=:line - call :set_lvalue_to_rax - !:read_line - -:handle_&= - I=:line - call :set_rax_to_rvalue - call :set_rbx_to_rdi - call :emit_and_rax_rbx - I=:line - call :set_lvalue_to_rax - !:read_line - -:handle_|= - I=:line - call :set_rax_to_rvalue - call :set_rbx_to_rdi - call :emit_or_rax_rbx - I=:line - call :set_lvalue_to_rax - !:read_line - -:handle_^= - I=:line - call :set_rax_to_rvalue - call :set_rbx_to_rdi - call :emit_xor_rax_rbx - I=:line - call :set_lvalue_to_rax - !:read_line - -:handle_<= - I=:line - call :set_rax_to_rvalue - call :set_rcx_to_rdi - call :emit_shl_rax_cl - I=:line - call :set_lvalue_to_rax - !:read_line - -:handle_>= - I=:line - call :set_rax_to_rvalue - call :set_rcx_to_rdi - call :emit_shr_rax_cl - I=:line - call :set_lvalue_to_rax - !:read_line - -align -:lvalue - reserve d8 - -; set the lvalue in rsi to -:set_lvalue_to_rax - C=:lvalue - 8C=I - - ; first, store away value in - R=I - call :set_rdi_to_rax - I=R - - C=:lvalue - I=8C - C=1I - D='* - - ?C=D:lvalue_deref - ; not a dereference; just a variable - C=:lvalue - I=8C - call :set_rax_to_address_of_variable - call :set_rbx_to_rax - call :set_rax_to_rdi - call :set_[rbx]_to_rax - return - :lvalue_deref - C=:lvalue - I=8C - I+=d2 - call :set_rax_to_address_of_variable - call :set_rbx_to_rax - call :set_rax_to_[rbx] - call :set_rbx_to_rax - call :set_rax_to_rdi - - C=:lvalue - I=8C - 
I+=d1 - C=1I - - D='1 - ?C=D:lvalue_deref1 - D='2 - ?C=D:lvalue_deref2 - D='4 - ?C=D:lvalue_deref4 - D='8 - ?C=D:lvalue_deref8 - !:bad_assignment - :lvalue_deref1 - !:set_[rbx]_to_al - :lvalue_deref2 - !:set_[rbx]_to_ax - :lvalue_deref4 - !:set_[rbx]_to_eax - :lvalue_deref8 - !:set_[rbx]_to_rax - -:handle_return - I=:line - ; skip "return" - I+=d6 - C=1I - D=xa - ?C=D:no_return_value - - ; skip ' ' after return - I+=d1 - - call :set_rax_to_rvalue - - :no_return_value - J=d4 - I=:function_epilogue - D=d15 - syscall x1 - - ; go read the next line - !:read_line - -:mov_rsp_rbp - S=R - -:ret - return - -; copy the newline-terminated identifier from rsi to rdi -:ident_copy - C=1I - B=C - call :isident - D=A - ?D=0:bad_identifier - - :ident_loop - C=1I - 1J=C - I+=d1 - J+=d1 - D=xa - ?C=D:ident_loop_end - B=C - call :isident - D=A - ?D=0:bad_identifier - !:ident_loop - :ident_loop_end - return - -align -:ident_lookup_i - reserve d8 - -; look up identifier rsi in list rdi -; returns address of whatever's right after the identifier in the list, or 0 if not found -:ident_lookup - C=:ident_lookup_i - 8C=I - - :ident_lookup_loop - ; check if reached the end of the table - C=1J - ?C=0:return_0 - I=:ident_lookup_i - I=8I - call :ident= - C=A - ; move past terminator of identifier in table - :ident_finish_loop - D=1J - J+=d1 - A=xa - ?D!A:ident_finish_loop - ; check if this was it - ?C!0:return_J - ; nope. keep going - ; skip over address: - J+=d4 - !:ident_lookup_loop - -; can the character in rbx appear in an identifier? -:isident - A='0 - ?BA:return_1 - A='_ - ?B=A:return_1 - !:return_0 - -; set to the term in rsi -:set_rax_to_term - R=I - - C=1I - D='' - ?C=D:term_number - D='. 
- ?C=D:term_label - D=d58 - ?C to the variable in rsi -:set_rax_to_variable - ; variable - call :set_rax_to_address_of_variable - call :set_rbx_to_rax - call :set_rax_to_[rbx] - return - -:term_label - C=:second_pass - C=1C - ; skip looking up label on first pass; just use whatever's in rsi - ?C=0:set_rax_to_immediate - ; move past . - I+=d1 - J=:labels - call :ident_lookup - C=A - ?C=0:bad_label - ; set rax to label value - I=4C - !:set_rax_to_immediate - -align -:rvalue - reserve d8 - -; set to the rvalue in rsi -:set_rax_to_rvalue - ; store pointer to rvalue - C=:rvalue - 8C=I - - C=1I - D='& - ?C=D:rvalue_addressof - - D='~ - ?C=D:rvalue_bitwise_not - - D='* - ?C=D:rvalue_dereference - - J=I - :rvalue_loop - C=1J - D='( - ?C=D:rvalue_function_call - D=x20 - ?C=D:rvalue_binary_op - D=xa - ; no space or opening bracket; this must be a term - ?C=D:set_rax_to_term - J+=d1 - !:rvalue_loop - -align -:rvalue_function_arg - reserve d8 -:rvalue_function_arg_offset - reserve d4 - -:rvalue_function_call - I=J - I+=d1 - C=1I - D=') - ?C=D:function_call_no_arguments - - C=:rvalue_function_arg_offset - ; set arg offset to -16 (to skip over stack space for return address and rbp) - D=xfffffffffffffff0 - 4C=D - - :rvalue_function_loop - C=:rvalue_function_arg - 8C=I - ; set to argument - call :set_rax_to_term - ; set <[rsp-arg_offset]> to rax - ; first, output prefix - J=d4 - I=:mov_[rsp_offset]_rax_prefix - D=d4 - syscall x1 - ; now decrement offset, and output it - I=:rvalue_function_arg_offset - C=4I - C-=d8 - 4I=C - J=d4 - D=d4 - syscall x1 - - C=:rvalue_function_arg - I=8C - ; skip over argument - :rvalue_function_arg_loop - C=1I - D=', - ?C=D:rvalue_function_next_arg - D=') - ?C=D:rvalue_function_loop_end - D=xa - ; no closing bracket - ?C=D:bad_call - I+=d1 - !:rvalue_function_arg_loop - :rvalue_function_next_arg - ; skip comma - I+=d1 - C=1I - D=x20 - ; make sure there's a space after the comma - ?C!D:bad_call - ; skip space - I+=d1 - - ; handle the next argument - 
!:rvalue_function_loop - :rvalue_function_loop_end - :function_call_no_arguments - - I+=d1 - C=1I - D=xa - ; make sure there's nothing after the closing bracket - ?C!D:bad_term - - C=:second_pass - C=1C - ?C=0:ignore_function_address - ; look up function name - I=:rvalue - I=8I - J=:labels - call :ident_lookup - C=A - ?C=0:bad_function - ; read address - I=4C - :ignore_function_address - call :set_rax_to_immediate - ; write call rax - J=d4 - I=:call_rax - D=d2 - syscall x1 - ; we're done! - - return - -:mov_[rsp_offset]_rax_prefix - x48 - x89 - x84 - x24 - -:call_rax - xff - xd0 - -:binary_op - reserve d1 -:rvalue_binary_op - ; move past ' ' - J+=d1 - ; store binary op - D=1J - C=:binary_op - 1C=D - - ; make sure space follows operator - J+=d1 - C=1J - D=x20 - ?C!D:bad_term - ; set rsi to second operand - J+=d1 - I=J - call :set_rax_to_term - call :set_rsi_to_rax - - ; now set rax to first operand - I=:rvalue - I=8I - call :set_rax_to_term - - ; and combine - C=:binary_op - C=1C - - D='+ - ?C=D:rvalue_add - - D='- - ?C=D:rvalue_sub - - D='* - ?C=D:rvalue_mul - - D='/ - ?C=D:rvalue_div - - D='% - ?C=D:rvalue_rem - - D='& - ?C=D:rvalue_and - - D='| - ?C=D:rvalue_or - - D='^ - ?C=D:rvalue_xor - - D='< - ?C=D:rvalue_shl - - D='> - ?C=D:rvalue_shr - - !:bad_term - -:rvalue_add - call :set_rbx_to_rsi - !:emit_add_rax_rbx - -:rvalue_sub - call :set_rbx_to_rsi - !:emit_sub_rax_rbx - -:rvalue_mul - call :set_rbx_to_rsi - !:emit_imul_rbx - -:rvalue_div - call :set_rbx_to_rsi - !:emit_zero_rdx_idiv_rbx - -:rvalue_rem - call :set_rbx_to_rsi - call :emit_zero_rdx_idiv_rbx - call :set_rax_to_rdx - return - -:rvalue_and - call :set_rbx_to_rsi - !:emit_and_rax_rbx - -:rvalue_or - call :set_rbx_to_rsi - !:emit_or_rax_rbx - -:rvalue_xor - call :set_rbx_to_rsi - !:emit_xor_rax_rbx - -:rvalue_shl - call :set_rcx_to_rsi - !:emit_shl_rax_cl - -:rvalue_shr - call :set_rcx_to_rsi - !:emit_shr_rax_cl - -:rvalue_addressof - I+=d1 - !:set_rax_to_address_of_variable - -:rvalue_bitwise_not - 
I+=d1 - call :set_rax_to_term - J=d4 - I=:not_rax - D=d3 - syscall x1 - return -:not_rax - x48 - xf7 - xd0 - -:rvalue_dereference_size - reserve d1 - -:rvalue_dereference - I+=d1 - D=1I - C=:rvalue_dereference_size - 1C=D - I+=d1 - call :set_rax_to_variable - call :set_rbx_to_rax - call :zero_rax - C=:rvalue_dereference_size - C=1C - - D='1 - ?C=D:set_al_to_[rbx] - D='2 - ?C=D:set_ax_to_[rbx] - D='4 - ?C=D:set_eax_to_[rbx] - D='8 - ?C=D:set_rax_to_[rbx] - - !:bad_term - - -; set to address of variable in rsi -:set_rax_to_address_of_variable - J=:local_variables - call :ident_lookup - C=A - ?C=0:try_global - ; it's a local variable - ; read the offset from - D=4C - ; put negated offset in rbp - R=d0 - R-=D - - ; lea rax, [rbp+ - J=d4 - I=:lea_rax_rbp_offset_prefix - D=d3 - syscall x1 - - ; offset] - J=d4 - I=:imm64 - 4I=R - D=d4 - syscall x1 - - return - :try_global - J=:global_variables - call :ident_lookup - C=A - ?C=0:bad_variable - ; it's a global variable - ; get its address - C=4C - - ; put address in rax - I=C - !:set_rax_to_immediate - -:number_is_negative - reserve d1 - -:term_number - call :read_number - I=A - !:set_rax_to_immediate - -; set rax to the number in the string at rsi -:read_number - C=1I - D='' - ?C=D:read_char - D='- - ; set rdx to 0 if number is positive, 1 if negative - ?C=D:read_number_negative - D=d0 - !:read_number_cont - :read_number_negative - D=d1 - I+=d1 - :read_number_cont - ; store away negativity - C=:number_is_negative - 1C=D - ; check if number starts with 0-9 - C=1I - D='9 - ?C>D:bad_number - D='0 - ?CD:hex_number_loop_end - ; one of the digits a-f - D=xffffffffffffffa9 - !:hex_number_digit - :hex_number_0123456789 - D=xffffffffffffffd0 - :hex_number_digit - C+=D - ; shift left by 4 - R<=d4 - ; add digit - R+=C - I+=d1 - !:hex_number_loop - :hex_number_loop_end - !:read_number_output - -:read_number_output - ; first, make sure number is followed by space/newline/appropriate punctuation - C=1I - D=x20 - ?C=D:read_number_valid - 
D=', - ?C=D:read_number_valid - D=') - ?C=D:read_number_valid - D=xa - ?C=D:read_number_valid - !:bad_number -:read_number_valid - ; we now have the *unsigned* number in rbp. take the sign into consideration - C=:number_is_negative - D=1C - ?D=0:number_not_negative - ; R = -R - C=R - R=d0 - R-=C - :number_not_negative - ; finally, return - A=R - return - - - -; set to the immediate in rsi. -:set_rax_to_immediate - C=:imm64 - 8C=I - - ; write prefix - J=d4 - D=d2 - I=:mov_rax_imm64_prefix - syscall x1 - - ; write immediate - J=d4 - D=d8 - I=:imm64 - syscall x1 - return - -:zero_rax - J=d4 - I=:xor_eax_eax - D=d2 - syscall x1 - return -:xor_eax_eax - x31 - xc0 - -:zero_rdx - J=d4 - I=:xor_edx_edx - D=d2 - syscall x1 - return -:xor_edx_edx - x31 - xd2 - -:set_rbx_to_rax - J=d4 - I=:mov_rbx_rax - D=d3 - syscall x1 - return -:mov_rbx_rax - B=A - -:set_rbx_to_rsi - J=d4 - I=:mov_rbx_rsi - D=d3 - syscall x1 - return -:mov_rbx_rsi - B=I - -:set_rbx_to_rdi - J=d4 - I=:mov_rbx_rdi - D=d3 - syscall x1 - return -:mov_rbx_rdi - B=J - -:set_rcx_to_rsi - J=d4 - I=:mov_rcx_rsi - D=d3 - syscall x1 - return -:mov_rcx_rsi - C=I - -:set_rcx_to_rdi - J=d4 - I=:mov_rcx_rdi - D=d3 - syscall x1 - return -:mov_rcx_rdi - C=J - -:set_rax_to_rdx - J=d4 - I=:mov_rax_rdx - D=d3 - syscall x1 - return -:mov_rax_rdx - A=D - -:set_rax_to_rdi - J=d4 - I=:mov_rax_rdi - D=d3 - syscall x1 - return -:mov_rax_rdi - A=J - -:set_rsi_to_rax - J=d4 - I=:mov_rsi_rax - D=d3 - syscall x1 - return -:mov_rsi_rax - I=A - -:set_rdi_to_rax - J=d4 - I=:mov_rdi_rax - D=d3 - syscall x1 - return -:mov_rdi_rax - J=A - -:set_rax_to_[rbx] - J=d4 - I=:mov_rax_[rbx] - D=d3 - syscall x1 - return -:mov_rax_[rbx] - x48 - x8b - x03 - -:set_eax_to_[rbx] - J=d4 - I=:mov_eax_[rbx] - D=d2 - syscall x1 - return -:mov_eax_[rbx] - x8b - x03 - -:set_ax_to_[rbx] - J=d4 - I=:mov_ax_[rbx] - D=d3 - syscall x1 - return -:mov_ax_[rbx] - x66 - x8b - x03 - -:set_al_to_[rbx] - J=d4 - I=:mov_al_[rbx] - D=d2 - syscall x1 - return -:mov_al_[rbx] - 
x8a - x03 - - -:set_[rbx]_to_rax - J=d4 - I=:mov_[rbx]_rax - D=d3 - syscall x1 - return -:mov_[rbx]_rax - x48 - x89 - x03 - -:set_[rbx]_to_eax - J=d4 - I=:mov_[rbx]_eax - D=d2 - syscall x1 - return -:mov_[rbx]_eax - x89 - x03 - -:set_[rbx]_to_ax - J=d4 - I=:mov_[rbx]_ax - D=d3 - syscall x1 - return -:mov_[rbx]_ax - x66 - x89 - x03 - -:set_[rbx]_to_al - J=d4 - I=:mov_[rbx]_al - D=d2 - syscall x1 - return -:mov_[rbx]_al - x88 - x03 - - -:mov_rax_imm64_prefix - x48 - xb8 - -:emit_add_rax_rbx - J=d4 - I=:add_rax_rbx - D=d3 - syscall x1 - return -:add_rax_rbx - x48 - x01 - xd8 - -:emit_sub_rax_rbx - J=d4 - I=:sub_rax_rbx - D=d3 - syscall x1 - return -:sub_rax_rbx - x48 - x29 - xd8 - -:emit_and_rax_rbx - J=d4 - I=:and_rax_rbx - D=d3 - syscall x1 - return -:and_rax_rbx - x48 - x21 - xd8 - -:emit_or_rax_rbx - J=d4 - I=:or_rax_rbx - D=d3 - syscall x1 - return -:or_rax_rbx - x48 - x09 - xd8 - -:emit_xor_rax_rbx - J=d4 - I=:xor_rax_rbx - D=d3 - syscall x1 - return -:xor_rax_rbx - x48 - x31 - xd8 - -:emit_shl_rax_cl - J=d4 - I=:shl_rax_cl - D=d3 - syscall x1 - return -:shl_rax_cl - x48 - xd3 - xe0 - -:emit_shr_rax_cl - J=d4 - I=:shr_rax_cl - D=d3 - syscall x1 - return -:shr_rax_cl - x48 - xd3 - xe8 - -:emit_imul_rbx - J=d4 - I=:imul_rbx - D=d3 - syscall x1 - return -:imul_rbx - x48 - xf7 - xeb - -:emit_zero_rdx_idiv_rbx - call :zero_rdx - J=d4 - I=:idiv_rbx - D=d3 - syscall x1 - return -:idiv_rbx - x48 - xf7 - xfb - -align -:imm64 - reserve d8 - -; prefix for lea rax, [rbp+IMM32] -:lea_rax_rbp_offset_prefix - x48 - x8d - x85 - -:input_filename - str in04b - x0 - -:output_filename - str out04b - x0 - -:input_file_error - B=:input_file_error_message - !:general_error - -:input_file_error_message - str Couldn't open input file. - xa - x0 - -:output_file_error - B=:output_file_error_message - !:general_error - -:output_file_error_message - str Couldn't open output file. 
- xa - x0 - -:bad_identifier - B=:bad_identifier_error_message - !:program_error - -:bad_identifier_error_message - str Bad identifier. - xa - x0 - -:bad_label - B=:bad_label_error_message - !:program_error - -:bad_label_error_message - str Bad label. - xa - x0 - -:bad_variable - B=:bad_variable_error_message - !:program_error - -:bad_variable_error_message - str No such variable. - xa - x0 - -:bad_function - B=:bad_function_error_message - !:program_error - -:bad_function_error_message - str No such function. - xa - x0 - -:bad_byte - B=:bad_byte_error_message - !:program_error - -:bad_byte_error_message - str Byte not in range 0-255. - xa - x0 - -:bad_number - B=:bad_number_error_message - !:program_error - -:bad_number_error_message - str Bad number. - xa - x0 - -:bad_assignment - B=:bad_assignment_error_message - !:program_error - -:bad_assignment_error_message - str Bad assignment. - xa - x0 - -:bad_term - B=:bad_term_error_message - !:program_error - -:bad_term_error_message - str Bad term. - xa - x0 - -:bad_statement - B=:bad_statement_error_message - !:program_error - -:bad_statement_error_message - str Bad statement. - xa - x0 - -:bad_jump - B=:bad_jump_error_message - !:program_error - -:bad_jump_error_message - str Bad jump. - xa - x0 - -:bad_call - B=:bad_call_error_message - !:program_error - -:bad_call_error_message - str Bad function call. - xa - x0 - -:label_redefinition - B=:label_redefinition_error_message - !:program_error - -:label_redefinition_error_message - str Label redefinition. - xa - x0 - -:global_redeclaration - B=:global_redeclaration_error_message - !:program_error - -:global_redeclaration_error_message - str Global variable declared twice. - xa - x0 - -:local_redeclaration - B=:local_redeclaration_error_message - !:program_error - -:local_redeclaration_error_message - str Local variable declared twice. 
- xa - x0 - -:general_error - call :eputs - J=d1 - syscall x3c - -:program_error - R=B - - B=:"Line" - call :eputs - - D=:line_number - D=8D - B=D - call :eputn - - B=:line_number_separator - call :eputs - - B=R - call :eputs - J=d1 - syscall x3c - -:"Line" - str Line - x20 - x0 - -:line_number_separator - str : - x20 - x0 - -:strlen - I=B - D=B - :strlen_loop - C=1I - ?C=0:strlen_ret - I+=d1 - !:strlen_loop - :strlen_ret - I-=D - A=I - return - -; check if strings in rdi and rsi are equal, up to terminator in rcx -:string= - D=1I - A=1J - ?D!A:return_0 - ?D=C:return_1 - I+=d1 - J+=d1 - !:string= - -; check if strings in rdi and rsi are equal, up to the first non-identifier character -:ident= - D=1I - B=D - call :isident - ; I ended - ?A=0:ident=_I_end - - D=1J - B=D - call :isident - ; J ended, but I didn't - ?A=0:return_0 - - ; we haven't reached the end of either - D=1I - A=1J - ?D!A:return_0 - I+=d1 - J+=d1 - !:ident= -:ident=_I_end - D=1J - B=D - call :isident - ; check if J also ended - ?A=0:return_1 - ; J didn't end - !:return_0 - -:return_0 - A=d0 - return -:return_1 - A=d1 - return -:return_2 - A=d2 - return -:return_3 - A=d3 - return -:return_4 - A=d4 - return -:return_5 - A=d5 - return -:return_6 - A=d6 - return -:return_7 - A=d7 - return -:return_8 - A=d8 - return -:return_J - A=J - return - -; write the character in rbx to the file in rdi. -:fputc - C=B - I=S - I-=d1 - 1I=C - D=d1 - syscall x1 - return - -; write the string in rbx to stderr -:eputs - J=B - call :strlen - D=A - I=J - J=d2 - syscall x1 - return - -; write rbx in decimal to stderr -:eputn - I=B - J=S - J-=d1 - :eputn_loop - D=d0 - ; divide by 10 - B=d10 - A=I - div - ; quotient is new number - I=A - ; add remainder to string - D+='0 - 1J=D - J-=d1 - ?I!0:eputn_loop - J+=d1 - D=S - D-=J - I=J - J=d2 - syscall x1 - return - -; copy rdx bytes from rsi to rdi. 
-; this copies from the left: if you're doing an overlapped copy, rsi should be greater than rdi -:memcpy - ?D=0:return_0 - A=1I - 1J=A - I+=d1 - J+=d1 - D-=d1 - !:memcpy - -; copy from rdi to rsi, until byte cl is reached -:memccpy - D=1I - 1J=D - I+=d1 - J+=d1 - ?D!C:memccpy - return - -; advance rsi to the next space or newline character -:go_to_space - C=1I - D=xa - ?C=D:return_0 - D=x20 - ?C=D:return_0 - I+=d1 - !:go_to_space - -:"global" - str global - x20 -:"argument" - str argument - x20 -:"local" - str local - x20 -:"return" - str return - x20 -:"return\n" - str return - xa -:"byte" - str byte - x20 -:"string" - str string - x20 -:"goto" - str goto - x20 -:"if" - str if - x20 -:"function" - str function - x20 -:"==" - str == - x20 -:"!=" - str != - x20 -:">" - str > - x20 -:"<" - str < - x20 -:"<=" - str <= - x20 -:">=" - str >= - x20 -:"[" - str [ - x20 -:"]" - str ] - x20 -:"[=" - str [= - x20 -:"]=" - str ]= - x20 - -:zero - x0 - -; put a 0 byte before the line (this is important for removing whitespace at the end of the line, -; specifically, we don't want this to be a space character) -x0 -:line - reserve d1000 - -align -:global_variables_end - reserve d8 -:static_memory_end - reserve d8 -:local_variables_end - reserve d8 -:stack_end - reserve d8 -:labels_end - reserve d8 -:line_number - reserve d8 -:global_variables - reserve d50000 -:local_variables - reserve d20000 -:labels - reserve d200000 -:second_pass - reserve d1 - -:ELF_header -x7f -x45 -x4c -x46 -x02 -x01 -x01 - -reserve d9 - -x02 -x00 - -x3e -x00 - -x01 -x00 -x00 -x00 - -x78 -x00 -x40 -x00 -x00 -x00 -x00 -x00 - -x40 -x00 -x00 -x00 -x00 -x00 -x00 -x00 - -reserve d12 - -x40 -x00 -x38 -x00 -x01 -x00 -x00 -x00 -x00 -x00 -x00 -x00 - -x01 -x00 -x00 -x00 - -x07 -x00 -x00 -x00 - -x78 -x00 -x00 -x00 -x00 -x00 -x00 -x00 - -x78 -x00 -x40 -x00 -x00 -x00 -x00 -x00 - -reserve d8 - -x00 -x00 -x20 -x00 -x00 -x00 -x00 -x00 - -x00 -x00 -x20 -x00 -x00 -x00 -x00 -x00 - -x00 -x10 -x00 -x00 -x00 -x00 -x00 -x00 - 
-; NOTE: we shouldn't end the file with a reserve; we don't handle that properly diff --git a/04b/in04b b/04b/in04b deleted file mode 100644 index 2b85900..0000000 --- a/04b/in04b +++ /dev/null @@ -1,133 +0,0 @@ -main() - -function main - puts(.str_hello_world) - putc(10) ; newline - syscall(0x3c, 0) - -:str_hello_world - string Hello, world! - byte 0 - -function strlen - argument s - local c - local p - p = s - :strlen_loop - c = *1p - if c == 0 goto strlen_loop_end - p += 1 - goto strlen_loop - :strlen_loop_end - return p - s - -function putc - argument c - local p - p = &c - syscall(1, 1, p, 1) - return - -function puts - argument s - local len - len = strlen(s) - syscall(1, 1, s, len) - return - -function syscall - ; I've done some testing, and this should be okay even if - ; rbp-56 goes beyond the end of the stack. - ; mov rax, [rbp-16] - byte 0x48 - byte 0x8b - byte 0x85 - byte 0xf0 - byte 0xff - byte 0xff - byte 0xff - ; mov rdi, rax - byte 0x48 - byte 0x89 - byte 0xc7 - - ; mov rax, [rbp-24] - byte 0x48 - byte 0x8b - byte 0x85 - byte 0xe8 - byte 0xff - byte 0xff - byte 0xff - ; mov rsi, rax - byte 0x48 - byte 0x89 - byte 0xc6 - - ; mov rax, [rbp-32] - byte 0x48 - byte 0x8b - byte 0x85 - byte 0xe0 - byte 0xff - byte 0xff - byte 0xff - ; mov rdx, rax - byte 0x48 - byte 0x89 - byte 0xc2 - - ; mov rax, [rbp-40] - byte 0x48 - byte 0x8b - byte 0x85 - byte 0xd8 - byte 0xff - byte 0xff - byte 0xff - ; mov r10, rax - byte 0x49 - byte 0x89 - byte 0xc2 - - ; mov rax, [rbp-48] - byte 0x48 - byte 0x8b - byte 0x85 - byte 0xd0 - byte 0xff - byte 0xff - byte 0xff - ; mov r8, rax - byte 0x49 - byte 0x89 - byte 0xc0 - - ; mov rax, [rbp-56] - byte 0x48 - byte 0x8b - byte 0x85 - byte 0xc8 - byte 0xff - byte 0xff - byte 0xff - ; mov r9, rax - byte 0x49 - byte 0x89 - byte 0xc1 - - ; mov rax, [rbp-8] - byte 0x48 - byte 0x8b - byte 0x85 - byte 0xf8 - byte 0xff - byte 0xff - byte 0xff - - ; syscall - byte 0x0f - byte 0x05 - - return diff --git a/Makefile b/Makefile index 
ca98178..20d8dcb 100644 --- a/Makefile +++ b/Makefile @@ -3,15 +3,15 @@ all: markdown README.html $(MAKE) -C 01 $(MAKE) -C 02 $(MAKE) -C 03 + $(MAKE) -C 04 $(MAKE) -C 04a - $(MAKE) -C 04b clean: $(MAKE) -C 00 clean $(MAKE) -C 01 clean $(MAKE) -C 02 clean $(MAKE) -C 03 clean + $(MAKE) -C 04 clean $(MAKE) -C 04a clean - $(MAKE) -C 04b clean rm -f markdown rm -f README.html markdown: markdown.c diff --git a/README.md b/README.md index b7369cb..195a64a 100644 --- a/README.md +++ b/README.md @@ -26,8 +26,8 @@ command codes. - [stage 02](02/README.md) - a language with labels - [stage 03](03/README.md) - a language with longer labels, better error messages, and less register manipulation - more coming soon (hopefully) +- [stage 04](04/README.md) - a language with nice functions and local variables - [stage 04a](04a/README.md) - (interlude) a very simple preprocessor -- [stage 04b](04b/README.md) - a language with nice functions and local variables ## prerequisite knowledge @@ -114,4 +114,4 @@ shall not be held liable in connection with it. ## contributing If you notice a mistake/want to clarify something, you can submit a pull request -via GitHub, or email `pommicket at pommicket.com`. Translations are welcome! +via GitHub, or email `pommicket at pommicket.com`. -- cgit v1.2.3