diff options
Diffstat (limited to '04b')
-rw-r--r-- | 04b/Makefile | 11 | ||||
-rw-r--r-- | 04b/README.md | 240 | ||||
-rw-r--r-- | 04b/guessing_game | 238 | ||||
-rw-r--r-- | 04b/in03 | 2532 | ||||
-rw-r--r-- | 04b/in04b | 133 |
5 files changed, 0 insertions, 3154 deletions
diff --git a/04b/Makefile b/04b/Makefile deleted file mode 100644 index ef72181..0000000 --- a/04b/Makefile +++ /dev/null @@ -1,11 +0,0 @@ -all: out03 guessing_game.out out04b README.html -out03: in03 ../03/out02 - ../03/out02 -%.html: %.md ../markdown - ../markdown $< -out04b: in04b out03 - ./out03 -%.out: % out03 - ./out03 $< $@ -clean: - rm -f out* README.html *.out diff --git a/04b/README.md b/04b/README.md deleted file mode 100644 index f131943..0000000 --- a/04b/README.md +++ /dev/null @@ -1,240 +0,0 @@ -# stage 04 - -As usual, the source for this compiler is `in03`, an input to the [previous compiler](../03/README.md). -`in04b` contains a hello world program written in the stage 4 language. -Here is the core of the program: - -``` -main() - -function main - puts(.str_hello_world) - putc(10) ; newline - syscall(0x3c, 0) -``` - -As you can see, we can now pass arguments to functions. And let's take a look at `putc`: - -``` -function putc - argument c - local p - p = &c - syscall(1, 1, p, 1) - return -``` - -It's so simple compared to previous languages! Rather than mess around with registers, we can now -declare local (and global) variables, and use them directly. These variables will be placed on the -stack. Since arguments are also placed on the stack, -by implementing local variables we get arguments for free. There is no difference -between the `local` and `argument` keywords in this language other than spelling. -In fact, the number of agruments to a function call is not checked against -how many arguments the function has. This does make it easy to screw things up by calling a function -with the wrong number of arguments, but it also means that we can provide a variable number of arguments -to the `syscall` function. Speaking of which, if you look at the bottom of `in04b`, you'll see: - -``` -function syscall - ... - byte 0x48 - byte 0x8b - byte 0x85 - byte 0xf0 - byte 0xff - byte 0xff - byte 0xff - ... -``` - -Originally I was going to make `syscall` a built-in feature of the language, but then I realized that wasn't -necessary. -Instead, `syscall` is a function written manually in machine language. -We can take a look at its decompilation to make things clearer: - -``` -mov rax,[rbp-0x10] -mov rdi,rax -mov rax,[rbp-0x18] -mov rsi,rax -mov rax,[rbp-0x20] -mov rdx,rax -mov rax,[rbp-0x28] -mov r10,rax -mov rax,[rbp-0x30] -mov r8,rax -mov rax,[rbp-0x38] -mov r9,rax -mov rax,[rbp-0x8] -syscall -``` - -This just sets `rax`, `rdi`, `rsi`, etc. to the arguments the function was called with, -and then does a syscall. - -## functions and local variables - -In this language, function arguments are placed onto the stack from left to right -and all arguments and local variables are 8 bytes. -As a reminder, -the stack is just an area of memory which is automatically extended downwards (on x86-64, at least). -So, how do we keep track of the location of local variables in the stack? We could do something like -this: - -``` -sub rsp, 24 ; make room for 3 variables -mov [rsp], 10 ; variable1 = 10 -mov [rsp+8], 20 ; variable2 = 20 -mov [rsp+16], 30 ; variable3 = 30 -; ... -add rsp, 24 ; reset rsp -``` - -But now suppose that in the middle of the `; ...` code we want another local variable: -``` -sub rsp, 8 ; make room for another variable -``` -well, since we've changed `rsp`, `variable1` is now at `rsp+8` instead of `rsp`, -`variable2` is at `rsp+16` instead of `rsp+8`, and -`variable3` is at `rsp+24` instead of `rsp+16`. -Also, we had better make sure we increment `rsp` by `32` now instead of `24` -to put it back in the right place. -It would be annoying (but by no means impossible) to keep track of all this. -We could just declare all local variables at the start of the function, -but that makes the language more annoying to use. - -Instead, we can use the `rbp` register to keep track of what `rsp` was -at the start of the function: - -``` -; save old value of rbp -sub rsp, 8 -mov [rsp], rbp -; set rbp to initial value of rsp -mov rbp, rsp - -lea rsp, [rbp-8] ; add variable1 (this instruction sets rsp to rbp-8) -mov [rbp-8], 10 ; variable1 = 10 -lea rsp, [rbp-16] ; add variable2 -mov [rbp-16], 20 ; variable2 = 20 -lea rsp, [rbp-24] ; add variable3 -mov [rbp-24], 30 ; variable3 = 30 -; Note that variable1's address is still rbp-8; adding more variables didn't affect it. -; ... - -; restore old values of rbp and rsp -mov rsp, rbp -mov rbp, [rsp] -add rsp, 8 -``` - -This is actually the intended use of `rbp` (it *p*oints to the *b*ase of the stack frame). -Note that setting `rsp` very specifically rather than just doing `sub rsp, 8` is important: -if we skip over some code with a local variable declaration, or execute a local declaration twice, -we want `rsp` to be in the right place. -The first three and last three instructions above are called the function *prologue* and *epilogue*. -They are all the same for all functions; a prologue is generated at the start of every function, -and an epilogue is generated for every return statement. -The return value is placed in `rax`. - -## global variables - -Global variables are much simpler than local ones. The variable `:static_memory_end` in the compiler -keeps track of where to put the next global variable in memory. It is initialized at address `0x440000`, -which gives us 256KB for code (and strings). When a global variable is added, `:static_memory_end` is increased -by its size. - -## language description - -Comments begin with `;` and may be put at the end of lines -with or without code. -Blank lines are ignored. - -To make the compiler simpler, this language doesn't support fancy -expressions like `2 * (3 + 5) / 6`. There is a limited set of possible -expressions, specifically there are *terms* and *r-values*. - -But first, each program is made up of a series of statements, and -each statement is one of the following: -- `global {name}` or `global {size} {name}` - declare a global variable with the given size, or 8 bytes if none is provided. -- `local {name}` - declare a local variable -- `argument {name}` - declare a function argument. this is functionally equivalent to `local`, so it just exists for readability. -- `function {name}` - declare a function -- `:{name}` - declare a label -- `goto {label}` - jump to the specified label -- `if {term} {operator} {term} goto {label}` - -conditionally jump to the specified label. `{operator}` should be one of -`==`, `<`, `>`, `>=`, `<=`, `!=`, `[`, `]`, `[=`, `]=` -(the last four do unsigned comparisons). -- `{lvalue} = {rvalue}` - set `lvalue` to `rvalue` -- `{lvalue} += {rvalue}` - add `rvalue` to `lvalue` -- `{lvalue} -= {rvalue}` - etc. -- `{lvalue} *= {rvalue}` -- `{lvalue} /= {rvalue}` -- `{lvalue} %= {rvalue}` -- `{lvalue} &= {rvalue}` -- `{lvalue} |= {rvalue}` -- `{lvalue} ^= {rvalue}` -- `{lvalue} <= {rvalue}` - left shift `lvalue` by `rvalue` -- `{lvalue} >= {rvalue}` - right shift `lvalue` by `rvalue` -- `{function}({term}, {term}, ...)` - function call, ignoring the return value -- `return {rvalue}` -- `string {str}` - places a literal string in the code -- `byte {number}` - places a literal byte in the code - -Now let's get down into the weeds: - -A a *number* is one of: -- `{decimal number}` - e.g. `108` (note: there's no `d` prefix anymore) -- `0x{hexadecimal number}` - e.g. `0x2f` for 47 -- `'{character}` - e.g. `'a` for 97 (the character code for `a`) - -A *term* is one of: -- `{variable name}` - the value of a (local or global) variable -- `.{label name}` - the address of a label -- `{number}` - -An *lvalue* is the left-hand side of an assignment expression, -and it is one of: -- `{variable}` -- `*1{variable}` - dereference 1 byte -- `*2{variable}` - dereference 2 bytes -- `*4{variable}` - dereference 4 bytes -- `*8{variable}` - dereference 8 bytes - -An *rvalue* is an expression, which can be more complicated than a term. -rvalues are one of: -- `{term}` -- `&{variable}` - address of variable -- `*1{variable}` / `*2{variable}` / `*4{variable}` / `*8{variable}` - dereference 1, 2, 4, or 8 bytes -- `~{term}` - bitwise not -- `{function}({term}, {term}, ...)` -- `{term} + {term}` -- `{term} - {term}` -- `{term} * {term}` -- `{term} / {term}` -- `{term} % {term}` -- `{term} & {term}` -- `{term} | {term}` -- `{term} ^ {term}` -- `{term} < {term}` - left shift -- `{term} > {term}` - right shift - -That's quite a lot of stuff, and it makes for a pretty powerful -language, all things considered. To test out the language, -in addition to the hello world program, I also wrote a little -guessing game, which you can find in the file `guessing_game`. -It ended up being quite nice to write! - -## limitations - -Variables in this language do not have types. This makes it very easy to make mistakes like -treating numbers as pointers or vice versa. - -A big annoyance with this language is the lack of local label names. Due to the limited nature -of branching in this language (`if ... goto ...` stands in for `if`, `else if`, `while`, etc.), -you need to use a lot of labels, and that means their names can get quite long. But at least unlike -the 03 language, you'll get an error if you use the same label name twice! - -Overall, though, this language ended up being surprisingly powerful. With any luck, the next stage will -finally be a C compiler... diff --git a/04b/guessing_game b/04b/guessing_game deleted file mode 100644 index 415851d..0000000 --- a/04b/guessing_game +++ /dev/null @@ -1,238 +0,0 @@ -global 0x1000 exit_code -global y -y = 4 -exit_code = main() -exit(exit_code) - -function main - local secret_number - local guess - global 32 input_line - local p_line - p_line = &input_line - secret_number = getrand(100) - fputs(1, .str_intro) - - :guess_loop - fputs(1, .str_guess) - syscall(0, 0, p_line, 30) - guess = stoi(p_line) - if guess < secret_number goto too_low - if guess > secret_number goto too_high - fputs(1, .str_got_it) - return 0 - :too_low - fputs(1, .str_too_low) - goto guess_loop - :too_high - fputs(1, .str_too_high) - goto guess_loop - -:str_intro - string I'm thinking of a number. - byte 10 - byte 0 - -:str_guess - string Guess what it is: - byte 32 - byte 0 - -:str_got_it - string You got it! - byte 10 - byte 0 - -:str_too_low - string Too low! - byte 10 - byte 0 - -:str_too_high - string Too high! - byte 10 - byte 0 - -; get a "random" number from 0 to x using the system clock -function getrand - argument x - global 16 getrand_time - local ptime - local n - - ptime = &getrand_time - syscall(228, 1, ptime) - ptime += 8 ; nanoseconds at offset 8 in struct timespec - n = *4ptime - n %= x - return n - -; returns a pointer to a null-terminated string containing the number given -function itos - global 32 itos_string - argument x - local c - local p - p = &itos_string - p += 30 - :itos_loop - c = x % 10 - c += '0 - *1p = c - x /= 10 - if x == 0 goto itos_loop_end - p -= 1 - goto itos_loop - :itos_loop_end - return p - - -; returns the number at the start of the given string -function stoi - argument s - local p - local n - local c - n = 0 - p = s - :stoi_loop - c = *1p - if c < '0 goto stoi_loop_end - if c > '9 goto stoi_loop_end - n *= 10 - n += c - '0 - p += 1 - goto stoi_loop - :stoi_loop_end - return n - - -function strlen - argument s - local c - local p - p = s - :strlen_loop - c = *1p - if c == 0 goto strlen_loop_end - p += 1 - goto strlen_loop - :strlen_loop_end - return p - s - -function fputs - argument fd - argument s - local length - length = strlen(s) - syscall(1, fd, s, length) - return - - -function fputn - argument fd - argument n - local s - s = itos(n) - fputs(fd, s) - return - -function exit - argument status_code - syscall(0x3c, status_code) - -function syscall - ; I've done some testing, and this should be okay even if - ; rbp-56 goes beyond the end of the stack. - ; mov rax, [rbp-16] - byte 0x48 - byte 0x8b - byte 0x85 - byte 0xf0 - byte 0xff - byte 0xff - byte 0xff - ; mov rdi, rax - byte 0x48 - byte 0x89 - byte 0xc7 - - ; mov rax, [rbp-24] - byte 0x48 - byte 0x8b - byte 0x85 - byte 0xe8 - byte 0xff - byte 0xff - byte 0xff - ; mov rsi, rax - byte 0x48 - byte 0x89 - byte 0xc6 - - ; mov rax, [rbp-32] - byte 0x48 - byte 0x8b - byte 0x85 - byte 0xe0 - byte 0xff - byte 0xff - byte 0xff - ; mov rdx, rax - byte 0x48 - byte 0x89 - byte 0xc2 - - ; mov rax, [rbp-40] - byte 0x48 - byte 0x8b - byte 0x85 - byte 0xd8 - byte 0xff - byte 0xff - byte 0xff - ; mov r10, rax - byte 0x49 - byte 0x89 - byte 0xc2 - - ; mov rax, [rbp-48] - byte 0x48 - byte 0x8b - byte 0x85 - byte 0xd0 - byte 0xff - byte 0xff - byte 0xff - ; mov r8, rax - byte 0x49 - byte 0x89 - byte 0xc0 - - ; mov rax, [rbp-56] - byte 0x48 - byte 0x8b - byte 0x85 - byte 0xc8 - byte 0xff - byte 0xff - byte 0xff - ; mov r9, rax - byte 0x49 - byte 0x89 - byte 0xc1 - - ; mov rax, [rbp-8] - byte 0x48 - byte 0x8b - byte 0x85 - byte 0xf8 - byte 0xff - byte 0xff - byte 0xff - - ; syscall - byte 0x0f - byte 0x05 - - return diff --git a/04b/in03 b/04b/in03 deleted file mode 100644 index 8fb9ade..0000000 --- a/04b/in03 +++ /dev/null @@ -1,2532 +0,0 @@ -; initialize global_variables_end -C=:global_variables_end -D=:global_variables -8C=D -; initialize static_memory_end -C=:static_memory_end -; 0x80000 = 512KB for code -D=x480000 -8C=D -; initialize labels_end -C=:labels_end -D=:labels -8C=D - -I=8S -A=d2 -?I>A:argv_file_names - ; use default input/output filenames - ; open input file - J=:input_filename - I=d0 - syscall x2 - J=A - ?J<0:input_file_error - ; open output file - J=:output_filename - I=x241 - D=x1ed - syscall x2 - J=A - ?J<0:output_file_error - !:second_pass_starting_point -:argv_file_names - ; open input file - J=S - ; argv[1] is at *(rsp+16) - J+=d16 - J=8J - I=d0 - syscall x2 - J=A - ?J<0:input_file_error - ; open output file - J=S - ; argv[2] is at *(rsp+24) - J+=d24 - J=8J - I=x241 - D=x1ed - syscall x2 - J=A - ?J<0:output_file_error - - -:second_pass_starting_point -; write ELF header -J=d4 -I=:ELF_header -D=x78 -syscall x1 - -:read_line -; increment line number -D=:line_number -C=8D -C+=d1 -8D=C - -; use rbp to store line pointer -R=:line -:read_line_loop - ; read 1 byte into rbp - J=d3 - I=R - D=d1 - syscall x0 - D=A - ?D=0:eof - - ; check if the character was a newline: - C=1R - D=xa - ?C=D:read_line_loop_end - ; check if the character was a tab: - D=x9 - ; if so, don't increment rbp - ?C=D:read_line_loop - ; check if the character was a semicolon: - D='; - ; if so, it's a comment - ?C=D:handle_comment - - R+=d1 - !:read_line_loop - - :handle_comment - ; read out rest of line from file - J=d3 - I=R - D=d1 - syscall x0 - D=A - ?D=0:eof - C=1R - D=xa - ; if we didn't reach the end of the line, keep going - ?C!D:handle_comment - - !:read_line_loop_end -:read_line_loop_end - -; remove whitespace (specifically, ' ' characters) at end of line -I=R -:remove_terminal_whitespace_loop - I-=d1 - C=1I - D=x20 - ?C!D:remove_terminal_whitespace_loop_end - ; replace ' ' with a newline - D=xa - 1I=D - !:remove_terminal_whitespace_loop -:remove_terminal_whitespace_loop_end - -; check if this is a blank line -C=:line -D=1C -C=xa -?C=D:read_line - -C=': -?C=D:handle_label_definition - -I=:line -J=:"global" -C=x20 -call :string= -D=A -?D!0:handle_global - -I=:line -J=:"local" -C=x20 -call :string= -D=A -?D!0:handle_local -; arguments are treated the same as local variables -I=:line -J=:"argument" -C=x20 -call :string= -D=A -?D!0:handle_local - -I=:line -J=:"return" -C=x20 -call :string= -D=A -?D!0:handle_return - -I=:line -J=:"byte" -C=x20 -call :string= -D=A -?D!0:handle_byte - -I=:line -J=:"string" -C=x20 -call :string= -D=A -?D!0:handle_string - -I=:line -J=:"goto" -C=x20 -call :string= -D=A -?D!0:handle_goto - -I=:line -J=:"if" -C=x20 -call :string= -D=A -?D!0:handle_if - -I=:line -J=:"function" -call :string= -D=A -?D!0:handle_function - - -; set delimiter to newline -C=xa - -I=:line -J=:"return\n" -call :string= -D=A -?D!0:handle_return - -; check if this is an assignment -I=:line -:assignment_check_loop - C=1I - D=xa - ?C=D:assignment_check_loop_end - D='= - ?C=D:handle_assignment - I+=d1 - !:assignment_check_loop -:assignment_check_loop_end - -; check if this is a function call (where we discard the return value) -I=:line -; (check for an opening bracket not preceded by a space) -:call_check_loop - C=1I - D=x20 - ?C=D:call_check_loop_end - D=xa - ?C=D:call_check_loop_end - D='( - ?C=D:handle_call - I+=d1 - !:call_check_loop -:call_check_loop_end - -!:bad_statement - -!:read_line - -:eof - C=:second_pass - D=1C - ?D!0:exit_success - ; set 2nd pass to 1 - 1C=d1 - ; make sure output file is large enough for static memory - ; we'll use the ftruncate syscall to set the size of the file - J=d4 - I=:static_memory_end - I=8I - I-=x400000 - syscall x4d - ; seek both files back to start - J=d3 - I=d0 - D=d0 - syscall x8 - J=d4 - I=d0 - D=d0 - syscall x8 - ; set line number to 0 - C=:line_number - 8C=0 - - !:second_pass_starting_point - -:exit_success - J=d0 - syscall x3c - -align -:local_variable_name - reserve d8 - -:handle_byte - I=:line - ; 5 = length of "byte " - I+=d5 - call :read_number - ; make sure byte is 0-255 - C=A - D=xff - ?CaD:bad_byte - ; write byte - I=:byte - 1I=C - J=d4 - D=d1 - syscall x1 - !:read_line -:byte - reserve d1 - -:handle_string - I=:line - ; 7 = length of "string " - I+=d7 - J=I - ; find end of string - :string_loop - C=1J - D=xa - ?C=D:string_loop_end - J+=d1 - !:string_loop - :string_loop_end - ; get length of string - D=J - D-=I - ; output fd - J=d4 - syscall x1 - !:read_line - -:handle_call - J=I - ; just use the rvalue function call code - C=:rvalue - D=:line - 8C=D - I=:line - call :rvalue_function_call - !:read_line - -:handle_local - ; skip ' ' - I+=d1 - - ; store away pointer to variable name - C=:local_variable_name - 8C=I - - ; check if already defined - J=:local_variables - call :ident_lookup - C=A - ?C!0:local_redeclaration - - C=:local_variable_name - I=8C - J=:local_variables_end - J=8J - call :ident_copy - - ; increase stack_end, store it in J - C=:stack_end - D=4C - D+=d8 - 4C=D - 4J=D - J+=d4 - ; store null terminator - 1J=0 - - ; update :local_variables_end - I=:local_variables_end - 8I=J - - ; set rsp appropriately - C=:rbp_offset - J=d0 - J-=D - 4C=J - - J=d4 - I=:lea_rsp_[rbp_offset] - D=d7 - syscall x1 - - - ; read the next line - !:read_line - -:lea_rsp_[rbp_offset] - x48 - x8d - xa5 -:rbp_offset - reserve d4 - -align -:global_start - reserve d8 -:global_variable_name - reserve d8 -:global_variable_size - reserve d8 -:handle_global - ; ignore if this is the second pass - C=:second_pass - C=1C - ?C!0:read_line - - ; skip ' ' - I+=d1 - - C=1I - D='9 - ?C>D:global_default_size - ; read specific size of global - call :read_number - D=A - C=:global_variable_size - 8C=D - ; check and skip space after number - C=1I - D=x20 - ?C!D:bad_number - I+=d1 - !:global_cont - :global_default_size - ; default size = 8 - C=:global_variable_size - D=d8 - 8C=D - :global_cont - - ; store away pointer to variable name - C=:global_variable_name - 8C=I - - ; check if already defined - J=:global_variables - call :ident_lookup - C=A - ?C!0:global_redeclaration - - C=:global_variable_name - I=8C - - J=:global_variables_end - J=8J - call :ident_copy - ; store address - D=:static_memory_end - C=4D - 4J=C - J+=d4 - ; increase static_memory_end by size - D=:global_variable_size - D=8D - C+=D - D=:static_memory_end - 4D=C - ; store null terminator - 1J=0 - ; update :global_variables_end - I=:global_variables_end - 8I=J - ; go read the next line - !:read_line - -:handle_function - I=:line - ; length of "function " - I+=d9 - ; make function name a label - call :add_label - - ; emit prologue - J=d4 - I=:function_prologue - D=d14 - syscall x1 - - ; reset local variable table - D=:local_variables - 1D=0 - C=:local_variables_end - 8C=D - - ; reset stack_end - D=:stack_end - 4D=0 - - ; go read the next line - !:read_line - -:function_prologue - ; sub rsp, 8 - x48 - x81 - xec - x08 - x00 - x00 - x00 - ; mov [rsp], rbp - x48 - x89 - x2c - x24 - ; mov rbp, rsp - R=S - ; total length: 7 + 4 + 3 = 14 bytes - -:function_epilogue - ; mov rsp, rbp - S=R - ; mov rbp, [rsp] - x48 - x8b - x2c - x24 - ; add rsp, 8 - x48 - x81 - xc4 - x08 - x00 - x00 - x00 - ; ret - return - ; total length = 15 bytes - -:handle_label_definition - I=:line - I+=d1 - call :add_label - !:read_line - -align -:label_name - reserve d8 -; add the label in rsi to the label list (with the current pc address) -:add_label - ; ignore if this is the second pass - C=:second_pass - C=1C - ?C!0:return_0 - - C=:label_name - 8C=I - - ; make sure label only has identifier characters - :label_checking_loop - C=1I - D=xa - ?C=D:label_checking_loop_end - I+=d1 - B=C - call :isident - D=A - ?D!0:label_checking_loop - !:bad_label - :label_checking_loop_end - - C=:label_name - I=8C - J=:labels - call :ident_lookup - C=A - ?C!0:label_redefinition - - J=:labels_end - J=8J - C=:label_name - I=8C - call :ident_copy - R=J - - ; figure out where in the file we are (using lseek) - J=d4 - I=d0 - D=d1 - syscall x8 - C=A - C+=x400000 - J=R - ; store address - 4J=C - J+=d4 - - ; update labels_end - C=:labels_end - 8C=J - - return - -:handle_goto - J=d4 - I=:jmp_prefix - D=d1 - syscall x1 - I=:line - ; 5 = length of "goto " - I+=d5 - call :emit_label_jump_address - !:read_line -:jmp_prefix - xe9 - -:handle_if - I=:line - I+=d3 - ; skip term 1 - call :go_to_space - I+=d1 - ; skip operator - call :go_to_space - I+=d1 - ; put second operand in rsi - call :set_rax_to_term - call :set_rsi_to_rax - - - I=:line - ; length of "if " - I+=d3 - ; put first operand in rax - call :set_rax_to_term - ; put second operand in rbx - call :set_rbx_to_rsi - ; emit cmp rax, rbx - J=d4 - I=:cmp_rax_rbx - D=d3 - syscall x1 - - I=:line - I+=d3 - call :go_to_space - I+=d1 - R=I - C=x20 - - I=R - J=:"==" - call :string= - I=A - ?I!0:write_je - - I=R - J=:"!=" - call :string= - I=A - ?I!0:write_jne - - I=R - J=:">" - call :string= - I=A - ?I!0:write_jg - - I=R - J=:"<" - call :string= - I=A - ?I!0:write_jl - - I=R - J=:">=" - call :string= - I=A - ?I!0:write_jge - - I=R - J=:"<=" - call :string= - I=A - ?I!0:write_jle - - I=R - J=:"]" - call :string= - I=A - ?I!0:write_ja - - I=R - J=:"[" - call :string= - I=A - ?I!0:write_jb - - I=R - J=:"]=" - call :string= - I=A - ?I!0:write_jae - - I=R - J=:"[=" - call :string= - I=A - ?I!0:write_jbe - - !:bad_jump - - :write_je - J=d4 - I=:je_prefix - D=d2 - syscall x1 - !:if_continue - - :write_jne - J=d4 - I=:jne_prefix - D=d2 - syscall x1 - !:if_continue - - :write_jl - J=d4 - I=:jl_prefix - D=d2 - syscall x1 - !:if_continue - - :write_jg - J=d4 - I=:jg_prefix - D=d2 - syscall x1 - !:if_continue - - :write_jle - J=d4 - I=:jle_prefix - D=d2 - syscall x1 - !:if_continue - - :write_jge - J=d4 - I=:jge_prefix - D=d2 - syscall x1 - !:if_continue - - :write_jb - J=d4 - I=:jb_prefix - D=d2 - syscall x1 - !:if_continue - - :write_ja - J=d4 - I=:ja_prefix - D=d2 - syscall x1 - !:if_continue - - :write_jbe - J=d4 - I=:jbe_prefix - D=d2 - syscall x1 - !:if_continue - - :write_jae - J=d4 - I=:jae_prefix - D=d2 - syscall x1 - !:if_continue - -:if_continue - I=:line - I+=d3 - ; skip term 1 - call :go_to_space - I+=d1 - ; skip operator - call :go_to_space - I+=d1 - ; skip term 2 - call :go_to_space - I+=d1 - J=:"goto" - C=x20 - call :string= - C=A - ; make sure word after term 2 is "goto" - ?C=0:bad_jump - I+=d1 - call :emit_label_jump_address - !:read_line - -:je_prefix - x0f - x84 -:jne_prefix - x0f - x85 -:jl_prefix - x0f - x8c -:jg_prefix - x0f - x8f -:jle_prefix - x0f - x8e -:jge_prefix - x0f - x8d -:jb_prefix - x0f - x82 -:ja_prefix - x0f - x87 -:jbe_prefix - x0f - x86 -:jae_prefix - x0f - x83 - -:cmp_rax_rbx - x48 - x39 - xd8 - -align -:reladdr - reserve d4 - -; emit relative address (for jumping) of label in rsi -:emit_label_jump_address - ; address doesn't matter for first pass - C=:second_pass - C=1C - ?C=0:jump_ignore_address - ; look up label; store address in rbp - J=:labels - call :ident_lookup - C=A - ?C=0:bad_label - R=4C -:jump_ignore_address - - ; first, figure out current address - J=d4 - I=d0 - D=d1 - syscall x8 - C=A - ; add an additional 4 because the relative address is 4 bytes long - C+=x400004 - - ; compute relative address - D=d0 - D-=C - D+=R - ; store in :reladdr - C=:reladdr - 4C=D - ; output - J=d4 - I=:reladdr - D=d4 - syscall x1 - return - -align -:assignment_type - reserve d8 -:handle_assignment - I-=d1 - C=:assignment_type - 8C=I - - I+=d2 - C=1I - D=x20 - ; check for space after = - ?C!D:bad_assignment - I+=d1 - - ; set rdi to right-hand side of assignment - call :set_rax_to_rvalue - call :set_rdi_to_rax - - J=:assignment_type - J=8J - C=1J - ; put newline after lvalue to make parsing easier - D=xa - 1J=D - D=x20 - ?C=D:handle_assignment_cont - J-=d1 - D=xa - 1J=D - :handle_assignment_cont - D=x20 - ?C=D:handle_plain_assignment - D='+ - ?C=D:handle_+= - D='- - ?C=D:handle_-= - D='* - ?C=D:handle_*= - D='/ - ?C=D:handle_/= - D='% - ?C=D:handle_%= - D='& - ?C=D:handle_&= - D='| - ?C=D:handle_|= - D='^ - ?C=D:handle_^= - D='< - ?C=D:handle_<= - D='> - ?C=D:handle_>= - - !:bad_assignment - -:handle_plain_assignment - I=:line - call :set_lvalue_to_rax - !:read_line - -:handle_+= - I=:line - call :set_rax_to_rvalue - call :set_rbx_to_rdi - call :emit_add_rax_rbx - I=:line - call :set_lvalue_to_rax - !:read_line - -:handle_-= - I=:line - call :set_rax_to_rvalue - call :set_rbx_to_rdi - call :emit_sub_rax_rbx - I=:line - call :set_lvalue_to_rax - !:read_line - -:handle_*= - I=:line - call :set_rax_to_rvalue - call :set_rbx_to_rdi - call :emit_imul_rbx - I=:line - call :set_lvalue_to_rax - !:read_line - -:handle_/= - I=:line - call :set_rax_to_rvalue - call :set_rbx_to_rdi - call :emit_zero_rdx_idiv_rbx - I=:line - call :set_lvalue_to_rax - !:read_line - -:handle_%= - I=:line - call :set_rax_to_rvalue - call :set_rbx_to_rdi - call :emit_zero_rdx_idiv_rbx - call :set_rax_to_rdx - I=:line - call :set_lvalue_to_rax - !:read_line - -:handle_&= - I=:line - call :set_rax_to_rvalue - call :set_rbx_to_rdi - call :emit_and_rax_rbx - I=:line - call :set_lvalue_to_rax - !:read_line - -:handle_|= - I=:line - call :set_rax_to_rvalue - call :set_rbx_to_rdi - call :emit_or_rax_rbx - I=:line - call :set_lvalue_to_rax - !:read_line - -:handle_^= - I=:line - call :set_rax_to_rvalue - call :set_rbx_to_rdi - call :emit_xor_rax_rbx - I=:line - call :set_lvalue_to_rax - !:read_line - -:handle_<= - I=:line - call :set_rax_to_rvalue - call :set_rcx_to_rdi - call :emit_shl_rax_cl - I=:line - call :set_lvalue_to_rax - !:read_line - -:handle_>= - I=:line - call :set_rax_to_rvalue - call :set_rcx_to_rdi - call :emit_shr_rax_cl - I=:line - call :set_lvalue_to_rax - !:read_line - -align -:lvalue - reserve d8 - -; set the lvalue in rsi to <rax> -:set_lvalue_to_rax - C=:lvalue - 8C=I - - ; first, store away <rax> value in <rdi> - R=I - call :set_rdi_to_rax - I=R - - C=:lvalue - I=8C - C=1I - D='* - - ?C=D:lvalue_deref - ; not a dereference; just a variable - C=:lvalue - I=8C - call :set_rax_to_address_of_variable - call :set_rbx_to_rax - call :set_rax_to_rdi - call :set_[rbx]_to_rax - return - :lvalue_deref - C=:lvalue - I=8C - I+=d2 - call :set_rax_to_address_of_variable - call :set_rbx_to_rax - call :set_rax_to_[rbx] - call :set_rbx_to_rax - call :set_rax_to_rdi - - C=:lvalue - I=8C - I+=d1 - C=1I - - D='1 - ?C=D:lvalue_deref1 - D='2 - ?C=D:lvalue_deref2 - D='4 - ?C=D:lvalue_deref4 - D='8 - ?C=D:lvalue_deref8 - !:bad_assignment - :lvalue_deref1 - !:set_[rbx]_to_al - :lvalue_deref2 - !:set_[rbx]_to_ax - :lvalue_deref4 - !:set_[rbx]_to_eax - :lvalue_deref8 - !:set_[rbx]_to_rax - -:handle_return - I=:line - ; skip "return" - I+=d6 - C=1I - D=xa - ?C=D:no_return_value - - ; skip ' ' after return - I+=d1 - - call :set_rax_to_rvalue - - :no_return_value - J=d4 - I=:function_epilogue - D=d15 - syscall x1 - - ; go read the next line - !:read_line - -:mov_rsp_rbp - S=R - -:ret - return - -; copy the newline-terminated identifier from rsi to rdi -:ident_copy - C=1I - B=C - call :isident - D=A - ?D=0:bad_identifier - - :ident_loop - C=1I - 1J=C - I+=d1 - J+=d1 - D=xa - ?C=D:ident_loop_end - B=C - call :isident - D=A - ?D=0:bad_identifier - !:ident_loop - :ident_loop_end - return - -align -:ident_lookup_i - reserve d8 - -; look up identifier rsi in list rdi -; returns address of whatever's right after the identifier in the list, or 0 if not found -:ident_lookup - C=:ident_lookup_i - 8C=I - - :ident_lookup_loop - ; check if reached the end of the table - C=1J - ?C=0:return_0 - I=:ident_lookup_i - I=8I - call :ident= - C=A - ; move past terminator of identifier in table - :ident_finish_loop - D=1J - J+=d1 - A=xa - ?D!A:ident_finish_loop - ; check if this was it - ?C!0:return_J - ; nope. keep going - ; skip over address: - J+=d4 - !:ident_lookup_loop - -; can the character in rbx appear in an identifier? -:isident - A='0 - ?B<A:return_0 - ; note: 58 = '9' + 1 - A=d58 - ?B<A:return_1 - A='A - ?B<A:return_0 - ; note: 91 = 'z' + 1 - A=d91 - ?B<A:return_1 - A='z - ?B>A:return_0 - ; 96 = 'a' - 1 - A=d96 - ?B>A:return_1 - A='_ - ?B=A:return_1 - !:return_0 - -; set <rax> to the term in rsi -:set_rax_to_term - R=I - - C=1I - D='' - ?C=D:term_number - D='. - ?C=D:term_label - D=d58 - ?C<D:term_number - ; (fallthrough) -; set <rax> to the variable in rsi -:set_rax_to_variable - ; variable - call :set_rax_to_address_of_variable - call :set_rbx_to_rax - call :set_rax_to_[rbx] - return - -:term_label - C=:second_pass - C=1C - ; skip looking up label on first pass; just use whatever's in rsi - ?C=0:set_rax_to_immediate - ; move past . - I+=d1 - J=:labels - call :ident_lookup - C=A - ?C=0:bad_label - ; set rax to label value - I=4C - !:set_rax_to_immediate - -align -:rvalue - reserve d8 - -; set <rax> to the rvalue in rsi -:set_rax_to_rvalue - ; store pointer to rvalue - C=:rvalue - 8C=I - - C=1I - D='& - ?C=D:rvalue_addressof - - D='~ - ?C=D:rvalue_bitwise_not - - D='* - ?C=D:rvalue_dereference - - J=I - :rvalue_loop - C=1J - D='( - ?C=D:rvalue_function_call - D=x20 - ?C=D:rvalue_binary_op - D=xa - ; no space or opening bracket; this must be a term - ?C=D:set_rax_to_term - J+=d1 - !:rvalue_loop - -align -:rvalue_function_arg - reserve d8 -:rvalue_function_arg_offset - reserve d4 - -:rvalue_function_call - I=J - I+=d1 - C=1I - D=') - ?C=D:function_call_no_arguments - - C=:rvalue_function_arg_offset - ; set arg offset to -16 (to skip over stack space for return address and rbp) - D=xfffffffffffffff0 - 4C=D - - :rvalue_function_loop - C=:rvalue_function_arg - 8C=I - ; set <rax> to argument - call :set_rax_to_term - ; set <[rsp-arg_offset]> to rax - ; first, output prefix - J=d4 - I=:mov_[rsp_offset]_rax_prefix - D=d4 - syscall x1 - ; now decrement offset, and output it - I=:rvalue_function_arg_offset - C=4I - C-=d8 - 4I=C - J=d4 - D=d4 - syscall x1 - - C=:rvalue_function_arg - I=8C - ; skip over argument - :rvalue_function_arg_loop - C=1I - D=', - ?C=D:rvalue_function_next_arg - D=') - ?C=D:rvalue_function_loop_end - D=xa - ; no closing bracket - ?C=D:bad_call - I+=d1 - !:rvalue_function_arg_loop - :rvalue_function_next_arg - ; skip comma - I+=d1 - C=1I - D=x20 - ; make sure there's a space after the comma - ?C!D:bad_call - ; skip space - I+=d1 - - ; handle the next argument - !:rvalue_function_loop - :rvalue_function_loop_end - :function_call_no_arguments - - I+=d1 - C=1I - D=xa - ; make sure there's nothing after the closing bracket - ?C!D:bad_term - - C=:second_pass - C=1C - ?C=0:ignore_function_address - ; look up function name - I=:rvalue - I=8I - J=:labels - call :ident_lookup - C=A - ?C=0:bad_function - ; read address - I=4C - :ignore_function_address - call :set_rax_to_immediate - ; write call rax - J=d4 - I=:call_rax - D=d2 - syscall x1 - ; we're done! - - return - -:mov_[rsp_offset]_rax_prefix - x48 - x89 - x84 - x24 - -:call_rax - xff - xd0 - -:binary_op - reserve d1 -:rvalue_binary_op - ; move past ' ' - J+=d1 - ; store binary op - D=1J - C=:binary_op - 1C=D - - ; make sure space follows operator - J+=d1 - C=1J - D=x20 - ?C!D:bad_term - ; set rsi to second operand - J+=d1 - I=J - call :set_rax_to_term - call :set_rsi_to_rax - - ; now set rax to first operand - I=:rvalue - I=8I - call :set_rax_to_term - - ; and combine - C=:binary_op - C=1C - - D='+ - ?C=D:rvalue_add - - D='- - ?C=D:rvalue_sub - - D='* - ?C=D:rvalue_mul - - D='/ - ?C=D:rvalue_div - - D='% - ?C=D:rvalue_rem - - D='& - ?C=D:rvalue_and - - D='| - ?C=D:rvalue_or - - D='^ - ?C=D:rvalue_xor - - D='< - ?C=D:rvalue_shl - - D='> - ?C=D:rvalue_shr - - !:bad_term - -:rvalue_add - call :set_rbx_to_rsi - !:emit_add_rax_rbx - -:rvalue_sub - call :set_rbx_to_rsi - !:emit_sub_rax_rbx - -:rvalue_mul - call :set_rbx_to_rsi - !:emit_imul_rbx - -:rvalue_div - call :set_rbx_to_rsi - !:emit_zero_rdx_idiv_rbx - -:rvalue_rem - call :set_rbx_to_rsi - call :emit_zero_rdx_idiv_rbx - call :set_rax_to_rdx - return - -:rvalue_and - call :set_rbx_to_rsi - !:emit_and_rax_rbx - -:rvalue_or - call :set_rbx_to_rsi - !:emit_or_rax_rbx - -:rvalue_xor - call :set_rbx_to_rsi - !:emit_xor_rax_rbx - -:rvalue_shl - call :set_rcx_to_rsi - !:emit_shl_rax_cl - -:rvalue_shr - call :set_rcx_to_rsi - !:emit_shr_rax_cl - -:rvalue_addressof - I+=d1 - !:set_rax_to_address_of_variable - -:rvalue_bitwise_not - I+=d1 - call :set_rax_to_term - J=d4 - I=:not_rax - D=d3 - syscall x1 - return -:not_rax - x48 - xf7 - xd0 - -:rvalue_dereference_size - reserve d1 - -:rvalue_dereference - I+=d1 - D=1I - C=:rvalue_dereference_size - 1C=D - I+=d1 - call :set_rax_to_variable - call :set_rbx_to_rax - call :zero_rax - C=:rvalue_dereference_size - C=1C - - D='1 - ?C=D:set_al_to_[rbx] - D='2 - ?C=D:set_ax_to_[rbx] - D='4 - ?C=D:set_eax_to_[rbx] - D='8 - ?C=D:set_rax_to_[rbx] - - !:bad_term - - -; set <rax> to address of variable in rsi -:set_rax_to_address_of_variable - J=:local_variables - call :ident_lookup - C=A - ?C=0:try_global - ; it's a local variable - ; read the offset from <rbp> - D=4C - ; put negated offset in rbp - R=d0 - R-=D - - ; lea rax, [rbp+ - J=d4 - I=:lea_rax_rbp_offset_prefix - D=d3 - syscall x1 - - ; offset] - J=d4 - I=:imm64 - 4I=R - D=d4 - syscall x1 - - return - :try_global - J=:global_variables - call :ident_lookup - C=A - ?C=0:bad_variable - ; it's a global variable - ; get its address - C=4C - - ; put address in rax - I=C - !:set_rax_to_immediate - -:number_is_negative - reserve d1 - -:term_number - call :read_number - I=A - !:set_rax_to_immediate - -; set rax to the number in the string at rsi -:read_number - C=1I - D='' - ?C=D:read_char - D='- - ; set rdx to 0 if number is positive, 1 if negative - ?C=D:read_number_negative - D=d0 - !:read_number_cont - :read_number_negative - D=d1 - I+=d1 - :read_number_cont - ; store away negativity - C=:number_is_negative - 1C=D - ; check if number starts with 0-9 - C=1I - D='9 - ?C>D:bad_number - D='0 - ?C<D:bad_number - ?C=D:number_starting_with0 - ; it's a decimal number - ; rbp will store the number - R=d0 - :decimal_number_loop - C=1I - D='9 - ?C>D:decimal_number_loop_end - D='0 - ?C<D:decimal_number_loop_end - C-=D - ; multiply by 10 - B=d10 - A=R - mul - R=A - ; add this digit - R+=C - - I+=d1 - !:decimal_number_loop - :decimal_number_loop_end - !:read_number_output - -:read_char - I+=d1 - R=1I - I+=d1 - !:read_number_output - -:number_starting_with0 - I+=d1 - C=1I - D='x - ?C=D:read_hex_number - ; otherwise, it should just be 0 - R=d0 - !:read_number_output - -:read_hex_number - I+=d1 - ; rbp will store the number - R=d0 - :hex_number_loop - C=1I - D='0 - ?C<D:hex_number_loop_end - D=d58 - ?C<D:hex_number_0123456789 - D='a - ?C<D:hex_number_loop_end - D='f - ?C>D:hex_number_loop_end - ; one of the digits a-f - D=xffffffffffffffa9 - !:hex_number_digit - :hex_number_0123456789 - D=xffffffffffffffd0 - :hex_number_digit - C+=D - ; shift left by 4 - R<=d4 - ; add digit - R+=C - I+=d1 - !:hex_number_loop - :hex_number_loop_end - !:read_number_output - -:read_number_output - ; first, make sure number is followed by space/newline/appropriate punctuation - C=1I - D=x20 - ?C=D:read_number_valid - D=', - ?C=D:read_number_valid - D=') - ?C=D:read_number_valid - D=xa - ?C=D:read_number_valid - !:bad_number -:read_number_valid - ; we now have the *unsigned* number in rbp. take the sign into consideration - C=:number_is_negative - D=1C - ?D=0:number_not_negative - ; R = -R - C=R - R=d0 - R-=C - :number_not_negative - ; finally, return - A=R - return - - - -; set <rax> to the immediate in rsi. -:set_rax_to_immediate - C=:imm64 - 8C=I - - ; write prefix - J=d4 - D=d2 - I=:mov_rax_imm64_prefix - syscall x1 - - ; write immediate - J=d4 - D=d8 - I=:imm64 - syscall x1 - return - -:zero_rax - J=d4 - I=:xor_eax_eax - D=d2 - syscall x1 - return -:xor_eax_eax - x31 - xc0 - -:zero_rdx - J=d4 - I=:xor_edx_edx - D=d2 - syscall x1 - return -:xor_edx_edx - x31 - xd2 - -:set_rbx_to_rax - J=d4 - I=:mov_rbx_rax - D=d3 - syscall x1 - return -:mov_rbx_rax - B=A - -:set_rbx_to_rsi - J=d4 - I=:mov_rbx_rsi - D=d3 - syscall x1 - return -:mov_rbx_rsi - B=I - -:set_rbx_to_rdi - J=d4 - I=:mov_rbx_rdi - D=d3 - syscall x1 - return -:mov_rbx_rdi - B=J - -:set_rcx_to_rsi - J=d4 - I=:mov_rcx_rsi - D=d3 - syscall x1 - return -:mov_rcx_rsi - C=I - -:set_rcx_to_rdi - J=d4 - I=:mov_rcx_rdi - D=d3 - syscall x1 - return -:mov_rcx_rdi - C=J - -:set_rax_to_rdx - J=d4 - I=:mov_rax_rdx - D=d3 - syscall x1 - return -:mov_rax_rdx - A=D - -:set_rax_to_rdi - J=d4 - I=:mov_rax_rdi - D=d3 - syscall x1 - return -:mov_rax_rdi - A=J - -:set_rsi_to_rax - J=d4 - I=:mov_rsi_rax - D=d3 - syscall x1 - return -:mov_rsi_rax - I=A - -:set_rdi_to_rax - J=d4 - I=:mov_rdi_rax - D=d3 - syscall x1 - return -:mov_rdi_rax - J=A - -:set_rax_to_[rbx] - J=d4 - I=:mov_rax_[rbx] - D=d3 - syscall x1 - return -:mov_rax_[rbx] - x48 - x8b - x03 - -:set_eax_to_[rbx] - J=d4 - I=:mov_eax_[rbx] - D=d2 - syscall x1 - return -:mov_eax_[rbx] - x8b - x03 - -:set_ax_to_[rbx] - J=d4 - I=:mov_ax_[rbx] - D=d3 - syscall x1 - return -:mov_ax_[rbx] - x66 - x8b - x03 - -:set_al_to_[rbx] - J=d4 - I=:mov_al_[rbx] - D=d2 - syscall x1 - return -:mov_al_[rbx] - x8a - x03 - - -:set_[rbx]_to_rax - J=d4 - I=:mov_[rbx]_rax - D=d3 - syscall x1 - return -:mov_[rbx]_rax - x48 - x89 - x03 - -:set_[rbx]_to_eax - J=d4 - I=:mov_[rbx]_eax - D=d2 - syscall x1 - return -:mov_[rbx]_eax - x89 - x03 - -:set_[rbx]_to_ax - J=d4 - I=:mov_[rbx]_ax - D=d3 - syscall x1 - return -:mov_[rbx]_ax - x66 - x89 - x03 - -:set_[rbx]_to_al - J=d4 - I=:mov_[rbx]_al - D=d2 - syscall x1 - return -:mov_[rbx]_al - x88 - x03 - - -:mov_rax_imm64_prefix - x48 - xb8 - -:emit_add_rax_rbx - J=d4 - I=:add_rax_rbx - D=d3 - syscall x1 - return -:add_rax_rbx - x48 - x01 - xd8 - -:emit_sub_rax_rbx - J=d4 - I=:sub_rax_rbx - D=d3 - syscall x1 - return -:sub_rax_rbx - x48 - x29 - xd8 - -:emit_and_rax_rbx - J=d4 - I=:and_rax_rbx - D=d3 - syscall x1 - return -:and_rax_rbx - x48 - x21 - xd8 - -:emit_or_rax_rbx - J=d4 - I=:or_rax_rbx - D=d3 - syscall x1 - return -:or_rax_rbx - x48 - x09 - xd8 - -:emit_xor_rax_rbx - J=d4 - I=:xor_rax_rbx - D=d3 - syscall x1 - return -:xor_rax_rbx - x48 - x31 - xd8 - -:emit_shl_rax_cl - J=d4 - I=:shl_rax_cl - D=d3 - syscall x1 - return -:shl_rax_cl - x48 - xd3 - xe0 - -:emit_shr_rax_cl - J=d4 - I=:shr_rax_cl - D=d3 - syscall x1 - return -:shr_rax_cl - x48 - xd3 - xe8 - -:emit_imul_rbx - J=d4 - I=:imul_rbx - D=d3 - syscall x1 - return -:imul_rbx - x48 - xf7 - xeb - -:emit_zero_rdx_idiv_rbx - call :zero_rdx - J=d4 - I=:idiv_rbx - D=d3 - syscall x1 - return -:idiv_rbx - x48 - xf7 - xfb - -align -:imm64 - reserve d8 - -; prefix for lea rax, [rbp+IMM32] -:lea_rax_rbp_offset_prefix - x48 - x8d - x85 - -:input_filename - str in04b - x0 - -:output_filename - str out04b - x0 - -:input_file_error - B=:input_file_error_message - !:general_error - -:input_file_error_message - str Couldn't open input file. - xa - x0 - -:output_file_error - B=:output_file_error_message - !:general_error - -:output_file_error_message - str Couldn't open output file. - xa - x0 - -:bad_identifier - B=:bad_identifier_error_message - !:program_error - -:bad_identifier_error_message - str Bad identifier. - xa - x0 - -:bad_label - B=:bad_label_error_message - !:program_error - -:bad_label_error_message - str Bad label. - xa - x0 - -:bad_variable - B=:bad_variable_error_message - !:program_error - -:bad_variable_error_message - str No such variable. - xa - x0 - -:bad_function - B=:bad_function_error_message - !:program_error - -:bad_function_error_message - str No such function. - xa - x0 - -:bad_byte - B=:bad_byte_error_message - !:program_error - -:bad_byte_error_message - str Byte not in range 0-255. - xa - x0 - -:bad_number - B=:bad_number_error_message - !:program_error - -:bad_number_error_message - str Bad number. - xa - x0 - -:bad_assignment - B=:bad_assignment_error_message - !:program_error - -:bad_assignment_error_message - str Bad assignment. - xa - x0 - -:bad_term - B=:bad_term_error_message - !:program_error - -:bad_term_error_message - str Bad term. - xa - x0 - -:bad_statement - B=:bad_statement_error_message - !:program_error - -:bad_statement_error_message - str Bad statement. - xa - x0 - -:bad_jump - B=:bad_jump_error_message - !:program_error - -:bad_jump_error_message - str Bad jump. - xa - x0 - -:bad_call - B=:bad_call_error_message - !:program_error - -:bad_call_error_message - str Bad function call. - xa - x0 - -:label_redefinition - B=:label_redefinition_error_message - !:program_error - -:label_redefinition_error_message - str Label redefinition. - xa - x0 - -:global_redeclaration - B=:global_redeclaration_error_message - !:program_error - -:global_redeclaration_error_message - str Global variable declared twice. - xa - x0 - -:local_redeclaration - B=:local_redeclaration_error_message - !:program_error - -:local_redeclaration_error_message - str Local variable declared twice. - xa - x0 - -:general_error - call :eputs - J=d1 - syscall x3c - -:program_error - R=B - - B=:"Line" - call :eputs - - D=:line_number - D=8D - B=D - call :eputn - - B=:line_number_separator - call :eputs - - B=R - call :eputs - J=d1 - syscall x3c - -:"Line" - str Line - x20 - x0 - -:line_number_separator - str : - x20 - x0 - -:strlen - I=B - D=B - :strlen_loop - C=1I - ?C=0:strlen_ret - I+=d1 - !:strlen_loop - :strlen_ret - I-=D - A=I - return - -; check if strings in rdi and rsi are equal, up to terminator in rcx -:string= - D=1I - A=1J - ?D!A:return_0 - ?D=C:return_1 - I+=d1 - J+=d1 - !:string= - -; check if strings in rdi and rsi are equal, up to the first non-identifier character -:ident= - D=1I - B=D - call :isident - ; I ended - ?A=0:ident=_I_end - - D=1J - B=D - call :isident - ; J ended, but I didn't - ?A=0:return_0 - - ; we haven't reached the end of either - D=1I - A=1J - ?D!A:return_0 - I+=d1 - J+=d1 - !:ident= -:ident=_I_end - D=1J - B=D - call :isident - ; check if J also ended - ?A=0:return_1 - ; J didn't end - !:return_0 - -:return_0 - A=d0 - return -:return_1 - A=d1 - return -:return_2 - A=d2 - return -:return_3 - A=d3 - return -:return_4 - A=d4 - return -:return_5 - A=d5 - return -:return_6 - A=d6 - return -:return_7 - A=d7 - return -:return_8 - A=d8 - return -:return_J - A=J - return - -; write the character in rbx to the file in rdi. -:fputc - C=B - I=S - I-=d1 - 1I=C - D=d1 - syscall x1 - return - -; write the string in rbx to stderr -:eputs - J=B - call :strlen - D=A - I=J - J=d2 - syscall x1 - return - -; write rbx in decimal to stderr -:eputn - I=B - J=S - J-=d1 - :eputn_loop - D=d0 - ; divide by 10 - B=d10 - A=I - div - ; quotient is new number - I=A - ; add remainder to string - D+='0 - 1J=D - J-=d1 - ?I!0:eputn_loop - J+=d1 - D=S - D-=J - I=J - J=d2 - syscall x1 - return - -; copy rdx bytes from rsi to rdi. -; this copies from the left: if you're doing an overlapped copy, rsi should be greater than rdi -:memcpy - ?D=0:return_0 - A=1I - 1J=A - I+=d1 - J+=d1 - D-=d1 - !:memcpy - -; copy from rdi to rsi, until byte cl is reached -:memccpy - D=1I - 1J=D - I+=d1 - J+=d1 - ?D!C:memccpy - return - -; advance rsi to the next space or newline character -:go_to_space - C=1I - D=xa - ?C=D:return_0 - D=x20 - ?C=D:return_0 - I+=d1 - !:go_to_space - -:"global" - str global - x20 -:"argument" - str argument - x20 -:"local" - str local - x20 -:"return" - str return - x20 -:"return\n" - str return - xa -:"byte" - str byte - x20 -:"string" - str string - x20 -:"goto" - str goto - x20 -:"if" - str if - x20 -:"function" - str function - x20 -:"==" - str == - x20 -:"!=" - str != - x20 -:">" - str > - x20 -:"<" - str < - x20 -:"<=" - str <= - x20 -:">=" - str >= - x20 -:"[" - str [ - x20 -:"]" - str ] - x20 -:"[=" - str [= - x20 -:"]=" - str ]= - x20 - -:zero - x0 - -; put a 0 byte before the line (this is important for removing whitespace at the end of the line, -; specifically, we don't want this to be a space character) -x0 -:line - reserve d1000 - -align -:global_variables_end - reserve d8 -:static_memory_end - reserve d8 -:local_variables_end - reserve d8 -:stack_end - reserve d8 -:labels_end - reserve d8 -:line_number - reserve d8 -:global_variables - reserve d50000 -:local_variables - reserve d20000 -:labels - reserve d200000 -:second_pass - reserve d1 - -:ELF_header -x7f -x45 -x4c -x46 -x02 -x01 -x01 - -reserve d9 - -x02 -x00 - -x3e -x00 - -x01 -x00 -x00 -x00 - -x78 -x00 -x40 -x00 -x00 -x00 -x00 -x00 - -x40 -x00 -x00 -x00 -x00 -x00 -x00 -x00 - -reserve d12 - -x40 -x00 -x38 -x00 -x01 -x00 -x00 -x00 -x00 -x00 -x00 -x00 - -x01 -x00 -x00 -x00 - -x07 -x00 -x00 -x00 - -x78 -x00 -x00 -x00 -x00 -x00 -x00 -x00 - -x78 -x00 -x40 -x00 -x00 -x00 -x00 -x00 - -reserve d8 - -x00 -x00 -x20 -x00 -x00 -x00 -x00 -x00 - -x00 -x00 -x20 -x00 -x00 -x00 -x00 -x00 - -x00 -x10 -x00 -x00 -x00 -x00 -x00 -x00 - -; NOTE: we shouldn't end the file with a reserve; we don't handle that properly diff --git a/04b/in04b b/04b/in04b deleted file mode 100644 index 2b85900..0000000 --- a/04b/in04b +++ /dev/null @@ -1,133 +0,0 @@ -main() - -function main - puts(.str_hello_world) - putc(10) ; newline - syscall(0x3c, 0) - -:str_hello_world - string Hello, world! - byte 0 - -function strlen - argument s - local c - local p - p = s - :strlen_loop - c = *1p - if c == 0 goto strlen_loop_end - p += 1 - goto strlen_loop - :strlen_loop_end - return p - s - -function putc - argument c - local p - p = &c - syscall(1, 1, p, 1) - return - -function puts - argument s - local len - len = strlen(s) - syscall(1, 1, s, len) - return - -function syscall - ; I've done some testing, and this should be okay even if - ; rbp-56 goes beyond the end of the stack. - ; mov rax, [rbp-16] - byte 0x48 - byte 0x8b - byte 0x85 - byte 0xf0 - byte 0xff - byte 0xff - byte 0xff - ; mov rdi, rax - byte 0x48 - byte 0x89 - byte 0xc7 - - ; mov rax, [rbp-24] - byte 0x48 - byte 0x8b - byte 0x85 - byte 0xe8 - byte 0xff - byte 0xff - byte 0xff - ; mov rsi, rax - byte 0x48 - byte 0x89 - byte 0xc6 - - ; mov rax, [rbp-32] - byte 0x48 - byte 0x8b - byte 0x85 - byte 0xe0 - byte 0xff - byte 0xff - byte 0xff - ; mov rdx, rax - byte 0x48 - byte 0x89 - byte 0xc2 - - ; mov rax, [rbp-40] - byte 0x48 - byte 0x8b - byte 0x85 - byte 0xd8 - byte 0xff - byte 0xff - byte 0xff - ; mov r10, rax - byte 0x49 - byte 0x89 - byte 0xc2 - - ; mov rax, [rbp-48] - byte 0x48 - byte 0x8b - byte 0x85 - byte 0xd0 - byte 0xff - byte 0xff - byte 0xff - ; mov r8, rax - byte 0x49 - byte 0x89 - byte 0xc0 - - ; mov rax, [rbp-56] - byte 0x48 - byte 0x8b - byte 0x85 - byte 0xc8 - byte 0xff - byte 0xff - byte 0xff - ; mov r9, rax - byte 0x49 - byte 0x89 - byte 0xc1 - - ; mov rax, [rbp-8] - byte 0x48 - byte 0x8b - byte 0x85 - byte 0xf8 - byte 0xff - byte 0xff - byte 0xff - - ; syscall - byte 0x0f - byte 0x05 - - return |