diff options
-rw-r--r-- | 02/README.md | 3 | ||||
-rw-r--r-- | 02/in01 | 10 | ||||
-rw-r--r-- | 03/in02 | 29 | ||||
-rw-r--r-- | 04/in03 | 2 | ||||
-rw-r--r-- | 05/README.md | 4 | ||||
-rw-r--r-- | README.md | 2 |
6 files changed, 29 insertions, 21 deletions
diff --git a/02/README.md b/02/README.md index 9ba5063..1854baa 100644 --- a/02/README.md +++ b/02/README.md @@ -157,6 +157,9 @@ Numbers cannot appear at the end of a line (this made the compiler simpler to write), so I'm adding a `.` at the end of each one to avoid making that mistake. +The code for `cmp rax, rbx` is now included in all conditional jump instructions +(I kept forgetting to put `cm` before conditional jumps when writing this compiler). + Anything after a command is treated as a comment; additionally `//` can be used for comments on their own lines. I decided to implement this as simply as possible: @@ -42,7 +42,7 @@ the segment we're loading in includes the ELF header at address 0x400000, so we ;im;01;00;00;00;00;00;00;00 write ;sy --- read command (0x400174) -- +-- read a command (0x400174) -- ;im;03;00;00;00;00;00;00;00 input file descriptor ;JA ;im;88;00;40;00;00;00;00;00 where to read to @@ -184,7 +184,7 @@ okay we now have a digit in rbx ;AR ;<I;04 ;+B -;RA store away in rbp +;RA store number away in rbp ;jm;38;ff;ff;ff continue loop unused padding @@ -316,7 +316,7 @@ convert bytes to an offset ;BA offset in rbx -look in the label table +read value from label table ;im;00;00;42;00;00;00;00;00 ;+B ;BA @@ -366,7 +366,7 @@ it's not a label or a number. let's look it up in the instruction table. ;im;d0;0a;40;00;00;00;00;00 start of instruction table ;+B ;BA -;RA store away address of command text in rbp +;RA store away pointer to command text in rbp ;zA;lb ;DA number of bytes to write (used for syscall if command exists) ;BA @@ -388,7 +388,7 @@ bad command! ;00;00;00;00;00;00;00;00;00;00;00;00;00;00;00;00 this is a real command -;BR get address of command text back in rbx +;BR get pointer to command text back in rbx ;im;01;00;00;00;00;00;00;00 add 1 because we don't want to write the length ;+B ;IA address of data to write @@ -539,7 +539,7 @@ im ##43. ascii 'C' je :-<c non-constant shift -// write shl rax, +// write shl rax, imm8 prefix im --<I IA @@ -595,7 +595,7 @@ im ##43. ascii 'C' je :->c non-constant shift -// write shr rax, +// write shr rax, imm8 prefix im -->I IA @@ -651,7 +651,7 @@ im ##43. ascii 'C' je :-]c non-constant shift -// write sar rax, +// write sar rax, imm8 prefix im --]I IA @@ -803,7 +803,7 @@ im --r# cl DA -// get index of first register +// get index of second register im ##2. BA @@ -884,7 +884,7 @@ im cl BA put number in rbx im ---im put immediate in rax +--im emit "mov rax, (immediate)" cl im --1A transfer immediate to output @@ -1000,7 +1000,7 @@ im cl put rax in output jm :-rl -// emit 'B = line[1]', i.e. deal with address of store instruction +// set rbx to register referred to by line[1], i.e. deal with the address of a store instruction ::s@ im ##1. @@ -1173,7 +1173,7 @@ lb BA // note: for this we allow numerical operands, e.g. 'C+=d1' // we don't need this for ::B2 because it's only used for load instructions -// (you don't normally need to dereference numerical literals) +// (you don't normally need to dereference numbers) im ##41. 'A' je @@ -1277,7 +1277,7 @@ BA zA jn if on second pass, :-rl ignore this (read next line) -// first get current address +// get current address im ##4. output fd JA @@ -1303,7 +1303,7 @@ im --LI IA // copy from rsi to rdi until a newline is reached -::lc label copy +::lc label copy loop BI zA lb @@ -1455,9 +1455,9 @@ im JA jm :-s= keep looping -// emit "mov rax, immediate" -- with immediate in rbx +// emit "mov rax, immediate" with immediate in rbx ::im -// first, write prefix +// first, emit the prefix im --IM IA @@ -1574,7 +1574,8 @@ jm jm // conditional jump handling ::?j -// note, we actually put the first operand in rbx and the second in rax. this is because A>0 is more familiar than 0<A +// note, we actually put the first operand in rbx and the second in rax. +// this is because A>0 is more familiar than 0<A im ##1. add 1 to line pointer to get pointer to 1st operand BA @@ -2102,7 +2103,7 @@ BAs im ##41. je -:-ca call a +:-ca call A im ##5. add 5 to line pointer to get pointer to label name BA @@ -2889,7 +2890,7 @@ jm ~~ ~~ ~~ -::L$ end of current label list +::L$ current end of label list --LB ::LB labels ~~ @@ -127,7 +127,7 @@ R=:line !:read_line_loop_end :read_line_loop_end -; remove whitespace (specifically, ' ' characters) at end of line +; remove whitespace (specifically, space characters) at end of line I=R :remove_terminal_whitespace_loop I-=d1 diff --git a/05/README.md b/05/README.md index 13af044..3112efb 100644 --- a/05/README.md +++ b/05/README.md @@ -90,7 +90,9 @@ The C89 standard (in theory) defines which C programs are legal, and exactly wha A draft of it, which is about as good as the real thing, is [available here](http://port70.net/~nsz/c/c89/c89-draft.html). Since 1989, more features have been added to C, and so more C standards have been published. -To keep things simple, our compiler only supports the features from C89 (with a few exceptions). +To keep things simple, our compiler only supports the features from C89, except +that declarations can appear after statements and `//` single-line comments are allowed +(both of these were added in C99). ## compiler high-level details @@ -149,6 +149,7 @@ ax bx cx dx sp bp si di │ mov rbp, [rsp] │ 48 8b 2c 24 │ load 8 bytes from rsp into rbp │ │ lea rax, [rbp+IMM32] │ 48 8d 85 IMM32 │ set rax to rbp+IMM32 │ │ lea rsp, [rbp+IMM32] │ 48 8d a5 IMM32 │ set rsp to rbp+IMM32 │ +| int3 | cc | raise trap signal -useful for debugging| | movsq | 48 a5 | copy 8 bytes from rsi to rdi | | rep movsb | f3 a4 | copy rcx bytes from rsi to rdi | │ push rax │ 50 │ push rax onto the stack │ @@ -220,6 +221,7 @@ SYSCALLS Arguments are passed in rdi, rsi, rdx, r10, r8, r9 The return value is placed in rax. +The values of rsp, rbp and rbx are preserved, but other registers might change. ``` ## license |