diff options
author | pommicket <pommicket@gmail.com> | 2022-01-07 20:30:29 -0500 |
---|---|---|
committer | pommicket <pommicket@gmail.com> | 2022-01-07 20:31:53 -0500 |
commit | f82581ed761680ad607f34a2506bca24a2f5acde (patch) | |
tree | e2e2a54d15c9e832b2aa0d6120a421c9c69f69d0 | |
parent | e52793324a9f693ec8b5d218d99b7d2577f3f614 (diff) |
instruction table, remove old instructions
-rw-r--r-- | 01/commands.txt | 2 | ||||
-rw-r--r-- | 01/in00 | 4 | ||||
-rw-r--r-- | 02/in01 | 4 | ||||
-rw-r--r-- | README.md | 100 | ||||
-rw-r--r-- | instructions.txt | 136 |
5 files changed, 90 insertions, 156 deletions
diff --git a/01/commands.txt b/01/commands.txt index 9cfdb1e..8856802 100644 --- a/01/commands.txt +++ b/01/commands.txt @@ -33,8 +33,6 @@ sw - mov word [rbx], ax lw - mov ax, word [rbx] sb - mov byte [rbx], al lb - mov al, byte [rbx] -Sq - mov qword [rsp], rax -Lq - mov rax, qword [rsp] nA - neg rax +B - add rax, rbx @@ -10110,7 +10110,7 @@ cc cc cc cc cc cc cc cc 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 -04 48 8b 04 24 00 00 00 +00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 @@ -11006,7 +11006,7 @@ cc cc cc cc cc cc cc cc 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 -04 48 89 04 24 00 00 00 +00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 @@ -10328,7 +10328,7 @@ the formatting changed appropriately. ;00;00;00;00;00;00;00;00 ;00;00;00;00;00;00;00;00 ;00;00;00;00;00;00;00;00 -;04;48;8b;04;24;00;00;00 +;00;00;00;00;00;00;00;00 ;00;00;00;00;00;00;00;00 ;00;00;00;00;00;00;00;00 ;00;00;00;00;00;00;00;00 @@ -11224,7 +11224,7 @@ the formatting changed appropriately. ;00;00;00;00;00;00;00;00 ;00;00;00;00;00;00;00;00 ;00;00;00;00;00;00;00;00 -;04;48;89;04;24;00;00;00 +;00;00;00;00;00;00;00;00 ;00;00;00;00;00;00;00;00 ;00;00;00;00;00;00;00;00 ;00;00;00;00;00;00;00;00 @@ -3,7 +3,7 @@ Compilers nowadays are written in languages like C, which themselves need to be compiled. But then, you need a C compiler to compile your C compiler! Of course, the very first C compiler was not written in C (because how would it be -compiled?). Instead, it was built up over time, starting from a very basic +compiled?). Instead, it was built up over time, starting from a basic assembler, eventually reaching a full-scale compiler. In this repository, we'll explore how that's done. Each directory represents a new "stage" in the process. The first one, `00`, is a hand-written @@ -37,9 +37,9 @@ want to know before starting. 
You don't need to understand everything about each of these, just get a general idea: +- the basics of programming - what a system call is - what memory is -- what a programming language is - what a compiler is - what an executable file is - number bases -- if a number is preceded by 0x, 0o, or 0b in this series, that @@ -55,19 +55,9 @@ decimal. - ASCII, null-terminated strings - how pointers work - how floating-point numbers work -- some basic Intel-style x86-64 assembly -It will help you a lot to know how to program (with any programming language), -but it's not strictly necessary. - -## instruction set - -x86-64 has a *gigantic* instruction set. The manual for it is over 2,000 pages -long! So it makes sense to select only a small subset of it to use. -The set I've chosen can be found in `instructions.txt`. -I think it achieves a pretty good balance between having few enough -instructions to be manageable and having enough instructions to be useable. -To be clear, you don't need to read that file to understand the series. +If you aren't familiar with x86-64 assembly, be sure to check out the instruction list +below. ## principles @@ -103,6 +93,88 @@ an operating system to a USB key with a circuit or something, assuming you trust your CPU... I'll leave that to someone else. +## instruction set + +x86-64 has a *gigantic* instruction set. The manual for it is over 2,000 pages +long! So it makes sense to select only a small subset of it to use. + +Here are all the instructions we'll be using. If you're not familiar with +x86-64 assembly, you might want to look over these (but you don't need to understand everything). + +In the table below, `IMM64` means a 64-bit *immediate* (a constant number). +`rdx:rax` refers to the 128-bit number you get by combining `rdx` and `rax`. 
+ +``` +┌──────────────────────┬───────────────────┬────────────────────────────────────────┐ +│ Instruction │ Encoding │ Description │ +├──────────────────────┼───────────────────┼────────────────────────────────────────┤ +│ mov rax, IMM64 │ 48 b8 IMM64 │ set rax to the 64-bit value IMM64 │ +│ xor eax, eax │ 31 c0 │ set rax to 0 (shorter than mov rax, 0) │ +│ xor edx, edx │ 31 d2 │ set rdx to 0 │ +│ mov RDEST, RSRC │ 48 89 (DEST|SRC<<3|0xc0) │ set register DEST to current │ +│ │ │ value of register SRC │ +│ mov r8, rax │ 49 89 c0 │ set r8 to rax (only used for syscalls) │ +│ mov r9, rax │ 49 89 c1 │ set r9 to rax (only used for syscalls) │ +│ mov r10, rax │ 49 89 c2 │ set r10 to rax (only used for syscalls)│ +│ xchg rax, rbx │ 48 93 │ exchange the values of rax and rbx │ +│ mov [rbx], rax │ 48 89 03 │ store rax as 8 bytes at address rbx │ +│ mov rax, [rbx] │ 48 8b 03 │ load 8 bytes from address rbx into rax │ +│ mov [rbx], eax │ 89 03 │ store eax as 4 bytes at address rbx │ +│ mov eax, [rbx] │ 8b 03 │ load 4 bytes from address rbx into eax │ +│ mov [rbx], ax │ 66 89 03 │ store ax as 2 bytes at address rbx │ +│ mov ax, [rbx] │ 66 8b 03 │ load 2 bytes from address rbx into ax │ +│ mov [rbx], al │ 88 03 │ store al as 1 byte at address rbx │ +│ mov al, [rbx] │ 8a 03 │ load 1 byte from address rbx into al │ +│ mov rax, [rbp+IMM32] │ 48 8b 85 IMM32 │ load 8 bytes from address rbp+IMM32 │ +│ │ │ into rax (note: IMM32 may be negative) │ +│ lea rax, [rbp+IMM32] │ 48 8d 85 IMM32 │ set rax to rbp+IMM32 │ +│ lea rsp, [rbp+IMM32] │ 48 8d a5 IMM32 │ set rsp to rbp+IMM32 │ +│ mov [rbp+IMM32], rax │ 48 89 85 IMM32 │ store rax in 8 bytes at rbp+IMM32 │ +│ mov [rsp+IMM32], rax │ 48 89 84 24 IMM32 │ store rax in 8 bytes at rsp+IMM32 │ +│ mov [rsp], rbp │ 48 89 2c 24 │ store rbp in 8 bytes at rsp │ +│ mov rbp, [rsp] │ 48 8b 2c 24 │ load 8 bytes from rsp into rbp │ +│ neg rax │ 48 f7 d8 │ set rax to -rax │ +│ add rax, rbx │ 48 01 d8 │ add rbx to rax │ +│ sub rax, rbx │ 48 29 d8 │
subtract rbx from rax │ +│ imul rbx │ 48 f7 eb │ set rdx:rax to rax * rbx (signed) │ +│ idiv rbx │ 48 f7 fb │ divide rdx:rax by rbx (signed); put │ +│ │ │ quotient in rax, remainder in rdx │ +│ mul rbx │ 48 f7 e3 │ like imul, but unsigned │ +│ div rbx │ 48 f7 f3 │ like idiv, but with unsigned division │ +│ not rax │ 48 f7 d0 │ set rax to ~rax (bitwise not) │ +│ and rax, rbx │ 48 21 d8 │ set rax to rax & rbx (bitwise and) │ +│ or rax, rbx │ 48 09 d8 │ set rax to rax | rbx (bitwise or) │ +│ xor rax, rbx │ 48 31 d8 │ set rax to rax ^ rbx (bitwise xor) │ +│ shl rax, cl │ 48 d3 e0 │ set rax to rax << cl (left shift) │ +│ shl rax, IMM8 │ 48 c1 e0 IMM8 │ set rax to rax << IMM8 │ +│ shr rax, cl │ 48 d3 e8 │ set rax to rax >> cl (zero-extend) │ +│ shr rax, IMM8 │ 48 c1 e8 IMM8 │ set rax to rax >> IMM8 (zero-extend) │ +│ sar rax, cl │ 48 d3 f8 │ set rax to rax >> cl (sign-extend) │ +│ sar rax, IMM8 │ 48 c1 f8 IMM8 │ set rax to rax >> IMM8 (sign-extend) │ +│ sub rsp, IMM32 │ 48 81 ec IMM32 │ subtract IMM32 from rsp │ +│ add rsp, IMM32 │ 48 81 c4 IMM32 │ add IMM32 to rsp │ +│ cmp rax, rbx │ 48 39 d8 │ compare rax with rbx (see je, jl, etc.)│ +│ test rax, rax │ 48 85 c0 │ equivalent to cmp rax, 0 │ +│ jmp IMM32 │ e9 IMM32 │ jump to offset IMM32 from here │ +│ je IMM32 │ 0f 84 IMM32 │ jump to IMM32 if equal │ +│ jne IMM32 │ 0f 85 IMM32 │ jump if not equal │ +│ jl IMM32 │ 0f 8c IMM32 │ jump if less than │ +│ jg IMM32 │ 0f 8f IMM32 │ jump if greater than │ +│ jle IMM32 │ 0f 8e IMM32 │ jump if less than or equal to │ +│ jge IMM32 │ 0f 8d IMM32 │ jump if greater than or equal to │ +│ jb IMM32 │ 0f 82 IMM32 │ jump if "below" (like jl but unsigned) │ +│ ja IMM32 │ 0f 87 IMM32 │ jump if "above" (like jg but unsigned) │ +│ jbe IMM32 │ 0f 86 IMM32 │ jump if below or equal to │ +│ jae IMM32 │ 0f 83 IMM32 │ jump if above or equal to │ +│ call rax │ ff d0 │ call the function at address rax │ +│ ret │ c3 │ return from function │ +│ syscall │ 0f 05 │ execute a system call │ +│ nop │ 90 │ do
nothing │ +└──────────────────────┴───────────────────┴────────────────────────────────────────┘ +``` + +More will be added in the future as needed. + ## license ``` diff --git a/instructions.txt b/instructions.txt deleted file mode 100644 index 4758835..0000000 --- a/instructions.txt +++ /dev/null @@ -1,136 +0,0 @@ -Linux syscall calling convention: -rax - syscall number -rdi, rsi, rdx, r10, r8, r9 - arguments -return value placed in rax - -Instruction set: - -mov rax, imm64 ->48 b8 IMM64 -xor eax, eax (sets rax to 0, much shorter than mov rax, 0) ->31 c0 -xor edx, edx ->31 d2 -mov rdest, rsrc -ax bx cx dx sp bp si di -0 3 1 2 4 5 6 7 ->48 89 (dest | src << 3 | 0xc0) -mov r8, rax (for syscalls) ->49 89 c0 -mov r9, rax (for syscalls) ->49 89 c1 -mov r10, rax (for syscalls) ->49 89 c2 -xchg rax, rbx ->48 93 -mov qword [rbx], rax ->48 89 03 -mov rax, qword [rbx] ->48 8b 03 -mov dword [rbx], eax ->89 03 -mov eax, dword [rbx] ->8b 03 -mov word [rbx], ax ->66 89 03 -mov ax, word [rbx] ->66 8b 03 -mov byte [rbx], al ->88 03 -mov al, byte [rbx] ->8a 03 -mov rax, qword [rbp+imm32] ->48 8b 85 IMM32 (note: imm may be negative) -lea rax, [rbp+imm32] ->48 8d 85 IMM32 (note: imm may be negative) -lea rsp, [rbp+imm32] ->48 8d a5 IMM32 (note: imm may be negative) -mov qword [rbp+imm32], rax ->48 89 85 IMM32 (note: imm may be negative) -mov qword [rsp+imm32], rax ->48 89 84 24 IMM32 (note: imm may be negative) -mov qword [rsp], rbp ->48 89 2c 24 -mov rbp, qword [rsp] ->48 8b 2c 24 -mov ebx, imm32 ->bb IMM32 -neg rax ->48 f7 d8 -add rax, rbx ->48 01 d8 -sub rax, rbx ->48 29 d8 -imul rbx ->48 f7 eb -idiv rbx ->48 f7 fb -mul rbx ->48 f7 e3 -div rbx ->48 f7 f3 -not rax ->48 f7 d0 -and rax, rbx ->48 21 d8 -or rax, rbx ->48 09 d8 -xor rax, rbx ->48 31 d8 -shl rax, cl ->48 d3 e0 -shl rax, imm8 ->48 c1 e0 IMM8 -shr rax, cl ->48 d3 e8 -shr rax, imm8 ->48 c1 e8 IMM8 -sar rax, cl ->48 d3 f8 -sar rax, imm8 ->48 c1 f8 IMM8 -sub rsp, imm32 ->48 81 ec IMM32 -add rsp, imm32 ->48 81 c4 IMM32 -cmp 
rax, rbx ->48 39 d8 -test rax, rax ->48 85 c0 -jmp rel32 ->e9 REL32 -je rel32 ->0f 84 REL32 -jne rel32 ->0f 85 REL32 -jl rel32 ->0f 8c REL32 -jg rel32 ->0f 8f REL32 -jle rel32 ->0f 8e REL32 -jge rel32 ->0f 8d REL32 -jb rel32 ->0f 82 REL32 -ja rel32 ->0f 87 REL32 -jbe rel32 ->0f 86 REL32 -jae rel32 ->0f 83 REL32 -call rax ->ff d0 -ret ->c3 -syscall ->0f 05 -nop ->90 -(more will be added as needed) - -to be removed: -mov qword [rsp], rax ->48 89 04 24 -mov rax, qword [rsp] ->48 8b 04 24 |