summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorpommicket <pommicket@gmail.com>2022-01-07 20:30:29 -0500
committerpommicket <pommicket@gmail.com>2022-01-07 20:31:53 -0500
commitf82581ed761680ad607f34a2506bca24a2f5acde (patch)
treee2e2a54d15c9e832b2aa0d6120a421c9c69f69d0
parente52793324a9f693ec8b5d218d99b7d2577f3f614 (diff)
instruction table, remove old instructions
-rw-r--r--01/commands.txt2
-rw-r--r--01/in004
-rw-r--r--02/in014
-rw-r--r--README.md100
-rw-r--r--instructions.txt136
5 files changed, 90 insertions, 156 deletions
diff --git a/01/commands.txt b/01/commands.txt
index 9cfdb1e..8856802 100644
--- a/01/commands.txt
+++ b/01/commands.txt
@@ -33,8 +33,6 @@ sw - mov word [rbx], ax
lw - mov ax, word [rbx]
sb - mov byte [rbx], al
lb - mov al, byte [rbx]
-Sq - mov qword [rsp], rax
-Lq - mov rax, qword [rsp]
nA - neg rax
+B - add rax, rbx
diff --git a/01/in00 b/01/in00
index d3d0582..7fb012e 100644
--- a/01/in00
+++ b/01/in00
@@ -10110,7 +10110,7 @@ cc cc cc cc cc cc cc cc
00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00
-04 48 8b 04 24 00 00 00
+00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00
@@ -11006,7 +11006,7 @@ cc cc cc cc cc cc cc cc
00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00
-04 48 89 04 24 00 00 00
+00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00
diff --git a/02/in01 b/02/in01
index b008079..58fcc28 100644
--- a/02/in01
+++ b/02/in01
@@ -10328,7 +10328,7 @@ the formatting changed appropriately.
;00;00;00;00;00;00;00;00
;00;00;00;00;00;00;00;00
;00;00;00;00;00;00;00;00
-;04;48;8b;04;24;00;00;00
+;00;00;00;00;00;00;00;00
;00;00;00;00;00;00;00;00
;00;00;00;00;00;00;00;00
;00;00;00;00;00;00;00;00
@@ -11224,7 +11224,7 @@ the formatting changed appropriately.
;00;00;00;00;00;00;00;00
;00;00;00;00;00;00;00;00
;00;00;00;00;00;00;00;00
-;04;48;89;04;24;00;00;00
+;00;00;00;00;00;00;00;00
;00;00;00;00;00;00;00;00
;00;00;00;00;00;00;00;00
;00;00;00;00;00;00;00;00
diff --git a/README.md b/README.md
index 893fd36..dfd6eb4 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,7 @@
Compilers nowadays are written in languages like C, which themselves need to be
compiled. But then, you need a C compiler to compile your C compiler! Of course,
the very first C compiler was not written in C (because how would it be
-compiled?). Instead, it was built up over time, starting from a very basic
+compiled?). Instead, it was built up over time, starting from a basic
assembler, eventually reaching a full-scale compiler.
In this repository, we'll explore how that's done. Each directory
represents a new "stage" in the process. The first one, `00`, is a hand-written
@@ -37,9 +37,9 @@ want to know before starting.
You don't need to understand everything about each of these, just get
a general idea:
+- the basics of programming
- what a system call is
- what memory is
-- what a programming language is
- what a compiler is
- what an executable file is
- number bases -- if a number is preceded by 0x, 0o, or 0b in this series, that
@@ -55,19 +55,9 @@ decimal.
- ASCII, null-terminated strings
- how pointers work
- how floating-point numbers work
-- some basic Intel-style x86-64 assembly
-It will help you a lot to know how to program (with any programming language),
-but it's not strictly necessary.
-
-## instruction set
-
-x86-64 has a *gigantic* instruction set. The manual for it is over 2,000 pages
-long! So it makes sense to select only a small subset of it to use.
-The set I've chosen can be found in `instructions.txt`.
-I think it achieves a pretty good balance between having few enough
-instructions to be manageable and having enough instructions to be useable.
-To be clear, you don't need to read that file to understand the series.
+If you aren't familiar with x86-64 assembly, be sure to check out the instruction list
+below.
## principles
@@ -103,6 +93,88 @@ an operating system to a USB key with a circuit or something,
assuming you trust your CPU...
I'll leave that to someone else.
+## instruction set
+
+x86-64 has a *gigantic* instruction set. The manual for it is over 2,000 pages
+long! So it makes sense to select only a small subset of it to use.
+
+Here are all the instructions we'll be using. If you're not familiar with
+x86-64 assembly, you might want to look over these (but you don't need to understand everything).
+
+In the table below, `IMM64` means a 64-bit *immediate* (a constant number).
+`rdx:rax` refers to the 128-bit number you get by combining `rdx` and `rax`.
+
+```
+┌──────────────────────┬───────────────────┬────────────────────────────────────────┐
+│ Instruction │ Encoding │ Description │
+├──────────────────────┼───────────────────┼────────────────────────────────────────┤
+│ mov rax, IMM64 │ 48 b8 IMM64 │ set rax to the 64-bit value IMM64 │
+│ xor eax, eax │ 31 c0 │ set rax to 0 (shorter than mov rax, 0) │
+│ xor edx, edx │ 31 d2 │ set rdx to 0 │
+│ mov RDEST, RSRC │ 48 89 (DEST|SRC<<3|0xc0) │ set register DEST to current │
+│ │ │ value of register SRC │
+│ mov r8, rax │ 49 89 c0 │ set r8 to rax (only used for syscalls) │
+│ mov r9, rax │ 49 89 c1 │ set r9 to rax (only used for syscalls) │
+│ mov r10, rax │ 49 89 c2 │ set r10 to rax (only used for syscalls)│
+│ xchg rax, rbx │ 48 93 │ exchange the values of rax and rbx │
+│ mov [rbx], rax │ 48 89 03 │ store rax as 8 bytes at address rbx │
+│ mov rax, [rbx] │ 48 8b 03 │ load 8 bytes from address rbx into rax │
+│ mov [rbx], eax │ 89 03 │ store eax as 4 bytes at address rbx │
+│ mov eax, [rbx] │ 8b 03 │ load 4 bytes from address rbx into eax │
+│ mov [rbx], ax │ 66 89 03 │ store ax as 2 bytes at address rbx │
+│ mov ax, [rbx] │ 66 8b 03 │ load 2 bytes from address rbx into ax │
+│ mov [rbx], al │ 88 03 │ store al as 1 byte at address rbx │
+│ mov al, [rbx] │ 8a 03 │ load 1 byte from address rbx into al │
+│ mov rax, [rbp+IMM32] │ 48 8b 85 IMM32 │ load 8 bytes from address rbp+IMM32 │
+│ │ │ into rax (note: IMM32 may be negative) │
+│ lea rax, [rbp+IMM32] │ 48 8d 85 IMM32 │ set rax to rbp+IMM32 │
+│ lea rsp, [rbp+IMM32] │ 48 8d a5 IMM32 │ set rsp to rbp+IMM32 │
+│ mov [rbp+IMM32], rax │ 48 89 85 IMM32 │ store rax in 8 bytes at rbp+IMM32 │
+│ mov [rsp+IMM32], rax │ 48 89 84 24 IMM32 │ store rax in 8 bytes at rsp+IMM32 │
+│ mov [rsp], rbp │ 48 89 2c 24 │ store rbp in 8 bytes at rsp │
+│ mov rbp, [rsp] │ 48 8b 2c 24 │ load 8 bytes from rsp into rbp │
+│ neg rax │ 48 f7 d8 │ set rax to -rax │
+│ add rax, rbx │ 48 01 d8 │ add rbx to rax │
+│ sub rax, rbx │ 48 29 d8 │ subtract rbx from rax │
+│ imul rbx │ 48 f7 eb │ set rdx:rax to rax * rbx (signed) │
+│ idiv rbx │ 48 f7 fb │ divide rdx:rax by rbx (signed); put │
+│ │ │ quotient in rax, remainder in rdx │
+│ mul rbx │ 48 f7 e3 │ like imul, but unsigned │
+│ div rbx │ 48 f7 f3 │ like idiv, but with unsigned division │
+│ not rax │ 48 f7 d0 │ set rax to ~rax (bitwise not) │
+│ and rax, rbx │ 48 21 d8 │ set rax to rax & rbx (bitwise and) │
+│ or rax, rbx │ 48 09 d8 │ set rax to rax | rbx (bitwise or) │
+│ xor rax, rbx │ 48 31 d8 │ set rax to rax ^ rbx (bitwise xor) │
+│ shl rax, cl │ 48 d3 e0 │ set rax to rax << cl (left shift) │
+│ shl rax, IMM8 │ 48 c1 e0 IMM8 │ set rax to rax << IMM8 │
+│ shr rax, cl │ 48 d3 e8 │ set rax to rax >> cl (zero-extend) │
+│ shr rax, IMM8 │ 48 c1 e8 IMM8 │ set rax to rax >> IMM8 (zero-extend) │
+│ sar rax, cl │ 48 d3 f8 │ set rax to rax >> cl (sign-extend) │
+│ sar rax, IMM8 │ 48 c1 f8 IMM8 │ set rax to rax >> IMM8 (sign-extend) │
+│ sub rsp, IMM32 │ 48 81 ec IMM32 │ subtract IMM32 from rsp │
+│ add rsp, IMM32 │ 48 81 c4 IMM32 │ add IMM32 to rsp │
+│ cmp rax, rbx │ 48 39 d8 │ compare rax with rbx (see je, jl, etc.)│
+│ test rax, rax │ 48 85 c0 │ equivalent to cmp rax, 0 │
+│ jmp IMM32 │ e9 IMM32 │ jump to offset IMM32 from here │
+│ je IMM32 │ 0f 84 IMM32 │ jump to IMM32 if equal │
+│ jne IMM32 │ 0f 85 IMM32 │ jump if not equal │
+│ jl IMM32 │ 0f 8c IMM32 │ jump if less than │
+│ jg IMM32 │ 0f 8f IMM32 │ jump if greater than │
+│ jle IMM32 │ 0f 8e IMM32 │ jump if less than or equal to │
+│ jge IMM32 │ 0f 8d IMM32 │ jump if greater than or equal to │
+│ jb IMM32 │ 0f 82 IMM32 │ jump if "below" (like jl but unsigned) │
+│ ja IMM32 │ 0f 87 IMM32 │ jump if "above" (like jg but unsigned) │
+│ jbe IMM32 │ 0f 86 IMM32 │ jump if below or equal to │
+│ jae IMM32 │ 0f 83 IMM32 │ jump if above or equal to │
+│ call rax │ ff d0 │ call the function at address rax │
+│ ret │ c3 │ return from function │
+│ syscall │ 0f 05 │ execute a system call │
+│ nop │ 90 │ do nothing │
+└──────────────────────┴───────────────────┴────────────────────────────────────────┘
+```
+
+More will be added in the future as needed.
+
## license
```
diff --git a/instructions.txt b/instructions.txt
deleted file mode 100644
index 4758835..0000000
--- a/instructions.txt
+++ /dev/null
@@ -1,136 +0,0 @@
-Linux syscall calling convention:
-rax - syscall number
-rdi, rsi, rdx, r10, r8, r9 - arguments
-return value placed in rax
-
-Instruction set:
-
-mov rax, imm64
->48 b8 IMM64
-xor eax, eax (sets rax to 0, much shorter than mov rax, 0)
->31 c0
-xor edx, edx
->31 d2
-mov rdest, rsrc
-ax bx cx dx sp bp si di
-0 3 1 2 4 5 6 7
->48 89 (dest | src << 3 | 0xc0)
-mov r8, rax (for syscalls)
->49 89 c0
-mov r9, rax (for syscalls)
->49 89 c1
-mov r10, rax (for syscalls)
->49 89 c2
-xchg rax, rbx
->48 93
-mov qword [rbx], rax
->48 89 03
-mov rax, qword [rbx]
->48 8b 03
-mov dword [rbx], eax
->89 03
-mov eax, dword [rbx]
->8b 03
-mov word [rbx], ax
->66 89 03
-mov ax, word [rbx]
->66 8b 03
-mov byte [rbx], al
->88 03
-mov al, byte [rbx]
->8a 03
-mov rax, qword [rbp+imm32]
->48 8b 85 IMM32 (note: imm may be negative)
-lea rax, [rbp+imm32]
->48 8d 85 IMM32 (note: imm may be negative)
-lea rsp, [rbp+imm32]
->48 8d a5 IMM32 (note: imm may be negative)
-mov qword [rbp+imm32], rax
->48 89 85 IMM32 (note: imm may be negative)
-mov qword [rsp+imm32], rax
->48 89 84 24 IMM32 (note: imm may be negative)
-mov qword [rsp], rbp
->48 89 2c 24
-mov rbp, qword [rsp]
->48 8b 2c 24
-mov ebx, imm32
->bb IMM32
-neg rax
->48 f7 d8
-add rax, rbx
->48 01 d8
-sub rax, rbx
->48 29 d8
-imul rbx
->48 f7 eb
-idiv rbx
->48 f7 fb
-mul rbx
->48 f7 e3
-div rbx
->48 f7 f3
-not rax
->48 f7 d0
-and rax, rbx
->48 21 d8
-or rax, rbx
->48 09 d8
-xor rax, rbx
->48 31 d8
-shl rax, cl
->48 d3 e0
-shl rax, imm8
->48 c1 e0 IMM8
-shr rax, cl
->48 d3 e8
-shr rax, imm8
->48 c1 e8 IMM8
-sar rax, cl
->48 d3 f8
-sar rax, imm8
->48 c1 f8 IMM8
-sub rsp, imm32
->48 81 ec IMM32
-add rsp, imm32
->48 81 c4 IMM32
-cmp rax, rbx
->48 39 d8
-test rax, rax
->48 85 c0
-jmp rel32
->e9 REL32
-je rel32
->0f 84 REL32
-jne rel32
->0f 85 REL32
-jl rel32
->0f 8c REL32
-jg rel32
->0f 8f REL32
-jle rel32
->0f 8e REL32
-jge rel32
->0f 8d REL32
-jb rel32
->0f 82 REL32
-ja rel32
->0f 87 REL32
-jbe rel32
->0f 86 REL32
-jae rel32
->0f 83 REL32
-call rax
->ff d0
-ret
->c3
-syscall
->0f 05
-nop
->90
-(more will be added as needed)
-
-to be removed:
-mov qword [rsp], rax
->48 89 04 24
-mov rax, qword [rsp]
->48 8b 04 24