summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorpommicket <pommicket@gmail.com>2021-11-19 09:52:27 -0500
committerpommicket <pommicket@gmail.com>2021-11-19 09:52:27 -0500
commit9760d898b7f3e4b43337bac18c842d95c9f3ea6c (patch)
treed2ede7ea6fa0ad59395219322c7ba2ca4e126105
parent17cf6b6fa02db452c3b0b88b09b8884f73b0c1eb (diff)
start 04b compiler
-rw-r--r--.gitignore1
-rw-r--r--04a/.gitignore1
-rw-r--r--04b/in03487
-rw-r--r--04b/in0495
-rw-r--r--04b/in04b96
5 files changed, 584 insertions, 96 deletions
diff --git a/.gitignore b/.gitignore
index d1648a7..6967345 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
README.html
out??
+out???
markdown
diff --git a/04a/.gitignore b/04a/.gitignore
deleted file mode 100644
index fe70990..0000000
--- a/04a/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-out*
diff --git a/04b/in03 b/04b/in03
new file mode 100644
index 0000000..dbc78e4
--- /dev/null
+++ b/04b/in03
@@ -0,0 +1,487 @@
+; initialize global_variables_end
+; (the table starts out empty, so its end pointer is the start of :global_variables)
+C=:global_variables_end
+D=:global_variables
+8C=D
+; initialize static_memory_end
+; (handle_global reads this value to assign an address to each global)
+C=:static_memory_end
+D=x500000
+; the store is required: loading rcx/rdx alone leaves :static_memory_end unwritten
+8C=D
+
+; check the argument count: the 8 bytes at *rsp must equal 3
+; (presumably argc: program name, input file, output file)
+I=8S
+A=d3
+?I!A:usage_error
+; open input file
+ J=S
+ ; argv[1] is at *(rsp+16)
+ J+=d16
+ J=8J
+ ; flags = 0
+ I=d0
+ syscall x2
+ J=A
+ ; a negative result means the open failed
+ ; NOTE(review): the returned descriptor is not saved; read_line reads from descriptor 3 directly
+ ?J<0:input_file_error
+; open output file
+ J=S
+ ; argv[2] is at *(rsp+24)
+ J+=d24
+ J=8J
+ ; NOTE(review): x241 looks like O_WRONLY|O_CREAT|O_TRUNC and x1ed is octal 755 (creation mode) -- confirm against open(2)
+ I=x241
+ D=x1ed
+ syscall x2
+ J=A
+ ; a negative result means the open failed
+ ?J<0:output_file_error
+
+; read the next line of input into the :line buffer, one byte at a time.
+; tabs are dropped, and a ';' comment is discarded up to the end of the line.
+; once the newline has been stored, control continues after :read_line_loop_end
+; with rbp pointing at that newline. jumps to :eof when a read returns 0 bytes.
+:read_line
+; increment line number
+D=:line_number
+C=8D
+C+=d1
+8D=C
+
+; use rbp to store line pointer
+R=:line
+:read_line_loop
+ ; read 1 byte from descriptor 3 to the address in rbp
+ J=d3
+ I=R
+ D=d1
+ syscall x0
+ D=A
+ ?D=0:eof
+
+ ; check if the character was a newline:
+ C=1R
+ D=xa
+ ?C=D:read_line_loop_end
+ ; check if the character was a tab:
+ D=x9
+ ; if so, don't increment rbp: the next byte read overwrites the tab.
+ ; (go back to read_line_loop, not read_line -- read_line would also bump the
+ ; line number and throw away what has been read of this line so far)
+ ?C=D:read_line_loop
+ ; check if the character was a semicolon:
+ D=';
+ ; if so, it's a comment
+ ?C=D:handle_comment
+
+ R+=d1
+ !:read_line_loop
+
+ :handle_comment
+ ; read out rest of line from file
+ ; (each byte lands on the same address, so the ';' is overwritten and
+ ; the newline ends up where the comment started)
+ J=d3
+ I=R
+ D=d1
+ syscall x0
+ D=A
+ ?D=0:eof
+ C=1R
+ D=xa
+ ; if we didn't reach the end of the line, keep going
+ ?C!D:handle_comment
+
+ !:read_line_loop_end
+:read_line_loop_end
+
+; remove whitespace (specifically, ' ' characters) at end of line
+; rbp points at the line's newline here; walk backwards from it.
+; the 0 byte placed just before :line stops this loop on an all-space line.
+I=R
+:remove_terminal_whitespace_loop
+ I-=d1
+ C=1I
+ D=x20
+ ?C!D:remove_terminal_whitespace_loop_end
+ ; replace ' ' with a newline
+ D=xa
+ 1I=D
+ !:remove_terminal_whitespace_loop
+:remove_terminal_whitespace_loop_end
+
+; check if this is a blank line
+; (the first byte of the line is a newline); if so, just read the next line
+C=:line
+D=1C
+C=xa
+?C=D:read_line
+
+; does the line start with "global " (compared up to and including the ' ' terminator)?
+I=:line
+J=:"global"
+C=x20
+call :string=
+D=A
+?D!0:handle_global
+
+
+; any other kind of line is skipped
+!:read_line
+
+; end of input reached: exit (syscall x3c) with status 0
+:eof
+ J=d0
+ syscall x3c
+
+; handle a line of the form "global <type> <name>".
+; appends an entry to the global variable table -- the name, then 1 byte of type
+; (as returned by read_type), then the 8-byte address -- and sets aside 8 bytes
+; of static memory for the variable. then goes on to read the next line.
+:handle_global
+ I=:line
+ ; skip "global "
+ I+=d7
+ call :read_type
+ ; put type in R
+ R=A
+ ; skip ' ' after type
+ I+=d1
+ J=:global_variables_end
+ J=8J
+ call :ident_copy
+ ; store type
+ 1J=R
+ J+=d1
+ ; store address
+ D=:static_memory_end
+ D=8D
+ 8J=D
+ ; step past the 8-byte address, so the next entry starts after it instead of overwriting it
+ J+=d8
+ ; update :static_memory_end
+ D=:static_memory_end
+ C=8D
+ C+=d8
+ 8D=C
+ ; update :global_variables_end
+ I=:global_variables_end
+ 8I=J
+ ; go read the next line
+ !:read_line
+
+:"global"
+ str global
+ x20
+
+; copy the newline-terminated identifier from rsi to rdi
+; on return, rsi points at the newline and rdi points just past the last byte written
+; (the newline itself is not copied).
+; jumps to :bad_identifier if the identifier is empty or contains a character rejected by isident.
+; clobbers rax, rbx, rcx, rdx.
+:ident_copy
+ ; the first character must be an identifier character (this rejects an empty identifier)
+ C=1I
+ B=C
+ call :isident
+ D=A
+ ?D=0:bad_identifier
+
+ :ident_loop
+ C=1I
+ D=xa
+ ?C=D:ident_loop_end
+ B=C
+ call :isident
+ D=A
+ ?D=0:bad_identifier
+ ; copy the byte across and advance both pointers
+ C=1I
+ 1J=C
+ I+=d1
+ J+=d1
+ !:ident_loop
+ :ident_loop_end
+ return
+
+
+; can the character in rbx appear in an identifier?
+; returns 1 in rax for '0'-'9', 'A'-'Z', 'a'-'z' and '_', and 0 otherwise.
+; only rax is modified.
+:isident
+ A='0
+ ?B<A:return_0
+ ; note: 58 = '9' + 1
+ A=d58
+ ?B<A:return_1
+ A='A
+ ?B<A:return_0
+ ; note: 91 = 'Z' + 1
+ A=d91
+ ?B<A:return_1
+ A='z
+ ?B>A:return_0
+ ; 96 = 'a' - 1
+ A=d96
+ ?B>A:return_1
+ ; what's left is the range between 'Z' and 'a'; of those, only '_' is allowed
+ A='_
+ ?B=A:return_1
+ !:return_0
+
+; read the space-terminated type from rsi, advance rsi, and set rax to the corresponding type number:
+; 0 for non-pointer types
+; 1 for pointer to char
+; 2 for pointer to short
+; 4 for pointer to int
+; 8 for pointer to long
+; on return, rsi points at the space that terminates the type name.
+; clobbers rbp (read_simple_type saves rsi there), rcx, rdx and rdi.
+; an unrecognized type name ends up at :bad_type.
+:read_type
+ C=1I
+ D='*
+ ?C=D:read_pointer_type
+ ; it's not a pointer
+ ; (the call is still needed to validate the type name and advance rsi; its result is discarded)
+ call :read_simple_type
+ A=d0
+ return
+ :read_pointer_type
+ ; it's a pointer!
+ ; skip the '*', then jump (not call) so that read_simple_type's result is returned to our caller
+ I+=d1
+ !:read_simple_type
+
+; returns 1 for char, 2 for short, 4 for int, 8 for long
+; the type name is read from rsi and must be followed by a ' '.
+; on a match, rsi is left pointing at that ' '. jumps to :bad_type if no name matches.
+; clobbers rbp, rcx, rdx and rdi.
+:read_simple_type
+ ; remember the start of the type name, so each comparison can start over from it
+ R=I
+ ; terminator for string=
+ C=x20
+ I=R
+ J=:"char"
+ call :string=
+ D=A
+ ?D!0:return_1
+ I=R
+ J=:"short"
+ call :string=
+ D=A
+ ?D!0:return_2
+ I=R
+ J=:"int"
+ call :string=
+ D=A
+ ?D!0:return_4
+ I=R
+ J=:"long"
+ call :string=
+ D=A
+ ?D!0:return_8
+ !:bad_type
+
+:"char"
+ str char
+ x20
+:"short"
+ str short
+ x20
+:"int"
+ str int
+ x20
+:"long"
+ str long
+ x20
+
+
+; wrong number of command-line arguments: print the usage message and exit
+:usage_error
+ B=:usage_error_message
+ ; general_error exits and never returns, so jump to it as the other error handlers do
+ !:general_error
+
+:usage_error_message
+ str Please provide an input and an output file.
+ xa
+ x0
+
+; the input file couldn't be opened: print a message and exit
+:input_file_error
+ B=:input_file_error_message
+ !:general_error
+
+:input_file_error_message
+ str Couldn't open input file.
+ xa
+ x0
+
+; the output file couldn't be opened: print a message and exit
+:output_file_error
+ B=:output_file_error_message
+ !:general_error
+
+:output_file_error_message
+ str Couldn't open output file.
+ xa
+ x0
+
+; an invalid identifier was found in the input: report it with the line number and exit
+:bad_identifier
+ B=:bad_identifier_error_message
+ !:program_error
+
+:bad_identifier_error_message
+ str Bad identifier.
+ xa
+ x0
+
+; an unrecognized type name was found in the input: report it with the line number and exit
+:bad_type
+ B=:bad_type_error_message
+ !:program_error
+
+:bad_type_error_message
+ str Bad type.
+ xa
+ x0
+
+; print the 0-terminated message in rbx to stderr, then exit with status 1. does not return.
+:general_error
+ call :eputs
+ J=d1
+ syscall x3c
+
+; report an error in the input program: prints "Line <line number>: " followed by the
+; 0-terminated message in rbx to stderr, then exits with status 1. does not return.
+:program_error
+ ; save the message in rbp, since rbx is needed for the pieces printed before it
+ R=B
+
+ B=:"Line"
+ call :eputs
+
+ D=:line_number
+ D=8D
+ B=D
+ call :eputn
+
+ B=:line_number_separator
+ call :eputs
+
+ B=R
+ call :eputs
+ J=d1
+ syscall x3c
+
+:"Line"
+ str Line
+ x20
+ x0
+
+:line_number_separator
+ str :
+ x20
+ x0
+
+; set rax to the length of the 0-terminated string in rbx (not counting the terminator).
+; rbx is preserved; clobbers rcx, rdx and rsi.
+:strlen
+ I=B
+ D=B
+ :strlen_loop
+ C=1I
+ ?C=0:strlen_ret
+ I+=d1
+ !:strlen_loop
+ :strlen_ret
+ ; length = address of the terminator - address of the start
+ I-=D
+ A=I
+ return
+
+; check if strings in rdi and rsi are equal, up to terminator in rcx
+; returns 1 in rax if every byte up to and including the terminator matches, 0 otherwise.
+; on a match, rsi and rdi are left pointing at the terminator;
+; on a mismatch, they point at the first differing byte.
+; clobbers rdx; rcx is preserved.
+:string=
+ D=1I
+ A=1J
+ ?D!A:return_0
+ ; the bytes are equal here, so if this one is the terminator, both strings ended together
+ ?D=C:return_1
+ I+=d1
+ J+=d1
+ !:string=
+
+; check if strings in rdi and rsi are equal, up to the first non-identifier character
+; returns 1 in rax if both identifiers have the same characters and the same length, 0 otherwise.
+; advances rsi and rdi past the characters that matched; clobbers rbx and rdx.
+:ident=
+ D=1I
+ B=D
+ call :isident
+ ; I ended
+ ?A=0:ident=_I_end
+
+ D=1J
+ B=D
+ call :isident
+ ; J ended, but I didn't
+ ?A=0:return_0
+
+ ; we haven't reached the end of either
+ D=1I
+ A=1J
+ ?D!A:return_0
+ I+=d1
+ J+=d1
+ !:ident=
+:ident=_I_end
+ D=1J
+ B=D
+ call :isident
+ ; check if J also ended
+ ?A=0:return_1
+ ; J didn't end
+ !:return_0
+
+; shared exits: jumping to :return_N from inside a function sets rax to N
+; and returns to that function's caller.
+:return_0
+ A=d0
+ return
+:return_1
+ A=d1
+ return
+:return_2
+ A=d2
+ return
+:return_3
+ A=d3
+ return
+:return_4
+ A=d4
+ return
+:return_5
+ A=d5
+ return
+:return_6
+ A=d6
+ return
+:return_7
+ A=d7
+ return
+:return_8
+ A=d8
+ return
+
+; write the character in rbx to the file in rdi.
+; clobbers rcx, rdx and rsi; rax is left as whatever the write syscall returned.
+:fputc
+ C=B
+ ; use the byte just below the stack pointer as a 1-byte buffer
+ I=S
+ I-=d1
+ 1I=C
+ ; write 1 byte from that buffer to the descriptor in rdi
+ D=d1
+ syscall x1
+ return
+
+; write the string in rbx to stderr
+; the string must be 0-terminated (its length comes from strlen).
+; clobbers rax, rcx, rdx, rsi and rdi.
+:eputs
+ ; keep the string pointer in rdi, which strlen doesn't touch
+ J=B
+ call :strlen
+ ; write(2, string, length)
+ D=A
+ I=J
+ J=d2
+ syscall x1
+ return
+
+; write rbx in decimal to stderr
+; the digits are built from least to most significant, going downwards in memory
+; from the byte just below the stack pointer.
+; clobbers rax, rbx, rdx, rsi and rdi.
+:eputn
+ I=B
+ J=S
+ J-=d1
+ :eputn_loop
+ D=d0
+ ; divide by 10
+ B=d10
+ A=I
+ div
+ ; quotient is new number
+ I=A
+ ; add remainder to string
+ D+='0
+ 1J=D
+ J-=d1
+ ?I!0:eputn_loop
+ ; rdi is now one byte below the most significant digit; step back up onto it,
+ ; so that neither the start nor the length of the write includes that stale byte
+ J+=d1
+ ; length = rsp - address of first digit
+ D=S
+ D-=J
+ I=J
+ J=d2
+ syscall x1
+ return
+
+; copy rdx bytes from rsi to rdi.
+; this copies from the left: if you're doing an overlapped copy, rsi should be greater than rdi
+; on return, rsi and rdi have each been advanced by the number of bytes copied,
+; rdx is 0, and rax is 0 (the function exits through return_0).
+:memcpy
+ ?D=0:return_0
+ ; copy one byte, using rax as scratch
+ A=1I
+ 1J=A
+ I+=d1
+ J+=d1
+ D-=d1
+ !:memcpy
+
+; put a 0 byte before the line (this is important for removing whitespace at the end of the line,
+; specifically, we don't want this to be a space character)
+x0
+:line
+ reserve d1000
+
+align
+:global_variables_end
+ reserve d8
+:static_memory_end
+ reserve d8
+:line_number
+ reserve d8
+:global_variables
+ reserve d50000
+
+; we shouldn't end the file with a reserve; we don't handle that properly
+x00
diff --git a/04b/in04 b/04b/in04
deleted file mode 100644
index 1b362ad..0000000
--- a/04b/in04
+++ /dev/null
@@ -1,95 +0,0 @@
-// types: char, uchar, short, ushort, int, uint, long, ulong, *type
-// declaration:
-// static <type> <name>;
-// local <type> <name>;
-// :<label>
-// statement:
-// <declaration>
-// if <term> <==/</>/>=/<=/!=> <term> goto <label>
-// goto <label>
-// <lvalue> = <rvalue>
-// <lvalue> += <rvalue>
-// <lvalue> -= <rvalue>
-// <function>(<term>, <term>, ...)
-// syscall(<term>, <term>, ...)
-// return <rvalue>;
-// term:
-// <var>
-// <number>
-// number:
-// 'c
-// 12345
-// 0xabc
-// lvalue:
-// <var>
-// *<var>
-// <var>[<term>]
-// rvalue:
-// `<string>`
-// <var>
-// &<var>
-// *<var>
-// <var>[<term>]
-// ~<var>
-// <function>(<term>, <term>, ...)
-// syscall(<term>, <term>, ...)
-// <term> + <term>
-// <term> - <term>
-// <term> * <term>
-// <term> / <term>
-// <term> % <term>
-// <term> & <term>
-// <term> | <term>
-// <term> ^ <term>
-// <term> < <term> (left shift)
-// <term> > <term> (right shift)
-// (<term> ] <term>)
-
-main();
-
-static char x;
-static uchar y;
-static long z;
-
-function strlen(*char s)
- local ulong len;
- local char c;
- len = 0;
- :strlen.loop
- c = s[len];
- if c == 0 goto strlen.loop_end;
- len += 1;
- goto strlen.loop
- :strlen.loop_end
- return len;
-
-function putc(char c)
- local char *p;
- p = &c;
- syscall(1, 1, p, 1, 0, 0, 0, 0);
- return;
-
-function puts(*char s)
- local ulong len;
- len = strlen(s);
- syscall(1, 1, s, len, 0, 0, 0, 0);
- return;
-
-function main()
- local *char hello;
- hello = `Hello, world!
-`;
- puts(hello);
- syscall(0x3c, 0, 0, 0, 0, 0, 0, 0);
-
-function f(*long x, **long y)
- local long v;
- local *long p;
- v = *x;
- p = *y;
- *p = v;
- if v == 0 goto something;
- p[1] = v + 1;
- return p[2];
- :something
- return p[1];
diff --git a/04b/in04b b/04b/in04b
new file mode 100644
index 0000000..f312b54
--- /dev/null
+++ b/04b/in04b
@@ -0,0 +1,96 @@
+; types: char, short, int, long, *type
+; declaration:
+; global <type> <name>
+; local <type> <name>
+; :<label>
+; statement:
+; <declaration>
+; if <term> <==/</>/>=/<=/!=> <term> goto <label> NOTE: this uses signed comparisons
+; goto <label>
+; <lvalue> = <rvalue>
+; <lvalue> += <rvalue>
+; <lvalue> -= <rvalue>
+; <function>(<term>, <term>, ...)
+; syscall(<term>, <term>, ...)
+; return <rvalue>
+; byte <number>
+; term:
+; <var>
+; <number>
+; number:
+; 'c
+; 12345
+; 0xabc
+; lvalue:
+; <var>
+; *<var>
+; <var>[<term>]
+; rvalue:
+; `<string>`
+; <var>
+; &<var>
+; *<var>
+; <var>[<term>]
+; ~<var>
+; <function>(<term>, <term>, ...)
+; syscall(<term>, <term>, ...)
+; <term> + <term>
+; <term> - <term>
+; NOTE: *, /, % are signed (imul and idiv)
+; <term> * <term>
+; <term> / <term>
+; <term> % <term>
+; <term> & <term>
+; <term> | <term>
+; <term> ^ <term>
+; <term> < <term> (left shift)
+; <term> > <term> (unsigned right shift)
+
+main() ; hello
+
+global char x
+global short y ;123
+global long z
+
+; returns the number of characters in s before the first 0 character
+function strlen(*char s)
+ local long len
+ local char c
+ len = 0
+ :strlen.loop
+ c = s[len]
+ if c == 0 goto strlen.loop_end
+ len += 1
+ goto strlen.loop
+ :strlen.loop_end
+ return len
+
+; passes the address of c and a length of 1 to syscall 1 (presumably write, to descriptor 1)
+function putc(char c)
+ ; declarations are "local <type> <name>", and a pointer type is written *type
+ local *char p
+ p = &c
+ syscall(1, 1, p, 1, 0, 0, 0, 0)
+ return
+
+; passes s and its length (from strlen) to syscall 1 (presumably write, to descriptor 1)
+function puts(*char s)
+ local long len
+ len = strlen(s)
+ syscall(1, 1, s, len, 0, 0, 0, 0)
+ return
+
+; prints "Hello, world!" followed by a newline using puts, then invokes syscall 0x3c (presumably exit) with 0
+function main()
+ local *char hello
+ hello = `Hello, world!
+`
+ puts(hello)
+ syscall(0x3c, 0, 0, 0, 0, 0, 0, 0)
+
+; not called anywhere in this file; it uses pointer dereference, indexing,
+; a conditional goto and returns (presumably as test input for the compiler)
+function f(*long x, *long y)
+ local long v
+ local *long p
+ v = *x
+ p = *y
+ *p = v
+ if v == 0 goto something
+ p[1] = v + 1
+ return p[2]
+ :something
+ return p[1]