summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorpommicket <pommicket@gmail.com>2022-01-27 18:52:39 -0500
committerpommicket <pommicket@gmail.com>2022-01-27 18:52:39 -0500
commit01b8a4d728cb714e734ce308324757bfa07f02cf (patch)
tree89eee677fdf0a845fcbd1a631b8ba0ed5d321d06
parentb5a498aa524a8ce450f6ee7120409acd2ddcf6be (diff)
switch to using mmap for output file
-rw-r--r--05/constants.b8
-rw-r--r--05/main.b27
-rw-r--r--05/main.c2
-rw-r--r--05/parse.b11
-rw-r--r--05/tokenize.b19
-rw-r--r--05/util.b52
-rw-r--r--README.md2
7 files changed, 91 insertions, 30 deletions
diff --git a/05/constants.b b/05/constants.b
index cc4335e..d0f2060 100644
--- a/05/constants.b
+++ b/05/constants.b
@@ -1,9 +1,13 @@
; this is the format of the executables we produce:
-; elf header + code 4MB addresses 0x400000-0x7fffff
+; elf header 4MB addresses 0x000000-0x3fffff (no, it won't actually take up that much space)
+; code 4MB addresses 0x400000-0x7fffff
; read-only data 4MB addresses 0x800000-0xbfffff
; read-write data 4MB addresses 0xc00000-0xffffff
-#define RODATA_OFFSET 0x400000
+; note that file offsets and runtime addresses are the same.
+; you should be able to change these constants without breaking anything:
#define RODATA_ADDR 0x800000
+#define RWDATA_END 0x1000000
+#define EXECUTABLE_SIZE 0x1000000
; C OPERATOR PRECEDENCE
; lowest
diff --git a/05/main.b b/05/main.b
index 4ff7e5c..d988ee3 100644
--- a/05/main.b
+++ b/05/main.b
@@ -8,8 +8,6 @@ byte 0
byte 0
goto main
-global output_fd
-
global object_macros_size
global function_macros_size
@@ -36,6 +34,9 @@ global enumerators
; for unions, offset will always be 0.
global structures
global structures_bytes_used
+; file offset/runtime address to write next piece of read-only data; initialized in main
+global rodata_end_addr
+global output_file_data
#include util.b
#include idents.b
@@ -149,6 +150,8 @@ function main
local ast
local p
local i
+ local output_fd
+
fill_in_powers_of_10()
typedefs = ident_list_create(100000)
@@ -173,8 +176,12 @@ function main
input_filename = argv1
output_filename = argv2
:have_filenames
- output_fd = open_w(output_filename)
- rodata_end_offset = RODATA_OFFSET
+ output_fd = open_rw(output_filename, 493)
+ rodata_end_addr = RODATA_ADDR
+
+ ftruncate(output_fd, RWDATA_END)
+ output_file_data = mmap(0, RWDATA_END, PROT_READ_WRITE, MAP_SHARED, output_fd, 0)
+ if output_file_data ] 0xffffffffffff0000 goto mmap_output_fd_failed
pptokens = split_into_preprocessing_tokens(input_filename)
;print_pptokens(pptokens)
@@ -195,8 +202,20 @@ function main
parse_tokens(tokens)
+ p = output_file_data + RODATA_ADDR
+ munmap(output_file_data, RWDATA_END)
+ close(output_fd)
+
exit(0)
+:mmap_output_fd_failed
+ fputs(2, .str_mmap_output_fd_failed)
+ exit(1)
+:str_mmap_output_fd_failed
+ string Couldn't mmap output file.
+ byte 10
+ byte 0
+
:usage_error
fputs(2, .str_usage_error)
exit(1)
diff --git a/05/main.c b/05/main.c
index deea88f..3b094ad 100644
--- a/05/main.c
+++ b/05/main.c
@@ -24,5 +24,5 @@ typedef union B{
} c;
}B;
-typedef int x[sizeof(A)];
+typedef int x[sizeof(A)+sizeof"hello"];
typedef int y[sizeof(struct B)];
diff --git a/05/parse.b b/05/parse.b
index 91b0d74..48a5256 100644
--- a/05/parse.b
+++ b/05/parse.b
@@ -1500,7 +1500,7 @@ function type_sizeof
if c == TYPE_ARRAY goto sizeof_array
if c == TYPE_STRUCT goto sizeof_struct
- fputs(2, .str_sizeof_bad) ; @TODO
+ fputs(2, .str_sizeof_bad)
exit(1)
:str_sizeof_bad
string type_sizeof bad type.
@@ -1621,7 +1621,6 @@ function evaluate_constant_expression
c = *1expr
if c == EXPRESSION_CONSTANT_INT goto eval_constant_int
- if c == EXPRESSION_IDENTIFIER goto eval_constant_identifier
if c == EXPRESSION_UNARY_PLUS goto eval_unary_plus
if c == EXPRESSION_UNARY_MINUS goto eval_unary_minus
if c == EXPRESSION_BITWISE_NOT goto eval_bitwise_not
@@ -1666,14 +1665,6 @@ function evaluate_constant_expression
:str_eval_cast_bad_type
string Bad type for constant cast (note: floating-point casts are not supported even though they are standard).
byte 0
- :eval_constant_identifier
- ; @TODO: enum values
- fputs(2, .str_constant_identifier)
- exit(1)
- :str_constant_identifier
- string Constant identifiers not handled (see @TODO).
- byte 10
- byte 0
:eval_constant_int
expr += 8
*8p_value = *8expr
diff --git a/05/tokenize.b b/05/tokenize.b
index f5b0c30..e52b30a 100644
--- a/05/tokenize.b
+++ b/05/tokenize.b
@@ -97,9 +97,6 @@ function get_keyword_str
byte 0
-; file offset to write next piece of read-only data; initialized in main.b
-global rodata_end_offset
-
; turn pptokens into tokens, written to out.
; tokens are 16 bytes and have the following format:
; uchar type
@@ -256,26 +253,28 @@ function tokenize
data = c
goto token_output
:tokenize_string_literal
- n = rodata_end_offset - RODATA_OFFSET
- n += RODATA_ADDR ; address of string
- lseek(output_fd, rodata_end_offset, SEEK_SET)
+ data = rodata_end_addr
+ p = output_file_data + rodata_end_addr
+
:string_literal_loop
in += 1 ; skip opening "
:string_literal_char_loop
if *1in == '" goto string_literal_char_loop_end
c = read_c_char(&in)
if c ] 255 goto bad_char_in_string
- fputc(output_fd, c)
+ *1p = c
+ p += 1
goto string_literal_char_loop
:string_literal_char_loop_end
pptoken_skip(&in) ; skip closing "
pptoken_skip_spaces(&in)
if *1in == '" goto string_literal_loop ; string concatenation, e.g. "Hello, " "world!"
- fputc(output_fd, 0) ; null terminator
- rodata_end_offset = lseek(output_fd, 0, SEEK_CUR)
+ *1p = 0 ; null terminator
+ p += 1
+ rodata_end_addr = p - output_file_data
+
*1out = TOKEN_STRING_LITERAL
out += 2 ; no info
- data = n
goto token_output
:tokenize_float
; @NONSTANDARD: this doesn't allow for floats whose integral part is >=2^64, e.g. 1000000000000000000000000.0
diff --git a/05/util.b b/05/util.b
index 89d9ce9..9e7afbb 100644
--- a/05/util.b
+++ b/05/util.b
@@ -139,12 +139,48 @@ function die
fputs(2, message)
exit(1)
+; set the size of the file referred to by fd to length bytes, using syscall 77
+; (ftruncate on x86-64 Linux).
+; returns normally on success. if the syscall returns anything other than 0,
+; writes an error message to file descriptor 2 and exits with status 1.
+function ftruncate
+ argument fd
+ argument length
+ local x
+ x = syscall(77, fd, length)
+ if x != 0 goto ftruncate_failed
+ return
+
+:ftruncate_failed
+ fputs(2, .str_ftruncate_failed)
+ exit(1)
+:str_ftruncate_failed
+ string ftruncate failed.
+ byte 10
+ byte 0
+
+; thin wrapper around syscall 9 (mmap on x86-64 Linux).
+; all six arguments are passed to the syscall unchanged, in the order given.
+; returns the syscall's result as-is; no error checking is done here.
+; callers such as malloc treat a result `] 0xffffffffffff0000` as failure.
+function mmap
+ argument addr
+ argument length
+ argument prot
+ argument flags
+ argument fd
+ argument offset
+ return syscall(9, addr, length, prot, flags, fd, offset)
+
+; thin wrapper around syscall 11 (munmap on x86-64 Linux).
+; addr and length are passed to the syscall unchanged.
+; returns the syscall's result as-is; no error checking is done here.
+function munmap
+ argument addr
+ argument length
+ return syscall(11, addr, length)
+
+; values for the prot and flags arguments of mmap.
+; PROT_READ_WRITE (3) is PROT_READ | PROT_WRITE.
+; MAP_PRIVATE_ANONYMOUS (0x22) is presumably MAP_PRIVATE (0x02) | MAP_ANONYMOUS (0x20)
+; -- confirm against the Linux headers.
+#define PROT_READ 1
+#define PROT_WRITE 2
+#define PROT_READ_WRITE 3
+#define MAP_SHARED 0x01
+#define MAP_PRIVATE_ANONYMOUS 0x22
+
function malloc
argument size
local total_size
local memory
total_size = size + 8
- memory = syscall(9, 0, total_size, 3, 0x22, -1, 0)
+ memory = mmap(0, total_size, PROT_READ_WRITE, MAP_PRIVATE_ANONYMOUS, -1, 0)
if memory ] 0xffffffffffff0000 goto malloc_failed
*8memory = total_size
return memory + 8
@@ -164,7 +200,7 @@ function free
local size
psize = memory - 8
size = *8psize
- syscall(11, psize, size)
+ munmap(psize, size)
return
; returns a pointer to a null-terminated string containing the
@@ -568,6 +604,18 @@ function open_w
:open_w_error
file_error(filename)
return -1
+
+; open the given file for reading and writing with the given mode
+; mode is passed as the third argument of syscall 2 (open on x86-64 Linux).
+; the flags value 0x242 is presumably O_RDWR | O_CREAT | O_TRUNC -- confirm against the Linux headers.
+; returns the file descriptor on success.
+; if the syscall result is negative, calls file_error(filename), and returns -1 if that call returns.
+function open_rw
+ argument filename
+ argument mode
+ local fd
+ fd = syscall(2, filename, 0x242, mode)
+ if fd < 0 goto open_rw_error
+ return fd
+ :open_rw_error
+ file_error(filename)
+ return -1
function close
argument fd
diff --git a/README.md b/README.md
index f379be1..02134d9 100644
--- a/README.md
+++ b/README.md
@@ -96,7 +96,7 @@ I'll leave that to someone else.
## instruction set
x86-64 has a *gigantic* instruction set. The manual for it is over 2,000 pages
-long! So it makes sense to select only a small subset of it to use.
+long! To make things simpler, we will only use a small subset.
Here are all the instructions we'll be using. If you're not familiar with
x86-64 assembly, you might want to look over these (but you don't need to understand everything).