From 01b8a4d728cb714e734ce308324757bfa07f02cf Mon Sep 17 00:00:00 2001 From: pommicket Date: Thu, 27 Jan 2022 18:52:39 -0500 Subject: switch to using mmap for output file --- 05/constants.b | 8 ++++++-- 05/main.b | 27 +++++++++++++++++++++++---- 05/main.c | 2 +- 05/parse.b | 11 +---------- 05/tokenize.b | 19 +++++++++---------- 05/util.b | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++-- README.md | 2 +- 7 files changed, 91 insertions(+), 30 deletions(-) diff --git a/05/constants.b b/05/constants.b index cc4335e..d0f2060 100644 --- a/05/constants.b +++ b/05/constants.b @@ -1,9 +1,13 @@ ; this is the format of the executables we produce: -; elf header + code 4MB addresses 0x400000-0x7fffff +; elf header 4MB addresses 0x000000-0x400000 (no, it won't actually take up that much space) +; code 4MB addresses 0x400000-0x7fffff ; read-only data 4MB addresses 0x800000-0xbfffff ; read-write data 4MB addresses 0xc00000-0xffffff -#define RODATA_OFFSET 0x400000 +; note that file offsets and runtime addresses are the same. +; you should be able to change these constants without breaking anything: #define RODATA_ADDR 0x800000 +#define RWDATA_END 0x1000000 +#define EXECUTABLE_SIZE 0x1000000 ; C OPERATOR PRECEDENCE ; lowest diff --git a/05/main.b b/05/main.b index 4ff7e5c..d988ee3 100644 --- a/05/main.b +++ b/05/main.b @@ -8,8 +8,6 @@ byte 0 byte 0 goto main -global output_fd - global object_macros_size global function_macros_size @@ -36,6 +34,9 @@ global enumerators ; for unions, offset will always be 0. 
global structures global structures_bytes_used +; file offset/runtime address to write next piece of read-only data; initialized in main +global rodata_end_addr +global output_file_data #include util.b #include idents.b @@ -149,6 +150,8 @@ function main local ast local p local i + local output_fd + fill_in_powers_of_10() typedefs = ident_list_create(100000) @@ -173,8 +176,12 @@ function main input_filename = argv1 output_filename = argv2 :have_filenames - output_fd = open_w(output_filename) - rodata_end_offset = RODATA_OFFSET + output_fd = open_rw(output_filename, 493) + rodata_end_addr = RODATA_ADDR + + ftruncate(output_fd, RWDATA_END) + output_file_data = mmap(0, RWDATA_END, PROT_READ_WRITE, MAP_SHARED, output_fd, 0) + if output_file_data ] 0xffffffffffff0000 goto mmap_output_fd_failed pptokens = split_into_preprocessing_tokens(input_filename) ;print_pptokens(pptokens) @@ -195,8 +202,20 @@ function main parse_tokens(tokens) + p = output_file_data + RODATA_ADDR + munmap(output_file_data, RWDATA_END) + close(output_fd) + exit(0) +:mmap_output_fd_failed + fputs(2, .str_mmap_output_fd_failed) + exit(1) +:str_mmap_output_fd_failed + string Couldn't mmap output file. + byte 10 + byte 0 + :usage_error fputs(2, .str_usage_error) exit(1) diff --git a/05/main.c b/05/main.c index deea88f..3b094ad 100644 --- a/05/main.c +++ b/05/main.c @@ -24,5 +24,5 @@ typedef union B{ } c; }B; -typedef int x[sizeof(A)]; +typedef int x[sizeof(A)+sizeof"hello"]; typedef int y[sizeof(struct B)]; diff --git a/05/parse.b b/05/parse.b index 91b0d74..48a5256 100644 --- a/05/parse.b +++ b/05/parse.b @@ -1500,7 +1500,7 @@ function type_sizeof if c == TYPE_ARRAY goto sizeof_array if c == TYPE_STRUCT goto sizeof_struct - fputs(2, .str_sizeof_bad) ; @TODO + fputs(2, .str_sizeof_bad) exit(1) :str_sizeof_bad string type_sizeof bad type. 
@@ -1621,7 +1621,6 @@ function evaluate_constant_expression c = *1expr if c == EXPRESSION_CONSTANT_INT goto eval_constant_int - if c == EXPRESSION_IDENTIFIER goto eval_constant_identifier if c == EXPRESSION_UNARY_PLUS goto eval_unary_plus if c == EXPRESSION_UNARY_MINUS goto eval_unary_minus if c == EXPRESSION_BITWISE_NOT goto eval_bitwise_not @@ -1666,14 +1665,6 @@ function evaluate_constant_expression :str_eval_cast_bad_type string Bad type for constant cast (note: floating-point casts are not supported even though they are standard). byte 0 - :eval_constant_identifier - ; @TODO: enum values - fputs(2, .str_constant_identifier) - exit(1) - :str_constant_identifier - string Constant identifiers not handled (see @TODO). - byte 10 - byte 0 :eval_constant_int expr += 8 *8p_value = *8expr diff --git a/05/tokenize.b b/05/tokenize.b index f5b0c30..e52b30a 100644 --- a/05/tokenize.b +++ b/05/tokenize.b @@ -97,9 +97,6 @@ function get_keyword_str byte 0 -; file offset to write next piece of read-only data; initialized in main.b -global rodata_end_offset - ; turn pptokens into tokens, written to out. ; tokens are 16 bytes and have the following format: ; uchar type @@ -256,26 +253,28 @@ function tokenize data = c goto token_output :tokenize_string_literal - n = rodata_end_offset - RODATA_OFFSET - n += RODATA_ADDR ; address of string - lseek(output_fd, rodata_end_offset, SEEK_SET) + data = rodata_end_addr + p = output_file_data + rodata_end_addr + :string_literal_loop in += 1 ; skip opening " :string_literal_char_loop if *1in == '" goto string_literal_char_loop_end c = read_c_char(&in) if c ] 255 goto bad_char_in_string - fputc(output_fd, c) + *1p = c + p += 1 goto string_literal_char_loop :string_literal_char_loop_end pptoken_skip(&in) ; skip closing " pptoken_skip_spaces(&in) if *1in == '" goto string_literal_loop ; string concatenation, e.g. "Hello, " "world!" 
- fputc(output_fd, 0) ; null terminator - rodata_end_offset = lseek(output_fd, 0, SEEK_CUR) + *1p = 0 ; null terminator + p += 1 + rodata_end_addr = p - output_file_data + *1out = TOKEN_STRING_LITERAL out += 2 ; no info - data = n goto token_output :tokenize_float ; @NONSTANDARD: this doesn't allow for floats whose integral part is >=2^64, e.g. 1000000000000000000000000.0 diff --git a/05/util.b b/05/util.b index 89d9ce9..9e7afbb 100644 --- a/05/util.b +++ b/05/util.b @@ -139,12 +139,48 @@ function die fputs(2, message) exit(1) +function ftruncate + argument fd + argument length + local x + x = syscall(77, fd, length) + if x != 0 goto ftruncate_failed + return + +:ftruncate_failed + fputs(2, .str_ftruncate_failed) + exit(1) +:str_ftruncate_failed + string ftruncate failed. + byte 10 + byte 0 + +function mmap + argument addr + argument length + argument prot + argument flags + argument fd + argument offset + return syscall(9, addr, length, prot, flags, fd, offset) + +function munmap + argument addr + argument length + return syscall(11, addr, length) + +#define PROT_READ 1 +#define PROT_WRITE 2 +#define PROT_READ_WRITE 3 +#define MAP_SHARED 0x01 +#define MAP_PRIVATE_ANONYMOUS 0x22 + function malloc argument size local total_size local memory total_size = size + 8 - memory = syscall(9, 0, total_size, 3, 0x22, -1, 0) + memory = mmap(0, total_size, PROT_READ_WRITE, MAP_PRIVATE_ANONYMOUS, -1, 0) if memory ] 0xffffffffffff0000 goto malloc_failed *8memory = total_size return memory + 8 @@ -164,7 +200,7 @@ function free local size psize = memory - 8 size = *8psize - syscall(11, psize, size) + munmap(psize, size) return ; returns a pointer to a null-terminated string containing the @@ -568,6 +604,18 @@ function open_w :open_w_error file_error(filename) return -1 + +; open the given file for reading and writing with the given mode +function open_rw + argument filename + argument mode + local fd + fd = syscall(2, filename, 0x242, mode) + if fd < 0 goto open_rw_error + return 
fd + :open_rw_error + file_error(filename) + return -1 function close argument fd diff --git a/README.md b/README.md index f379be1..02134d9 100644 --- a/README.md +++ b/README.md @@ -96,7 +96,7 @@ I'll leave that to someone else. ## instruction set x86-64 has a *gigantic* instruction set. The manual for it is over 2,000 pages -long! So it makes sense to select only a small subset of it to use. +long! To make things simpler, we will only use a small subset. Here are all the instructions we'll be using. If you're not familiar with x86-64 assembly, you might want to look over these (but you don't need to understand everything). -- cgit v1.2.3