diff options
author | pommicket <pommicket@gmail.com> | 2022-02-19 12:01:56 -0500 |
---|---|---|
committer | pommicket <pommicket@gmail.com> | 2022-02-19 12:01:56 -0500 |
commit | 9c6b9a1450d6610a37234b016bc1cfb021f30ee8 (patch) | |
tree | 9416b9c6b13b30f9107030cdccca7b33b1b27d60 | |
parent | 59b7931165ecbd189214142b95d3d2033f4f579f (diff) |
full build of tcc with itself - doesn't match gcc :o
-rw-r--r-- | 05/Makefile | 36 | ||||
-rw-r--r-- | 05/README.md | 145 | ||||
-rw-r--r-- | 05/main.c | 29 | ||||
-rw-r--r-- | 05/stdlib.h | 13 | ||||
-rw-r--r-- | 05/tcc-0.9.27/.gitignore | 2 | ||||
-rw-r--r-- | 05/tcc-0.9.27/config.h | 8 | ||||
-rw-r--r-- | 05/tcc-0.9.27/lib/libtcc1.c | 9 | ||||
-rw-r--r-- | 05/tcc-0.9.27/stdlib.h | 13 | ||||
-rw-r--r-- | 05/tcc-0.9.27/tcc.h | 8 | ||||
-rw-r--r-- | 05/tcc-0.9.27/tccgen.c | 14 | ||||
-rw-r--r-- | markdown.c | 17 |
11 files changed, 250 insertions, 44 deletions
diff --git a/05/Makefile b/05/Makefile index 628d286..902a7bf 100644 --- a/05/Makefile +++ b/05/Makefile @@ -1,6 +1,8 @@ TCCDIR=tcc-0.9.27 -TCC=$(TCCDIR)/tcc -all: out04 a.out test.out README.html +TCC0=$(TCCDIR)/tcc0 +TCC1=$(TCCDIR)/tcc1 +TCCINST=/usr/local/lib/tcc-bootstrap +all: out04 a.out test.out README.html $(TCCDIR)/lib/libtcc1.a in04: *.b ../04a/out04 ../04a/out04 main.b in04 out04: in04 ../04/out03 @@ -11,13 +13,25 @@ out04: in04 ../04/out03 ./out04 $< $@ a.out: main.c *.h out04 ./out04 -test.out: $(TCC) test.s.o test.c.o - $(TCC) -static -nostdlib test.s.o test.c.o -o test.out -test.s.o: $(TCC) test.s - $(TCC) -static -nostdlib -c test.s -o test.s.o -test.c.o: $(TCC) test.c - $(TCC) -static -nostdlib -c test.c -o test.c.o -$(TCC): $(TCCDIR)/*.c $(TCCDIR)/*.h out04 - cd $(TCCDIR) && ../out04 tcc.c tcc +test.out: $(TCC0) test.s.o test.c.o + $(TCC0) -static -nostdlib test.s.o test.c.o -o test.out +test.s.o: $(TCC0) test.s + $(TCC0) -static -nostdlib -c test.s -o test.s.o +test.c.o: $(TCC0) test.c + $(TCC0) -static -nostdlib -c test.c -o test.c.o +$(TCC0): $(TCCDIR)/*.c $(TCCDIR)/*.h out04 + cd $(TCCDIR) && ../out04 tcc.c tcc0 +$(TCCDIR)/lib/libtcc1.a: $(TCC0) $(TCCDIR)/lib/*.[cS] + $(TCC0) -c $(TCCDIR)/lib/alloca86_64-bt.S -o $(TCCDIR)/lib/alloca86_64-bt.o + $(TCC0) -c $(TCCDIR)/lib/alloca86_64.S -o $(TCCDIR)/lib/alloca86_64.o + $(TCC0) -c $(TCCDIR)/lib/va_list.c -o $(TCCDIR)/lib/va_list.o + $(TCC0) -c $(TCCDIR)/lib/libtcc1.c -o $(TCCDIR)/lib/libtcc1.o + $(TCC0) -ar $(TCCDIR)/lib/libtcc1.a $(TCCDIR)/lib/*.o +install-tcc0: $(TCCDIR)/lib/libtcc1.a $(TCCDIR)/include/*.h + mkdir -p $(TCCINST)/include + cp -r $(TCCDIR)/include/*.h $(TCCINST)/include/ + cp -r $(TCCDIR)/lib/libtcc1.a $(TCCINST)/ +$(TCC1): $(TCC0) $(TCCINST)/libtcc1.a + cd $(TCCDIR) && ./tcc0 tcc.c -o tcc1 clean: - rm -f out* README.html *.out *.o $(TCC) + rm -f out* README.html *.out *.o $(TCC0) $(TCC1) $(TCCDIR)/lib/*.[oa] diff --git a/05/README.md b/05/README.md index 5e5ba1d..5f54a9a 100644 --- 
a/05/README.md +++ b/05/README.md @@ -9,8 +9,8 @@ make ``` to build our C compiler and TCC. This will take some time (approx. 25 seconds on my computer). -A test program, `test.out` will be compiled using `tcc`. If you run -it, you should get the output +Two test programs will be produced: `a.out`, compiled using our C compiler, and +`test.out`, compiled using `tcc`. If you run either one, you should get the output ``` Hello, world! @@ -107,8 +107,149 @@ Tokens are one of: - A character literal (e.g. `'a'`, `'\n'`) - A floating-point literal (e.g. `3.6`, `5e10`) +Next, an internal representation of the program is constructed in memory. +This is where we read the tokens `if` `(` `a` `)` `printf` `(` `"Hello!\n"` `)` `;` +and interpret it as an if statement, whose condition is the variable `a`, and whose +body consists of the single statement calling the `printf` function with the argument `"Hello!\n"`. + +Finally, we output the code for every function. + +## executable format + +This compiler's executables are much more sophisticated than the previous ones'. +Instead of storing code and data all in one segment, we have three segments: one +6MB segment for code (the program's functions are only allowed to use up 4MB of that, though), +one 4MB segment for read-only data (strings), and one 4MB segment for read-write data. + +Well, it *should* only be read-write, but unfortunately it also has to be executable... + +## syscalls + +Of course, we need some way of making system calls in C. 
+We do this with a macro, `__syscall`, which you'll find in `stdc_common.h`: + +``` +static unsigned char __syscall_data[] = { + // mov rax, [rsp+24] + 0x48, 0x8b, 0x84, 0x24, 24, 0, 0, 0, + // mov rdi, rax + 0x48, 0x89, 0xc7, + // mov rax, [rsp+32] + 0x48, 0x8b, 0x84, 0x24, 32, 0, 0, 0, + // mov rsi, rax + 0x48, 0x89, 0xc6, + // mov rax, [rsp+40] + 0x48, 0x8b, 0x84, 0x24, 40, 0, 0, 0, + // mov rdx, rax + 0x48, 0x89, 0xc2, + // mov rax, [rsp+48] + 0x48, 0x8b, 0x84, 0x24, 48, 0, 0, 0, + // mov r10, rax + 0x49, 0x89, 0xc2, + // mov rax, [rsp+56] + 0x48, 0x8b, 0x84, 0x24, 56, 0, 0, 0, + // mov r8, rax + 0x49, 0x89, 0xc0, + // mov rax, [rsp+64] + 0x48, 0x8b, 0x84, 0x24, 64, 0, 0, 0, + // mov r9, rax + 0x49, 0x89, 0xc1, + // mov rax, [rsp+16] + 0x48, 0x8b, 0x84, 0x24, 16, 0, 0, 0, + // syscall + 0x0f, 0x05, + // mov [rsp+8], rax + 0x48, 0x89, 0x84, 0x24, 8, 0, 0, 0, + // ret + 0xc3 +}; + +#define __syscall(no, arg1, arg2, arg3, arg4, arg5, arg6)\ + (((unsigned long (*)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long))__syscall_data)\ + (no, arg1, arg2, arg3, arg4, arg5, arg6)) +``` + +The `__syscall_data` array contains machine language instructions which perform a system call, and the +`__syscall` macro "calls" the array as if it were a function. This is why we need a read-write-executable data +segment -- otherwise we'd need to implement system calls in the compiler. + +## C standard library + +The C89 standard specifies a bunch of "standard library" functions which any implementation has to make available, e.g. +`printf()`, `atoi()`, `exit()`. +Fortunately, we don't have to write these functions in the 04 language; we can write them in C. + +To use a particular function, a C program needs to include the appropriate header file, e.g. +`#include <stdio.h>` lets you use `printf()` and other I/O-related functions. 
Normally, +these header files just declare what types the parameters to the functions should be, +but we actually put the function implementations there. + +Let's take a look at the contents of `ctype.h`, which provides the functions `islower`, `isupper`, etc.: +``` +#ifndef _CTYPE_H +#define _CTYPE_H + +#include <stdc_common.h> + +int islower(int c) { + return c >= 'a' && c <= 'z'; +} + +int isupper(int c) { + return c >= 'A' && c <= 'Z'; +} + +int isalpha(int c) { + return isupper(c) || islower(c); +} + +int isalnum(int c) { + return isalpha(c) || isdigit(c); +} + +... + +#endif +``` +The first two lines and last line prevent problems when the file is included multiple times. +We begin by including `stdc_common.h`, which has a bunch of functions and type definitions which all +our header files use, and then we define each of the necessary C standard library functions. + + ## limitations +There are various minor ways in which this compiler doesn't actually handle all of C89. +Here is a list of things we do wrong (this list is probably missing things, though): + +- [trigraphs](https://en.wikipedia.org/wiki/Digraphs_and_trigraphs#C) are not handled +- `char[]` string literal initializers can't contain null characters (e.g. `char x[] = "a\0b";` doesn't work) +- you can only access members of l-values (e.g. `int x = function_which_returns_struct().member` doesn't work) +- no default-int (this is a legacy feature of C, e.g. `main() { }` can technically stand in for `int main() {}`) +- the keyword `auto` is not handled (again, a legacy feature of C) +- `default:` must be the last label in a switch statement. +- external variable declarations are ignored (e.g. `extern int x; int main() { return x; } int x = 5; ` doesn't work) +- `typedef`s, and `struct`/`union`/`enum` declarations aren't allowed inside functions +- conditional expressions aren't allowed inside `case` (horribly, `switch (x) { case 5 ? 6 : 3: ; }` is legal C). 
+- bit-fields aren't handled +- Technically, `1[array]` is equivalent to `array[1]`, but we don't handle that. +- C89 has *very* weird typing rules about `void*`/`non-void*` inside conditional expressions. We don't handle that properly. +- C89 allows calling functions without declaring them, for legacy reasons. We don't handle that. +- Floating-point constant expressions are very limited. Only `double` literals and 0 are supported (it was hard enough +to parse floating-point literals in a language without floating-point variables!) +- Floating-point literals can't have their integer part greater than 2<sup>64</sup>-1. +- Redefining a macro is always an error, even if it's the same definition. +- You can't have a variable/function/etc. called `defined`. +- Various little things about when macros are evaluated in some contexts. +setjmp.h:// @NONSTANDARD: we don't actually support setjmp +stddef.h:// @NONSTANDARD: we don't have wchar_t +stdlib.h:// @NONSTANDARD: we don't define MB_CUR_MAX or any of the mbtowc functions +time.h:// @NONSTANDARD(except in UTC+0): we don't support local time in timezones other than UTC+0. +time.h: // @NONSTANDARD-ish. + + +Also, the keywords `signed`, `volatile`, `register`, and `const` are all ignored. This shouldn't have an effect +on any legal C program, though. 
+ ## modifications of tcc's source code @@ -1,26 +1,15 @@ -/* #define _STDLIB_DEBUG */ -/* #include <math.h> */ #include <stdio.h> -/* #include <signal.h> */ -/* #include <stdlib.h> */ -/* #include <string.h> */ -/* #include <time.h> */ -/* #include <float.h> */ -/* #include <setjmp.h> */ -/* */ +#include <stdlib.h> int main(int argc, char **argv) { - int *p = 0x100; - p += 1; - switch (5) { - case 5: - switch (6) { - default:; - } - case 6: - ; - } - printf("%p\n",p); + printf("%p\n", malloc(1024*16)); + int *list = malloc(1024*4); + printf("%p \n",list); + list[1023] = 77; + list = realloc(list, 1024*64); + printf("%p \n",list); + printf("%d\n",list[1023]); + free(list); return 0; } diff --git a/05/stdlib.h b/05/stdlib.h index b93d4aa..cbfd0b5 100644 --- a/05/stdlib.h +++ b/05/stdlib.h @@ -73,12 +73,25 @@ void *realloc(void *ptr, size_t size) { free(ptr); return NULL; } +#if 0 + // this (better) implementation doesn't seem to be copying stuff to the + // new mapping properly uint64_t *memory = (char *)ptr - 16; uint64_t old_size = *memory; uint64_t *new_memory = _mremap(memory, old_size, size, MREMAP_MAYMOVE); if ((uint64_t)new_memory > 0xffffffffffff0000) return NULL; *new_memory = size; return (char *)new_memory + 16; +#endif + + uint64_t *memory = (char *)ptr - 16; + uint64_t old_size = *memory; + void *new = malloc(size); + char *new_dat = (char *)new + 16; + *(uint64_t *)new = size; + memcpy(new_dat, ptr, old_size); + free(ptr); + return new_dat; } diff --git a/05/tcc-0.9.27/.gitignore b/05/tcc-0.9.27/.gitignore index a50ca5e..f7853f0 100644 --- a/05/tcc-0.9.27/.gitignore +++ b/05/tcc-0.9.27/.gitignore @@ -16,6 +16,8 @@ a.out tcc_g tcc +tcc[0123456789] +tcc[0123456789]a *-tcc libtcc*.def diff --git a/05/tcc-0.9.27/config.h b/05/tcc-0.9.27/config.h index d363b97..95ec14d 100644 --- a/05/tcc-0.9.27/config.h +++ b/05/tcc-0.9.27/config.h @@ -1,8 +1,10 @@ #define TCC_VERSION "0.9.27" #define CONFIG_TCC_STATIC 1 -#define CONFIG_TCC_ELFINTERP "/XXX" -#define 
CONFIG_TCC_CRT_PREFIX "/XXX" -#define CONFIG_SYSROOT "/XXX" +//#define CONFIG_TCC_ELFINTERP "/XXX" +//#define CONFIG_TCC_CRT_PREFIX "/XXX" +//#define CONFIG_SYSROOT "/XXX" #define inline #define TCC_TARGET_X86_64 1 #define ONE_SOURCE 1 +#define CONFIG_LDDIR "lib/x86_64-linux-gnu" +#define CONFIG_TCCDIR "/usr/local/lib/tcc-bootstrap" diff --git a/05/tcc-0.9.27/lib/libtcc1.c b/05/tcc-0.9.27/lib/libtcc1.c index 0e46618..633696b 100644 --- a/05/tcc-0.9.27/lib/libtcc1.c +++ b/05/tcc-0.9.27/lib/libtcc1.c @@ -613,10 +613,17 @@ unsigned long long __fixunsxfdi (long double a1) return 0; } + +static long double negate_ld(long double d) { + register unsigned long long *p = (unsigned long long *)&d; + p[1] ^= 1ul<<15; + return *(long double *)p; +} + long long __fixxfdi (long double a1) { long long ret; int s; - ret = __fixunsxfdi((s = a1 >= 0) ? a1 : -a1); + ret = __fixunsxfdi((s = a1 >= 0) ? a1 : negate_ld(a1)); return s ? ret : -ret; } #endif /* !ARM */ diff --git a/05/tcc-0.9.27/stdlib.h b/05/tcc-0.9.27/stdlib.h index b93d4aa..cbfd0b5 100644 --- a/05/tcc-0.9.27/stdlib.h +++ b/05/tcc-0.9.27/stdlib.h @@ -73,12 +73,25 @@ void *realloc(void *ptr, size_t size) { free(ptr); return NULL; } +#if 0 + // this (better) implementation doesn't seem to be copying stuff to the + // new mapping properly uint64_t *memory = (char *)ptr - 16; uint64_t old_size = *memory; uint64_t *new_memory = _mremap(memory, old_size, size, MREMAP_MAYMOVE); if ((uint64_t)new_memory > 0xffffffffffff0000) return NULL; *new_memory = size; return (char *)new_memory + 16; +#endif + + uint64_t *memory = (char *)ptr - 16; + uint64_t old_size = *memory; + void *new = malloc(size); + char *new_dat = (char *)new + 16; + *(uint64_t *)new = size; + memcpy(new_dat, ptr, old_size); + free(ptr); + return new_dat; } diff --git a/05/tcc-0.9.27/tcc.h b/05/tcc-0.9.27/tcc.h index c7f6c72..6c3c10c 100644 --- a/05/tcc-0.9.27/tcc.h +++ b/05/tcc-0.9.27/tcc.h @@ -30,14 +30,15 @@ #include <string.h> #include <errno.h> #include 
<math.h> -#ifdef __GNUC__ +#include <float.h> +#if defined __GNUC__ || defined __TINYC__ #include <fcntl.h> #endif #include <setjmp.h> #include <time.h> #ifndef _WIN32 -#ifdef __GNUC__ +#if defined __GNUC__ || defined __TINYC__ # include <unistd.h> # include <sys/time.h> #endif @@ -211,7 +212,8 @@ extern long double strtold (const char *__nptr, char **__endptr); # define CONFIG_TCC_SYSINCLUDEPATHS \ "{B}/include" \ ":" ALSO_TRIPLET(CONFIG_SYSROOT "/usr/local/include") \ - ":" ALSO_TRIPLET(CONFIG_SYSROOT "/usr/include") + ":" ALSO_TRIPLET(CONFIG_SYSROOT "/usr/include") \ + ":/usr/include/x86_64-linux-gnu" # endif #endif diff --git a/05/tcc-0.9.27/tccgen.c b/05/tcc-0.9.27/tccgen.c index 7f5d1e6..ecb7812 100644 --- a/05/tcc-0.9.27/tccgen.c +++ b/05/tcc-0.9.27/tccgen.c @@ -2458,6 +2458,16 @@ static void gen_cast_s(int t) gen_cast(&type); } +static long double negate_ld(long double d) { + #if LDBL_MANT_DIG == 64 + register unsigned long long *p = (unsigned long long *)&d; + p[1] ^= 1ul<<15; + return *(long double *)p; + #else + return -d; + #endif +} + static void gen_cast(CType *type) { int sbt, dbt, sf, df, c, p; @@ -2499,12 +2509,12 @@ static void gen_cast(CType *type) if ((sbt & VT_UNSIGNED) || !(vtop->c.i >> 63)) vtop->c.ld = vtop->c.i; else - vtop->c.ld = -(long double)-vtop->c.i; + vtop->c.ld = negate_ld((long double)-vtop->c.i); } else if(!sf) { if ((sbt & VT_UNSIGNED) || !(vtop->c.i >> 31)) vtop->c.ld = (uint32_t)vtop->c.i; else - vtop->c.ld = -(long double)-(uint32_t)vtop->c.i; + vtop->c.ld = negate_ld((long double)-(uint32_t)vtop->c.i); } if (dbt == VT_FLOAT) @@ -19,7 +19,12 @@ static void output_md_text(FILE *out, int *flags, int line_number, const char *t for (p = text; *p; ++p) { if ((*flags & FLAG_CODE) && *p != '`') { - putc(*p, out); + switch (*p) { + case '<': fprintf(out, "&lt;"); break; + case '>': fprintf(out, "&gt;"); break; + case '&': fprintf(out, "&amp;"); break; + default: putc(*p, out); break; + } continue; } switch (*p) { @@ -198,10 +203,18 @@ int 
main(int argc, char **argv) { fprintf(out, "<pre><code>\n"); while (fgets(line, sizeof line, in)) { + char *p; ++line_number; if (strncmp(line, "```", 3) == 0) break; - fprintf(out, "%s", line); + for (p = line; *p; ++p) { + switch (*p) { + case '<': fprintf(out, "&lt;"); break; + case '>': fprintf(out, "&gt;"); break; + case '&': fprintf(out, "&amp;"); break; + default: fputc(*p, out); break; + } + } } fprintf(out, "</code></pre>\n"); |