/*
 * dlsub: given a dynamic library and the C headers that declare its
 * functions, generate a C source file containing, for every function that is
 * both exported by the library and declared in a header:
 *   - a function pointer typedef PTR_<name>,
 *   - a static pointer REAL_<name>,
 * plus a dlsub_init() that loads the library and fills in the pointers.
 *
 * Provenance: commit 82ef8c5f715a92e6e6c1708b5706332066e9edc8 "initial commit"
 * by pommicket, Sat, 14 Aug 2021 01:38:09 -0400
 * (3 files changed, 833 insertions: .gitignore, Makefile, main.c).
 *
 * Companion files from the same commit:
 *   .gitignore:  *.so  out.c  dlsub  tags  TAGS  *~  *.exe  *.ilk  *.lib  *.exp  *.dll
 *   Makefile:
 *     dlsub: main.c
 *         $(CC) -O0 -g -o dlsub main.c -std=c89 -Wpedantic -pedantic -Wall
 *               -Wextra -Wshadow -Wconversion -Wimplicit-fallthrough
 *
 * The code is kept C89-compatible because of the -std=c89 build flag.
 */
#define VERSION "0.0"

#define MAX_HEADERS 2000
#define MAX_LIBS 2000
#define MAX_INCLUDES 2000

#if __GNUC__
#define UNUSED __attribute__((unused))
#else
#define UNUSED
#endif

#if __unix__
	/* must come before any system header so POSIX declarations are visible */
	#define _POSIX_C_SOURCE 200809L
	/*
	 * NOTE(review): the header names were lost from the copy this file was
	 * restored from; this list is reconstructed from the functions used below.
	 */
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>
	#include <ctype.h>
	#include <limits.h>
	#include <errno.h>
	#include <unistd.h>
	#include <fcntl.h>
	#include <sys/types.h>
	#include <sys/wait.h>
	#include <sys/stat.h>
	#include <sys/mman.h>
	#define C_PREPROCESSOR_DEFAULT "/usr/bin/tcc"

	/* Returns nonzero if the current process may read the file `name'. */
	static int file_is_readable(const char *name) {
		return access(name, R_OK) == 0;
	}

	/*
	 * The structs below are read straight from the file with fread, so they
	 * must have no compiler-inserted padding; make padding a hard error.
	 * size_t stands in for the ELF word size (4 bytes for ELF32, 8 for ELF64);
	 * the loader rejects files whose class does not match sizeof(size_t).
	 */
	#if __GNUC__
	#pragma GCC diagnostic push
	#pragma GCC diagnostic error "-Wpadded"
	#endif
	typedef struct {
		unsigned magic;
		unsigned char class;
		unsigned char endianness;
		unsigned char version;
		unsigned char abi;
		unsigned char abi_version;
		char _pad1[7];
		unsigned short type;
		unsigned short architecture;
		unsigned version2;
		size_t entry;
		size_t phoff;
		size_t shoff;
		unsigned flags;
		unsigned short ehsize;
		unsigned short phentsize;
		unsigned short phnum;
		unsigned short shentsize;
		unsigned short shnum;
		unsigned short shstrndx;
	} ELFHeader;

	typedef struct {
		unsigned name;
		unsigned type;
		size_t flags;
		size_t addr;
		size_t offset;
		size_t size;
		unsigned link;
		unsigned info;
		size_t addralign;
		size_t entsize;
	} ELFSectionHeader;

	#if LONG_MAX == 0x7fffffff
	/* 32-bit struct */
	typedef struct {
		unsigned name;
		size_t value;
		size_t size;
		unsigned char info;
		unsigned char other;
		unsigned short shndx;
	} ELFSym;
	#else
	/* 64-bit struct */
	typedef struct {
		unsigned name;
		unsigned char info;
		unsigned char other;
		unsigned short shndx;
		size_t value;
		size_t size;
	} ELFSym;
	#endif

	#if __GNUC__
	#pragma GCC diagnostic pop
	#endif

#elif _WIN32
	#define C_PREPROCESSOR_DEFAULT "cl"
	/*
	 * NOTE(review): the six header names of this branch were lost from the
	 * copy this file was restored from; the list below is a guess. The branch
	 * is unfinished either way (see the #error).
	 */
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>
	#include <ctype.h>
	#include <limits.h>
	#include <windows.h>
	#error "@TODO"
#else
	#error "Unsupported operating system."
#endif


#define VERSION_TEXT "dlsub version " VERSION "\n"


/*
 * Print version, usage, and option summary to stdout, then exit with status 0.
 * The text is split over several printf calls to keep each string literal
 * short enough for C89's minimum translation limit.
 * NOTE(review): the <...> placeholders were lost from the copy this file was
 * restored from; the wording of the placeholders is reconstructed.
 */
static void show_help_text_and_exit(const char *program_name) {
	printf(VERSION_TEXT
		"\n");
	printf(
		"Usage: %s -i <header file> -l <library file> ...\n",
		program_name);
	printf(
		"Options:\n"
		"\t-i <header file>        Add a header file to be processed.\n"
		"\t-I <include directory>  Add an include directory when preprocessing header files.\n"
		"\t-l <library file>       Set the dynamic library file you want to replace.\n"
		"\t-o <output file>        Set the output file.\n"
	);
	printf(
		"\t--help                  Show this help text and exit.\n"
		"\t--version               Show version number and exit.\n"
		"Environment variables:\n"
		"\tC_PREPROCESSOR - Program to be used for C preprocessing (default: '" C_PREPROCESSOR_DEFAULT "')\n"
	);
	exit(0);
}

/* can `c' appear in a C identifier? */
static int is_ident(int c) {
	return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_';
}

/* String hash: start at 5381, then hash = hash * 33 + byte for every byte. */
static unsigned long str_hash(const char *str) {
	unsigned long hash = 5381;
	const unsigned char *p;

	for (p = (const unsigned char *)str; *p; ++p)
		hash = ((hash << 5) + hash) + *p;

	return hash;
}


/*
 * One symbol. Allocated with extra room after the struct so that `symbol'
 * holds the whole NUL-terminated name (C89-style trailing array).
 */
typedef struct {
	unsigned char declared; /* have we found this in a header file */
	char symbol[1];
} SymbolHashEntry;

/*
 * Open-addressing hash set of symbols with linear probing.
 * A zero-initialized table is a valid empty table.
 */
typedef struct {
	size_t n_entries;         /* number of slots in `entries' */
	size_t n_present_entries; /* number of non-NULL slots */
	SymbolHashEntry **entries;
} SymbolHashTable;

/* Print an out-of-memory message and exit with status 2. */
static void out_of_memory(void) {
	fprintf(stderr, "Out of memory.\n");
	exit(2);
}

/* Enlarge the slot array (2n + 55 slots) and re-insert every existing entry. */
static void symbol_hash_table_grow(SymbolHashTable *table) {
	size_t i, new_n_entries = 2*table->n_entries + 55;
	SymbolHashEntry **new_entries = calloc(new_n_entries, sizeof *new_entries);
	if (!new_entries)
		out_of_memory();

	for (i = 0; i < table->n_entries; ++i) {
		SymbolHashEntry *entry = table->entries[i];
		unsigned long p;
		if (!entry) continue;
		p = str_hash(entry->symbol) % new_n_entries;
		while (new_entries[p]) {
			++p;
			if (p >= new_n_entries) p -= new_n_entries;
		}
		new_entries[p] = entry;
	}

	free(table->entries);
	table->entries = new_entries;
	table->n_entries = new_n_entries;
}

/*
 * Add `sym_name' to the table if it is not already there. The table is grown
 * first whenever it is at least half full, so probing always finds a free slot.
 * Exits with status 2 if memory runs out.
 */
static void symbol_hash_table_insert(SymbolHashTable *table, const char *sym_name) {
	unsigned long p;
	SymbolHashEntry *entry;
	if (table->n_present_entries * 2 >= table->n_entries) {
		symbol_hash_table_grow(table);
	}

	p = str_hash(sym_name) % table->n_entries;
	while ((entry = table->entries[p])) {
		if (strcmp(entry->symbol, sym_name) == 0)
			return; /* already exists */

		++p;
		if (p >= table->n_entries) p -= table->n_entries;
	}
	/* sizeof *entry already includes one byte of `symbol'; the +1 is slack */
	entry = calloc(1, sizeof *entry + strlen(sym_name) + 1);
	if (!entry)
		out_of_memory();
	strcpy(entry->symbol, sym_name);
	++table->n_present_entries;
	table->entries[p] = entry;
}

/* Returns the entry named `name', or NULL if the table does not contain it. */
static SymbolHashEntry *symbol_hash_table_get(SymbolHashTable *table, const char *name) {
	unsigned long p;
	SymbolHashEntry *entry;

	/* an empty table has no slots; the modulo below would divide by zero */
	if (table->n_entries == 0)
		return NULL;

	p = str_hash(name) % table->n_entries;
	while ((entry = table->entries[p])) {
		if (strcmp(entry->symbol, name) == 0)
			return entry;
		++p;
		if (p >= table->n_entries) p -= table->n_entries;
	}
	return NULL;
}

/* Free every entry and the slot array, leaving a valid empty table. */
static void symbol_hash_table_free(SymbolHashTable *table) {
	size_t i;
	for (i = 0; i < table->n_entries; ++i)
		free(table->entries[i]);
	free(table->entries);
	memset(table, 0, sizeof *table);
}

#if __unix__

/*
 * Seek to byte `offset' of `fp' and read exactly `size' bytes into `buf'.
 * Returns 1 on success, 0 if the offset is unrepresentable or the read is short.
 */
static int read_exact_at(FILE *fp, size_t offset, void *buf, size_t size) {
	if (offset > (size_t)LONG_MAX) return 0;
	if (fseek(fp, (long)offset, SEEK_SET) != 0) return 0;
	return fread(buf, 1, size, fp) == size;
}

/*
 * Seek to byte `offset' of `fp' and read up to cap-1 bytes into `buf', which
 * is always NUL-terminated afterwards. Names longer than cap-1 are truncated.
 * Returns 1 if at least one byte was read, 0 otherwise.
 */
static int read_string_at(FILE *fp, size_t offset, char *buf, size_t cap) {
	size_t n;
	buf[0] = '\0';
	if (offset > (size_t)LONG_MAX) return 0;
	if (fseek(fp, (long)offset, SEEK_SET) != 0) return 0;
	n = fread(buf, 1, cap - 1, fp);
	buf[n] = '\0';
	return n > 0;
}

/* Report a malformed/unsupported ELF file and exit with status 2. */
static void elf_format_error(const char *libname) {
	fprintf(stderr, "%s has an unsupported or invalid ELF format.\n", libname);
	exit(2);
}

/*
 * Insert into `symbols' the name of every function exported by the ELF
 * shared object `libname'. Exits with a diagnostic if the file cannot be
 * opened (status 1) or is not a usable ELF dynamic library (status 2).
 */
static void read_library_symbols(const char *libname, SymbolHashTable *symbols) {
	ELFHeader elf_header = {0};
	ELFSectionHeader strtab_header = {0}, section_header = {0};
	int any_dynsym = 0;
	size_t dynstr_offset = 0;
	unsigned shidx;
	FILE *fp = fopen(libname, "rb");

	if (!fp) {
		char prefix[128];
		sprintf(prefix, "Couldn't open %.100s", libname);
		perror(prefix);
		exit(1);
	}

	/* 0x464c457f is "\x7f" "ELF" read as a little-endian word; type 3 is a shared object */
	if (!read_exact_at(fp, 0, &elf_header, sizeof elf_header)
		|| elf_header.magic != 0x464c457f || elf_header.type != 3) {
		fprintf(stderr, "%s is not an ELF dynamic library.\n", libname);
		exit(2);
	}
	if (elf_header.endianness != 1 /* little-endian only */
		|| elf_header.shentsize < (sizeof(size_t) == 4 ? 0x28 : 0x40)
		|| (size_t)elf_header.class * 4 != sizeof(size_t) /* class 1 = 32-bit, 2 = 64-bit */
		|| elf_header.shoff > (size_t)LONG_MAX) {
		elf_format_error(libname);
	}

	/* the section-name string table lets us look sections up by name */
	if (!read_exact_at(fp,
		elf_header.shoff + (size_t)elf_header.shentsize * elf_header.shstrndx,
		&strtab_header, sizeof strtab_header))
		elf_format_error(libname);

	/* first pass: find .dynstr, which holds the dynamic symbol names */
	for (shidx = 0; shidx < elf_header.shnum; ++shidx) {
		char secname[32];
		if (!read_exact_at(fp,
			elf_header.shoff + (size_t)shidx * elf_header.shentsize,
			&section_header, sizeof section_header))
			elf_format_error(libname);
		if (!read_string_at(fp, strtab_header.offset + section_header.name,
			secname, sizeof secname))
			continue; /* unreadable name: cannot be .dynstr */
		if (strcmp(secname, ".dynstr") == 0) {
			dynstr_offset = section_header.offset;
		}
	}

	if (dynstr_offset == 0) {
		fprintf(stderr, "%s has no .dynstr section.\n", libname);
		exit(2);
	}

	/* second pass: walk every dynamic symbol table */
	for (shidx = 0; shidx < elf_header.shnum; ++shidx) {
		size_t nsyms, sym_idx;

		if (!read_exact_at(fp,
			elf_header.shoff + (size_t)shidx * elf_header.shentsize,
			&section_header, sizeof section_header))
			elf_format_error(libname);
		if (section_header.type != 0xB /* SHT_DYNSYM */)
			continue;

		any_dynsym = 1;

		/* the entsize check also rules out a division by zero below */
		if (section_header.entsize < sizeof(ELFSym)
			|| section_header.offset > (size_t)LONG_MAX)
			elf_format_error(libname);

		nsyms = section_header.size / section_header.entsize;

		for (sym_idx = 0; sym_idx < nsyms; ++sym_idx) {
			ELFSym sym;
			char sym_name[256];

			if (!read_exact_at(fp,
				section_header.offset + sym_idx * section_header.entsize,
				&sym, sizeof sym))
				elf_format_error(libname);

			if (sym.other == 2 || sym.other == 6 /* check visibility */
				|| (sym.info & 0xf) != 2 /* check if is function */
				|| (sym.info >> 4) != 1 /* make sure "bind" is global */
				|| sym.value == 0 /* make sure this function is actually *from* this library */) {
				/* this symbol isn't a function exported by the dynamic library. it's something else */
				continue;
			}

			if (!read_string_at(fp, dynstr_offset + sym.name, sym_name, sizeof sym_name))
				elf_format_error(libname);
			symbol_hash_table_insert(symbols, sym_name);
		}
	}

	if (!any_dynsym) {
		fprintf(stderr, "%s does not have a symbol table.\n", libname);
		exit(2);
	}

	fclose(fp);
}

/*
 * Write all `len' bytes of `buf' to `fd', retrying after short writes and
 * interrupted calls. Returns 1 on success, 0 on error (errno is set).
 */
static int write_all(int fd, const char *buf, size_t len) {
	while (len > 0) {
		ssize_t n = write(fd, buf, len);
		if (n < 0) {
			if (errno == EINTR) continue;
			return 0;
		}
		buf += n;
		len -= (size_t)n;
	}
	return 1;
}

/*
 * Run `preprocessor_program -E - -D SDL_DISABLE_IMMINTRIN_H [-I dir]...' with
 * the concatenated header files on its stdin and its stdout sent to a
 * temporary file, then map that file into memory.
 *
 * headers/includes hold indices into argv. The program is started with
 * execv, so it must be given as a path (PATH is not searched).
 *
 * Returns the mapped text and stores its length in *len_out; returns NULL
 * (with *len_out == 0) if the preprocessor produced no output. Exits with a
 * diagnostic on any failure, including a failing preprocessor.
 */
static char *preprocess_headers(const char *preprocessor_program, char **argv,
	const int *headers, int n_headers, const int *includes, int n_includes,
	size_t *len_out) {
	FILE *tmp = tmpfile();
	int preprocessed_headers_fd;
	pid_t compiler_process;
	int pipefd[2];
	struct stat statbuf;
	void *mapped;
	int i;

	*len_out = 0;

	if (!tmp || (preprocessed_headers_fd = fileno(tmp)) == -1) {
		perror("Couldn't create temporary preprocessing output file");
		exit(2);
	}

	if (pipe(pipefd) == -1) {
		perror("Couldn't create pipe");
		exit(2);
	}

	compiler_process = fork();
	if (compiler_process == -1) {
		perror("Couldn't create compiler process");
		exit(2);
	}

	if (compiler_process == 0) {
		/* child: becomes the preprocessor */
		static char *cc_argv[2*MAX_INCLUDES + 10];
		int n = 0;

		close(pipefd[1]); /* don't need write end of pipe */
		if (dup2(pipefd[0], 0) == -1) {
			perror("Couldn't redirect pipe to C compiler's stdin");
			_exit(2);
		}
		if (dup2(preprocessed_headers_fd, 1) == -1) {
			perror("Couldn't redirect C compiler's stdout to file");
			_exit(2);
		}
		cc_argv[n++] = (char *)preprocessor_program;
		cc_argv[n++] = "-E";
		cc_argv[n++] = "-";
		cc_argv[n++] = "-D";
		cc_argv[n++] = "SDL_DISABLE_IMMINTRIN_H";
		for (i = 0; i < n_includes; ++i) {
			cc_argv[n++] = "-I";
			cc_argv[n++] = argv[includes[i]];
		}
		cc_argv[n] = NULL;
		execv(preprocessor_program, cc_argv);
		/* execv only returns on failure */
		perror("Couldn't start C compiler");
		_exit(2); /* _exit: don't flush stdio buffers inherited from the parent */
	}

	/* parent: feed every header to the preprocessor */
	close(pipefd[0]); /* don't need read end of pipe */
	for (i = 0; i < n_headers; ++i) {
		const char *header = argv[headers[i]];
		int fd = open(header, O_RDONLY);
		char buf[4096];
		char prefix[128];
		ssize_t bytes_read;

		if (fd == -1) {
			sprintf(prefix, "Couldn't open %.100s", header);
			perror(prefix);
			exit(2);
		}

		while ((bytes_read = read(fd, buf, sizeof buf)) > 0) {
			if (!write_all(pipefd[1], buf, (size_t)bytes_read)) {
				perror("Couldn't write to C preprocessor");
				exit(2);
			}
		}

		if (bytes_read < 0) {
			sprintf(prefix, "Error reading %.100s", header);
			perror(prefix);
			exit(2);
		}

		close(fd);
	}

	close(pipefd[1]); /* EOF for the preprocessor */

	/* wait for compiler to finish */
	for (;;) {
		int status = 0;
		if (waitpid(compiler_process, &status, 0) == -1) {
			if (errno == EINTR) continue;
			perror("Couldn't wait for C preprocessor");
			exit(2);
		}
		if (WIFEXITED(status)) {
			int exit_status = WEXITSTATUS(status);
			if (exit_status == 0)
				break;
			fprintf(stderr, "C preprocessor failed with exit code %d\n", exit_status);
			exit(exit_status);
		} else if (WIFSIGNALED(status)) {
			int sig = WTERMSIG(status);
			fprintf(stderr, "C preprocessor terminated with signal %s (#%d)\n", strsignal(sig), sig);
			exit(2);
		}
	}

	if (fstat(preprocessed_headers_fd, &statbuf) == -1) {
		perror("Couldn't get size of preprocessed headers file");
		exit(2);
	}
	if (statbuf.st_size <= 0) {
		/* mmap rejects a zero length, and there is nothing to parse anyway */
		fclose(tmp);
		return NULL;
	}

	*len_out = (size_t)statbuf.st_size;
	mapped = mmap(NULL, *len_out, PROT_READ, MAP_PRIVATE, preprocessed_headers_fd, 0);
	if (mapped == MAP_FAILED) {
		perror("Couldn't map preprocessed headers file into memory");
		exit(2);
	}
	fclose(tmp); /* the mapping stays valid after the descriptor is closed */
	return mapped;
}

#else
	#error "@TODO"
#endif

/*
 * Normalize one preprocessed statement in place: strip line directives,
 * collapse whitespace, and drop a leading "extern " and any
 * __attribute__((...)) groups. Returns a pointer into the same buffer
 * (possibly advanced past removed leading text).
 */
static char *clean_statement(char *statement) {
	char *p;
	char *in, *out;
	char *attr;
	size_t len;

	/* remove line directives */
	while ((p = strchr(statement, '#'))) {
		char *end = strchr(p, '\n');
		if (end) {
			memmove(p, end + 1, strlen(end + 1) + 1);
		} else {
			*p = '\0';
		}
	}

	/* normalize whitespace (cast: isspace is undefined for negative char values) */
	for (p = statement; *p; ++p) {
		if (isspace((unsigned char)*p)) *p = ' ';
	}

	/*
	 * remove duplicate/unnecessary whitespace.
	 * note: strchr also matches the terminating NUL, so a space directly
	 * before the end of the string is dropped as well.
	 */
	for (in = statement, out = statement; *in; ++in) {
		if (in[0] == ' ' && strchr(" ()*,", in[1])) {
			continue;
		} else if (strchr(" ()*,", in[0]) && in[1] == ' ') {
			*out++ = *in;
			while (in[1] == ' ') ++in;
		} else {
			*out++ = *in;
		}
	}
	*out = 0;

	/* remove leading whitespace */
	if (*statement == ' ')
		++statement;
	/* remove trailing whitespace */
	len = strlen(statement);
	if (len && statement[len-1] == ' ') {
		statement[len-1] = '\0';
	}

	/* remove "extern" at beginning */
	if (strncmp(statement, "extern ", 7) == 0) {
		statement += 7;
	}

	/* remove GCC's __attribute__ s */
	while ((attr = strstr(statement, "__attribute__(("))) {
		int paren_level = 2; /* the two parentheses matched by strstr */

		for (p = attr + 15; *p && paren_level != 0; ++p) {
			switch (*p) {
			case '(': ++paren_level; break;
			case ')': --paren_level; break;
			}
		}
		if (*p == ' ') ++p;
		memmove(attr, p, strlen(p) + 1);
	}

	/* @TODO(windows): remove __cdecl, et al. */

	return statement;
}

/*
 * Returns nonzero if a cleaned statement can be a function declaration.
 * These conditions aren't airtight but practically speaking they're good.
 */
static int may_be_function_declaration(const char *statement) {
	if (strlen(statement) < 5) /* shortest possible function declaration is A f(); */
		return 0;
	if (!strchr(statement, '(') || !strchr(statement, ')'))
		return 0;
	if (strchr(statement, '{') || strchr(statement, '}'))
		return 0;
	return strncmp(statement, "struct ", 7) != 0
		&& strncmp(statement, "enum ", 5) != 0
		&& strncmp(statement, "union ", 6) != 0
		&& strncmp(statement, "static ", 7) != 0
		&& strncmp(statement, "typedef ", 8) != 0;
}

/*
 * If the cleaned `statement' declares a function that is in `symbols' and has
 * not been emitted yet, mark it declared and write its PTR_ typedef and
 * REAL_ pointer to `output'. The function name is taken to be the first
 * identifier directly followed by '(' after the leading type word.
 */
static void emit_declaration(FILE *output, SymbolHashTable *symbols, char *statement) {
	char *func_name = statement, *func_name_end = NULL;
	SymbolHashEntry *entry;
	int name_len;

	/* skip the first word of the return type, with optional const before/after */
	if (strncmp(func_name, "const ", 6) == 0) func_name += 6;
	while (is_ident(*func_name)) ++func_name;
	if (*func_name == ' ') ++func_name;
	if (strncmp(func_name, "const ", 6) == 0) func_name += 6;

	for (; *func_name; ++func_name) {
		if (is_ident(*func_name) && func_name[1] == '(') {
			/* we got it! walk back to the start of the identifier */
			func_name_end = func_name + 1;
			while (func_name > statement && is_ident(func_name[-1]))
				--func_name;
			break;
		}
	}

	if (!func_name_end)
		return;

	/* temporarily terminate the name for the lookup */
	*func_name_end = '\0';
	entry = symbol_hash_table_get(symbols, func_name);
	*func_name_end = '(';

	if (!entry) {
		/* ignore this function; it's not part of one of the libraries we're concerned with */
		return;
	}
	if (entry->declared) {
		/* already processed this function */
		return;
	}

	entry->declared = 1;
	name_len = (int)(func_name_end - func_name);
	fprintf(output,
		"typedef %.*s (*PTR_%.*s)%.*s;\n"
		"static PTR_%.*s REAL_%.*s;\n",
		(int)(func_name - statement), statement,   /* return type */
		name_len, func_name,
		(int)strlen(func_name_end), func_name_end, /* parameter list */
		name_len, func_name,
		name_len, func_name
	);
}

/*
 * Split the preprocessed text at semicolons and emit declarations for every
 * statement that looks like a declaration of a library function.
 * `text' may be NULL when `text_len' is 0.
 */
static void emit_declarations(FILE *output, SymbolHashTable *symbols,
	const char *text, size_t text_len) {
	size_t c = 0;
	const char *semicolon;

	while (c < text_len && (semicolon = memchr(text + c, ';', text_len - c)) != NULL) {
		char statement_data[1024], *statement;
		size_t semicolon_pos = (size_t)(semicolon - text);
		size_t l;
		int in_line_directive = 0;

		/* get rid of whitespace + line directives before actual statements */
		while (c < text_len) {
			char ch = text[c];
			if (ch == '\n')
				in_line_directive = 0;
			else if (ch == '#')
				in_line_directive = 1;
			else if (!in_line_directive && ch != '\r' && ch != '\t' && ch != ' ' && ch != '\v')
				break;
			++c;
		}

		/*
		 * the semicolon we found was inside a line directive (e.g. in a file
		 * name); look for the real end of the statement starting at c
		 */
		if (c > semicolon_pos)
			continue;

		/* overlong statements are truncated */
		l = semicolon_pos - c;
		if (l > sizeof statement_data - 5)
			l = sizeof statement_data - 5;
		memcpy(statement_data, &text[c], l);
		statement_data[l] = 0;

		statement = clean_statement(statement_data);
		if (may_be_function_declaration(statement))
			emit_declaration(output, symbols, statement);

		c = semicolon_pos + 1;
	}
}

/* Write `s' to `out' escaped so it can sit inside a C string literal. */
static void write_c_string_contents(FILE *out, const char *s) {
	for (; *s; ++s) {
		switch (*s) {
		case '"':
		case '\\':
			fputc('\\', out);
			fputc(*s, out);
			break;
		case '\n':
			fputs("\\n", out);
			break;
		default:
			fputc(*s, out);
			break;
		}
	}
}

/*
 * Write the platform glue and dlsub_init() to `output'. dlsub_init loads
 * `input_filename' and resolves REAL_<name> for every declared symbol; a
 * warning is printed to stderr for library functions no header declared.
 * The text is split over several calls to keep each string literal short.
 */
static void emit_loader(FILE *output, const char *input_filename,
	const SymbolHashTable *symbols) {
	size_t s;

	fputs("\n\n\n"
		"static void dlsub_init(void);\n"
		"\n"
		"#if __unix__\n"
		"#include <dlfcn.h>\n"
		"#define DLSUB_GET_DLHANDLE(filename) dlopen(filename, RTLD_LAZY)\n"
		"#define DLSUB_GET_SYM dlsym\n"
		"static void __attribute__((constructor)) dlsub_constructor(void) {\n"
		"\tdlsub_init();\n"
		"}\n",
		output);
	fputs(
		"#elif _WIN32\n"
		"extern void *__stdcall LoadLibraryA(const char *);\n"
		"extern int (*__stdcall GetProcAddress(void *, const char *))(void);\n"
		"#define DLSUB_GET_DLHANDLE LoadLibraryA\n"
		"#define DLSUB_GET_SYM GetProcAddress\n",
		output);
	fputs(
		"unsigned __stdcall DllMain(void *instDLL, unsigned reason, void *_reserved) {\n"
		"\t(void)instDLL; (void)_reserved;\n"
		"\tswitch (reason) {\n"
		"\tcase 1: /* DLL loaded */\n"
		"\t\tdlsub_init();\n"
		"\t\tbreak;\n"
		"\tcase 0: /* DLL unloaded */\n"
		"\t\tbreak;\n"
		"\t}\n"
		"\treturn 1;\n"
		"}\n",
		output);
	fputs(
		"#else\n"
		"#error \"Unrecognized OS.\"\n"
		"#endif\n",
		output);

	fputs("static void dlsub_init(void) {\n"
		"\tvoid *handle = DLSUB_GET_DLHANDLE(\"", output);
	write_c_string_contents(output, input_filename);
	fputs("\");\n", output);

	for (s = 0; s < symbols->n_entries; ++s) {
		const SymbolHashEntry *entry = symbols->entries[s];
		const char *symbol;
		if (!entry) continue;
		symbol = entry->symbol;
		if (entry->declared) {
			fprintf(output, "\tREAL_%s = (PTR_%s)DLSUB_GET_SYM(handle, \"%s\");\n", symbol, symbol, symbol);
		} else {
			fprintf(stderr, "Warning: Function '%s' declared in library, not found in any header file. It will not be made available.\n", symbol);
		}
	}

	fputs("}\n", output);
}

/*
 * Returns the index of the value following option argv[*i] and advances *i
 * onto it. If there is no following argument, prints `error_message' and
 * exits with status -1.
 */
static int take_option_value(int argc, int *i, const char *error_message) {
	if (*i >= argc - 1) {
		fputs(error_message, stderr);
		exit(-1);
	}
	return ++*i;
}

/*
 * Entry point. See show_help_text_and_exit for the command line.
 * Exit status: 0 on success; 1 if an input/output file can't be opened;
 * 2 for malformed input or system errors; -1 for command line errors; the
 * preprocessor's own status if it fails.
 */
int main(int argc, char **argv) {
	static int headers[MAX_HEADERS];   /* argv indices of -i values */
	static int includes[MAX_INCLUDES]; /* argv indices of -I values */
	char *preprocessed_headers;
	size_t preprocessed_headers_len = 0;
	const char *preprocessor_program = C_PREPROCESSOR_DEFAULT;
	const char *output_filename = NULL;
	const char *input_filename = NULL;
	FILE *output;

	SymbolHashTable all_symbols = {0}; /* all symbols in all provided libraries */

	int i, n_headers = 0, n_includes = 0;

	{
		char *p = getenv("C_PREPROCESSOR");
		if (p)
			preprocessor_program = p;
	}

	/* parse arguments */
	for (i = 1; i < argc; ++i) {
		if (argv[i][0] != '-') {
			fprintf(stderr, "Stray argument (#%d): '%s'.\n", i, argv[i]);
			exit(-1);
		}

		switch (argv[i][1]) {
		case 'i':
			if (n_headers >= MAX_HEADERS) {
				fprintf(stderr, "Too many headers!\n");
				exit(-1);
			}
			headers[n_headers++] = take_option_value(argc, &i,
				"-i must be followed by a file name.\n");
			break;
		case 'l':
			if (input_filename) {
				fprintf(stderr, "-l specified multiple times.\n");
				exit(-1);
			}
			input_filename = argv[take_option_value(argc, &i,
				"-l must be followed by a file name.\n")];
			break;
		case 'I':
			if (n_includes >= MAX_INCLUDES) {
				fprintf(stderr, "Too many includes!\n");
				exit(-1);
			}
			includes[n_includes++] = take_option_value(argc, &i,
				"-I must be followed by a directory name.\n");
			break;
		case 'o':
			if (output_filename) {
				fprintf(stderr, "-o specified twice.\n");
				exit(-1);
			}
			output_filename = argv[take_option_value(argc, &i,
				"-o must be followed by a file name.\n")];
			break;
		case '-':
			if (strcmp(argv[i], "--help") == 0) {
				show_help_text_and_exit(argv[0]);
			} else if (strcmp(argv[i], "--version") == 0) {
				printf(VERSION_TEXT);
				exit(0);
			} else {
				fprintf(stderr, "Unrecognized option: '%s'.\n", argv[i]);
				exit(-1);
			}
			break;
		default:
			/* don't silently ignore typos */
			fprintf(stderr, "Unrecognized option: '%s'.\n", argv[i]);
			exit(-1);
		}
	}

	if (!output_filename) {
		output_filename = "out.c";
	}

	if (!input_filename || n_headers == 0) {
		show_help_text_and_exit(argv[0]);
	}

	/* check that files exist */
	for (i = 0; i < n_headers; ++i) {
		if (!file_is_readable(argv[headers[i]])) {
			fprintf(stderr, "Can't open provided header file: '%s'.\n", argv[headers[i]]);
			exit(1);
		}
	}
	if (!file_is_readable(input_filename)) {
		fprintf(stderr, "Can't open provided dynamic library file: '%s'.\n", input_filename);
		exit(1);
	}

#if __unix__
	/* read library */
	read_library_symbols(input_filename, &all_symbols);

	preprocessed_headers = preprocess_headers(preprocessor_program, argv,
		headers, n_headers, includes, n_includes, &preprocessed_headers_len);
#else
	#error "@TODO"
#endif

	/* figure out functions! */
	output = fopen(output_filename, "w");
	if (!output) {
		perror("Couldn't open output file");
		exit(1);
	}

	emit_declarations(output, &all_symbols, preprocessed_headers, preprocessed_headers_len);
	emit_loader(output, input_filename, &all_symbols);

	if (fclose(output) != 0) {
		perror("Couldn't write output file");
		exit(1);
	}

#if __unix__
	if (preprocessed_headers)
		munmap(preprocessed_headers, preprocessed_headers_len);
#endif
	symbol_hash_table_free(&all_symbols);

	return 0;
}