From 0128e6811c92dd6f659fcc6dba849b9a4d87e4be Mon Sep 17 00:00:00 2001 From: pommicket Date: Sat, 14 Aug 2021 14:06:43 -0400 Subject: undeclared functions, etc. etc. --- .gitignore | 6 + README.md | 181 ++++++++++++++++++++++++++ main.c | 424 +++++++++++++++++++++++++++++++++++++------------------------ 3 files changed, 442 insertions(+), 169 deletions(-) create mode 100644 README.md diff --git a/.gitignore b/.gitignore index b7645df..6de5a42 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,11 @@ *.so +*.so.* +*.o out.c +sdl.c +x11.c +math.c +*.asm dlsub tags TAGS diff --git a/README.md b/README.md new file mode 100644 index 0000000..7ff7ea3 --- /dev/null +++ b/README.md @@ -0,0 +1,181 @@ +# dlsub + +A tool for replacing a subset of functions in dynamic libraries. + +Let's say you're meddling around with a program that uses +[SDL](https://libsdl.org). One thing you might want to do is replace an SDL +function (e.g. `SDL_SetWindowTitle`) with your own function so you have control +over it (e.g. you can set your own special window title). + +This is possible on most operating systems. On Unix-like systems, you can use +the `LD_LIBRARY_PATH` environment variable to fool an application into using +your own dynamic library instead of the dynamic library it was intending to use. +On Windows, you can create a DLL in the same directory as the executable with +the same name as the one you want to replace. But the issue with this is you +probably only want to replace a few functions, and you still want access to the +original library functions (e.g. the *real* `SDL_SetWindowTitle`). This is where +dlsub comes in. + +## Dependencies + +You will need: + +- [nasm](https://nasm.us) +- a C compiler + +To install these on Ubuntu/Debian: + +``` +sudo apt install nasm tcc +``` + +On Unix-like systems, the default is to use TCC (for faster preprocessing and +less likelihood of weird syntax messing dlsub up). 
You can, however, override +this by setting the C_PREPROCESSOR environment variable. + +## Figuring out which library file is being used + +On Windows, it may just be a DLL file in the same directory as the exe. +Otherwise, you can install [depends.exe](https://www.dependencywalker.com/); +good luck. + +On Unix-like systems, if you want to know what specific library files an +executable is using, run: + +```bash +ldd <executable> +``` + +## Usage + +The standard usage of dlsub is: + +```bash +dlsub -I <include directory> -i <header file> -l <library file> -o <output name> +``` + +You can see `dlsub --help` for a list of all options. + +You can specify multiple header files if the library has more than one. +Here is an example invocation for replacing SDL: + +```bash +dlsub --no-warn -l /usr/lib/x86_64-linux-gnu/libSDL2-2.0.so -I /usr/include/SDL2 -i SDL.h -i SDL_syswm.h -i SDL_vulkan.h -C -DSDL_DISABLE_IMMINTRIN_H -o sdl +``` + +(the `-DSDL_DISABLE_IMMINTRIN_H` is needed for tcc, and it also speeds up +processing) + +You should now get a file called `sdl.c` and another one called `sdl.asm`. +Now let's say you want to replace `SDL_SetWindowTitle`. First, delete the line +in `sdl.asm`: + +``` +global SDL_SetWindowTitle:function +``` + +(This deletes the default replacement, i.e. to redirect to the real SDL +function). +Now at the end of sdl.c, add: + +```c +DLSUB_EXPORT void SDL_SetWindowTitle(SDL_Window *window, const char *title) { + REAL_SDL_SetWindowTitle(window, "substitute title"); +} +``` + +The `DLSUB_EXPORT` ensures that the function is exported out to the dynamic +library (on Windows, where that distinction is made). + +You can now compile libSDL2-2.0.so.0 on Linux, with: +``` +nasm -f elf64 sdl.asm +cc -fPIC -shared sdl.o sdl.c -o libSDL2-2.0.so.0 -I/usr/include/SDL2 +``` + +And run a program that uses SDL like this: +``` +LD_LIBRARY_PATH=/directory/where/your/library/file/is ./some_application +``` + +Note that dlsub *cannot* handle dynamic libraries' objects (e.g. 
+`extern int foo;`), so if there are any you will have to make your own +substitutes for those. + +## What's with the assembly file? + +dlsub needs `nasm` in order to work. This is partly because of varargs +functions: In C, there's no way of redirecting one varargs +function to another. + +Also, if for whatever reason there's a function in the dynamic library +that's not defined in any header file, it would be impossible to keep it working +without assembly. + +## More examples... + +### Replacing `XNextEvent` from libX11 + +```bash +dlsub --no-warn -l /usr/lib/x86_64-linux-gnu/libX11.so.6 -I /usr/include/X11 -i Xlib.h -i Xutil.h -o x11 +``` + +(you can ignore the warnings). + +Delete the line from x11.asm: + +``` +global XNextEvent:function +``` + +Add to the bottom of x11.c: + +```c +/* let's hope nobody needs to use these */ +void *_XCreateMutex_fn, *_XFreeMutex_fn, *_XLockMutex_fn, + *_XUnlockMutex_fn, *_Xglobal_lock; + +int XNextEvent(Display *dpy, XEvent *event) { + int ret = REAL_XNextEvent(dpy, event); + /* + change all key events to pressing "v" + (may be a different key for non-QWERTY keyboards) + */ + if (event->type == KeyPress || event->type == KeyRelease) + event->xkey.keycode = 55; + return ret; +} +``` + +```bash +nasm -f elf64 x11.asm +cc -fPIC -shared x11.c x11.o -o libX11.so.6 -I/usr/include/X11 +``` + +### Replacing `exp` from libm + +Here's a silly example. This could cause some... interesting behavior. + +Note that because of glibc name-mangling you can only replace *some* libm +functions. 
+ +```bash +dlsub --no-warn -l /lib/x86_64-linux-gnu/libm.so.6 -I /usr/include -i math.h -o math +``` + +Delete from math.asm: +``` +global exp:function +``` + +Add to math.c: +``` +double exp(double x) { + return 2.0 * x; +} +``` + +``` +nasm -f elf64 math.asm +cc -fPIC -shared math.c math.o -o libm.so.6 +``` diff --git a/main.c b/main.c index 3942f98..cf3ebdf 100644 --- a/main.c +++ b/main.c @@ -1,9 +1,5 @@ #define VERSION "0.0" -#define MAX_HEADERS 2000 -#define MAX_LIBS 2000 -#define MAX_INCLUDES 2000 - #if __GNUC__ #define UNUSED __attribute__((unused)) #else @@ -124,9 +120,11 @@ static void show_help_text_and_exit(const char *program_name) { "\t-i
Add a header file to be processed.\n" "\t-I Add an include directory when preprocessing header files.\n" "\t-l Set the dynamic library file you want to replace.\n" - "\t-o Set the output file.\n" + "\t-o Set the output file name.\n" + "\t-C Add an argument for the C preprocessor.\n" ); printf( + "\t--no-warn Disable warnings if a function isn't found in a header file.\n" "\t--help Show this help text and exit.\n" "\t--version Show version number and exit.\n" "Environment variables:\n" @@ -233,17 +231,16 @@ static void symbol_hash_table_free(SymbolHashTable *table) { } int main(int argc, char **argv) { - static int headers[MAX_HEADERS]; - static int includes[MAX_INCLUDES]; char *preprocessed_headers; size_t preprocessed_headers_len = 0; const char *preprocessor_program = C_PREPROCESSOR_DEFAULT; - const char *output_filename = NULL; + const char *output_name = NULL; const char *input_filename = NULL; + int no_warn = 0; SymbolHashTable all_symbols = {0}; /* all symbols in all provided libraries */ - int i, n_headers = 0, n_includes = 0; + int i; { char *p = getenv("C_PREPROCESSOR"); @@ -251,88 +248,86 @@ int main(int argc, char **argv) { preprocessor_program = p; } - /* parse arguments */ - for (i = 1; i < argc; ++i) { - if (argv[i][0] == '-') { - switch (argv[i][1]) { - case 'i': - if (n_headers >= MAX_HEADERS) { - fprintf(stderr, "Too many headers!\n"); - exit(-1); - } else if (i < argc-1) { - headers[n_headers++] = i+1; - ++i; - } else { - fprintf(stderr, "-i must be followed by a file name.\n"); - exit(-1); - } - break; - case 'l': - if (input_filename) { - fprintf(stderr, "-l specified multiple times.\n"); - exit(-1); - } else if (i < argc-1) { - input_filename = argv[i+1]; - ++i; - } else { - fprintf(stderr, "-l must be followed by a file name.\n"); - exit(-1); - } - break; - case 'I': - if (n_includes >= MAX_INCLUDES) { - fprintf(stderr, "Too many includes!\n"); - exit(-1); - } else if (i < argc-1) { - includes[n_includes++] = i+1; - ++i; - } else { - 
fprintf(stderr, "-I must be followed by a directory name.\n"); - exit(-1); - } - break; - case 'o': - if (output_filename) { - fprintf(stderr, "-o specified twice.\n"); - exit(-1); - } else if (i < argc-1) { - output_filename = argv[i+1]; - ++i; - } else { - fprintf(stderr, "-o must be followed by a file name.\n"); - exit(-1); - } - break; - case '-': - if (strcmp(argv[i], "--help") == 0) { - show_help_text_and_exit(argv[0]); - } else if (strcmp(argv[i], "--version") == 0) { - printf(VERSION_TEXT); - exit(0); + { + /* parse arguments */ + for (i = 1; i < argc; ++i) { + if (argv[i][0] == '-') { + switch (argv[i][1]) { + case 'i': + if (i < argc-1) { + ++i; + } else { + fprintf(stderr, "-i must be followed by a file name.\n"); + exit(-1); + } + break; + case 'l': + if (i < argc-1) { + input_filename = argv[i+1]; + ++i; + } else { + fprintf(stderr, "-l must be followed by a file name.\n"); + exit(-1); + } + break; + case 'I': + if (i < argc-1) { + ++i; + } else { + fprintf(stderr, "-I must be followed by a directory name.\n"); + exit(-1); + } + break; + case 'C': + if (i < argc-1) { + ++i; + } else { + fprintf(stderr, "-I must be followed by a directory name.\n"); + exit(-1); + } + break; + case 'o': + if (output_name) { + fprintf(stderr, "-o specified twice.\n"); + exit(-1); + } else if (i < argc-1) { + output_name = argv[i+1]; + ++i; + } else { + fprintf(stderr, "-o must be followed by a file name.\n"); + exit(-1); + } + break; + case '-': + if (strcmp(argv[i], "--help") == 0) { + show_help_text_and_exit(argv[0]); + } else if (strcmp(argv[i], "--version") == 0) { + printf(VERSION_TEXT); + exit(0); + } else if (strcmp(argv[i], "--no-warn") == 0) { + no_warn = 1; + } else goto unrecognized; + break; + default: + unrecognized: + fprintf(stderr, "Unrecognized flag: '%s'.\n", argv[i]); + break; } - break; + } else { + fprintf(stderr, "Stray argument (#%d): '%s'.\n", i, argv[i]); + exit(-1); } - } else { - fprintf(stderr, "Stray argument (#%d): '%s'.\n", i, argv[i]); - 
exit(-1); + } + if (!input_filename) { + show_help_text_and_exit(argv[0]); } } - if (!output_filename) { - output_filename = "out.c"; + if (!output_name) { + output_name = "out"; } - if (!input_filename || n_headers == 0) { - show_help_text_and_exit(argv[0]); - } - /* check that files exist */ - for (i = 0; i < n_headers; ++i) { - if (!file_is_readable(argv[headers[i]])) { - fprintf(stderr, "Can't open provided header file: '%s'.\n", argv[headers[i]]); - exit(1); - } - } if (!file_is_readable(input_filename)) { fprintf(stderr, "Can't open provided dynamic library file: '%s'.\n", input_filename); exit(1); @@ -471,7 +466,9 @@ int main(int argc, char **argv) { perror("Couldn't create compiler process"); exit(2); case 0: { - static char *cc_argv[2*MAX_INCLUDES + 10]; + #define MAX_CC_ARGV 4000 + static char *cc_argv[MAX_CC_ARGV+1]; + int a = 0; /* child */ close(pipefd[1]); /* don't need write end of pipe */ if (dup2(pipefd[0], 0) == -1) { @@ -482,15 +479,28 @@ int main(int argc, char **argv) { perror("Couldn't redirect C compiler's stdout to file"); exit(2); } - cc_argv[0] = (char *)preprocessor_program; - cc_argv[1] = "-E"; - cc_argv[2] = "-"; - cc_argv[3] = "-D"; - cc_argv[4] = "SDL_DISABLE_IMMINTRIN_H"; - for (i = 0; i < n_includes; ++i) { - cc_argv[5+2*i+0] = "-I"; - cc_argv[5+2*i+1] = argv[includes[i]]; + + cc_argv[a++] = (char *)preprocessor_program; + cc_argv[a++] = "-E"; + cc_argv[a++] = "-"; + for (i = 1; i < argc-1; ++i) { + if (strcmp(argv[i], "-I") == 0) { + if (a + 2 > MAX_CC_ARGV) { + fprintf(stderr, "Too many compiler arguments.\n"); + exit(-1); + } + cc_argv[a++] = "-I"; + cc_argv[a++] = argv[i+1]; + } else if (strcmp(argv[i], "-C") == 0) { + if (a + 1 > MAX_CC_ARGV) { + fprintf(stderr, "Too many compiler arguments.\n"); + exit(-1); + } + cc_argv[a++] = argv[i+1]; + } + if (argv[i][0] == '-' && argv[i][1] != '-') ++i; } + cc_argv[a++] = NULL; if (execv(preprocessor_program, cc_argv) == -1) { perror("Couldn't start C compiler"); exit(2); @@ -502,31 
+512,51 @@ int main(int argc, char **argv) { /* parent */ close(pipefd[0]); /* don't need read end of pipe */ - for (i = 0; i < n_headers; ++i) { - const char *header = argv[headers[i]]; - int fd = open(header, O_RDONLY); - char buf[4096]; - ssize_t bytes_read; - - if (fd == -1) { - char prefix[128]; - sprintf(prefix, "Couldn't open %.100s", header); - perror(prefix); - exit(2); - } - - while ((bytes_read = read(fd, buf, sizeof buf)) > 0) { - write(pipefd[1], buf, (size_t)bytes_read); - } + for (i = 1; i < argc-1; ++i) { + if (strcmp(argv[i], "-i") == 0) { + const char *header = argv[i+1]; + int fd = open(header, O_RDONLY); + char buf[4096]; + ssize_t bytes_read; + + if (fd == -1) { + /* check include directories */ + char path[520]; + int j; + for (j = 1; j < argc-1; ++j) { + if (strcmp(argv[j], "-I") == 0) { + const char *dir = argv[j+1]; + sprintf(path, "%.256s/%.256s", dir, header); + fd = open(path, O_RDONLY); + if (fd != -1) break; + } + if (argv[j][0] == '-' && argv[j][1] != '-') ++j; + } + + if (fd == -1) { + char prefix[128]; + sprintf(prefix, "Couldn't open %.100s", header); + perror(prefix); + kill(SIGKILL, compiler_process); + exit(2); + } + } + + while ((bytes_read = read(fd, buf, sizeof buf)) > 0) { + write(pipefd[1], buf, (size_t)bytes_read); + } - if (bytes_read < 0) { - char prefix[128]; - sprintf(prefix, "Error reading %.100s", header); - perror(prefix); - exit(2); - } + if (bytes_read < 0) { + char prefix[128]; + sprintf(prefix, "Error reading %.100s", header); + perror(prefix); + kill(SIGKILL, compiler_process); + exit(2); + } - close(fd); + close(fd); + } + if (argv[i][0] == '-' && argv[i][1] != '-') ++i; } close(pipefd[1]); @@ -572,15 +602,76 @@ int main(int argc, char **argv) { { - FILE *output = fopen(output_filename, "w"); + FILE *c_output; + FILE *nasm_output; size_t c = 0; const char *semicolon; - if (!output) { - perror("Couldn't open output file"); - exit(1); + + { + char filename[1024]; + + sprintf(filename, "%.1000s.c", output_name); 
+ c_output = fopen(filename, "w"); + if (!c_output) { + perror("Couldn't open C output file"); + exit(1); + } + + sprintf(filename, "%.1000s.asm", output_name); + nasm_output = fopen(filename, "w"); + if (!nasm_output) { + perror("Couldn't open nasm output file"); + exit(1); + } } + for (i = 1; i < argc-1; ++i) { + if (strcmp(argv[i], "-i") == 0) { + fprintf(c_output, "#include <%s>\n", argv[i+1]); + } + if (argv[i][0] == '-' && argv[i][1] != '-') ++i; + } + + fprintf(c_output, "#define DLSUB_REAL_DL_NAME \"%s\"\n", input_filename); + + fprintf(c_output, + "static void dlsub_init(void);\n" + "\n" + "#if __unix__\n" + "#include \n" + "#define DLSUB_GET_DLHANDLE(filename) dlopen(filename, RTLD_LAZY)\n" + "#define DLSUB_GET_SYM(handle, name) ((void(*)(void))dlsym(handle, name))\n" + "#define DLSUB_EXPORT\n" + "static void __attribute__((constructor)) dlsub_constructor(void) {\n" + "\tdlsub_init();\n" + "}\n" + ); + fprintf(c_output, + "#elif _WIN32\n" + "extern void *__stdcall LoadLibraryA(const char *);\n" + "extern int (*__stdcall GetProcAddress(void *, const char *))(void);\n" + "#define DLSUB_GET_DLHANDLE LoadLibraryA\n" + "#define DLSUB_GET_SYM GetProcAddress\n" + "#define DLSUB_EXPORT __declspec((dllexport))\n" + "unsigned __stdcall DllMain(void *instDLL, unsigned reason, void *_reserved) {\n" + "\t(void)instDLL; (void)_reserved;\n" + "\tswitch (reason) {\n" + "\tcase 1: /* DLL loaded */\n" + "\t\tdlsub_init();\n" + "\t\tbreak;\n" + "\tcase 0: /* DLL unloaded */\n" + "\t\tbreak;\n" + "}\n" + ); + fprintf(c_output, + "#else\n" + "#error \"Unrecognized OS.\"\n" + "#endif\n" + ); + + fprintf(c_output, "\n\n"); + while ((semicolon = memchr(preprocessed_headers + c, ';', preprocessed_headers_len - c))) { char statement_data[1024], *statement = statement_data; @@ -640,9 +731,9 @@ int main(int argc, char **argv) { /* remove duplicate/unnecessary whitespace */ for (in = statement, out = statement; *in; ++in) { - if (in[0] == ' ' && strchr(" ()*,", in[1])) { + if (in[0] 
== ' ' && strchr(" (){}*,", in[1])) { continue; - } else if (strchr(" ()*,", in[0]) && in[1] == ' ') { + } else if (strchr(" (){}*,", in[0]) && in[1] == ' ') { *out++ = *in; while (in[1] == ' ') ++in; } else { @@ -659,6 +750,11 @@ int main(int argc, char **argv) { statement[strlen(statement)-1] = '\0'; } + while (statement[0] == '}') { + /* this can happen with inline functions */ + ++statement; + } + /* remove "extern" at beginning */ if (strncmp(statement, "extern ", 7) == 0) { statement += 7; @@ -684,10 +780,8 @@ int main(int argc, char **argv) { memmove(attr, p, (size_t)(statement + strlen(statement) + 1 - p)); } } - - /* @TODO(windows): remove __cdecl, et al. */ } - + if ( /* these conditions aren't airtight but practically speaking they're good */ strlen(statement) < 5 /* shortest possible function declaration is A f(); */ @@ -735,9 +829,9 @@ int main(int argc, char **argv) { /* already processed this function */ } else { entry->declared = 1; - fprintf(output, + fprintf(c_output, "typedef %.*s (*PTR_%.*s)%.*s;\n" - "static PTR_%.*s REAL_%.*s;\n", + "PTR_%.*s REAL_%.*s;\n", (int)(func_name - statement), statement, (int)(func_name_end - func_name), @@ -757,61 +851,53 @@ int main(int argc, char **argv) { c = (size_t)(semicolon - preprocessed_headers); ++c; } - fprintf(output, "\n\n\n" - "static void dlsub_init(void);\n" - "\n" - "#if __unix__\n" - "#include \n" - "#define DLSUB_GET_DLHANDLE(filename) dlopen(filename, RTLD_LAZY)\n" - "#define DLSUB_GET_SYM dlsym\n" - "static void __attribute__((constructor)) dlsub_constructor(void) {\n" - "\tdlsub_init();\n" - "}\n" - ); - fprintf(output, - "#elif _WIN32\n" - "extern void *__stdcall LoadLibraryA(const char *);\n" - "extern int (*__stdcall GetProcAddress(void *, const char *))(void);\n" - "#define DLSUB_GET_DLHANDLE LoadLibraryA\n" - "#define DLSUB_GET_SYM GetProcAddress\n" - "unsigned __stdcall DllMain(void *instDLL, unsigned reason, void *_reserved) {\n" - "\t(void)instDLL; (void)_reserved;\n" - "\tswitch 
(reason) {\n" - "\tcase 1: /* DLL loaded */\n" - "\t\tdlsub_init();\n" - "\t\tbreak;\n" - "\tcase 0: /* DLL unloaded */\n" - "\t\tbreak;\n" - "}\n" - ); - fprintf(output, - "#else\n" - "#error \"Unrecognized OS.\"\n" - "#endif\n" - ); - fprintf(output, "static void dlsub_init(void) {\n" - "\tvoid *handle = DLSUB_GET_DLHANDLE(\"%s\");\n", - input_filename); { size_t s; + const SymbolHashEntry *entry; + const char *symbol; + + fprintf(nasm_output, "default rel\n"); + for (s = 0; s < all_symbols.n_entries; ++s) { - const SymbolHashEntry *entry = all_symbols.entries[s]; - const char *symbol; + entry = all_symbols.entries[s]; if (!entry) continue; symbol = entry->symbol; - if (entry->declared) { - fprintf(output, "\tREAL_%s = (PTR_%s)DLSUB_GET_SYM(handle, \"%s\");\n", symbol, symbol, symbol); - } else { - fprintf(stderr, "Warning: Function '%s' declared in library, not found in any header file. It will not be made available.\n", symbol); + fprintf(nasm_output, "extern REAL_%s\n", symbol); + if (!entry->declared) { + fprintf(c_output, "void (*REAL_%s)(void);\n", symbol); + if (!no_warn) + fprintf(stderr, "Warning: Function '%s' declared in library, not found in any header file. It will not be usable from C.\n", symbol); } } + + fprintf(nasm_output, "section .text\n"); + fprintf(c_output, "static void dlsub_init(void) {\n" + "\tvoid *handle = DLSUB_GET_DLHANDLE(DLSUB_REAL_DL_NAME);\n"); + + for (s = 0; s < all_symbols.n_entries; ++s) { + entry = all_symbols.entries[s]; + if (!entry) continue; + symbol = entry->symbol; + fprintf(c_output, "\tREAL_%s = (%s%s)DLSUB_GET_SYM(handle, \"%s\");\n", symbol, + entry->declared ? "PTR_" : "void (*)(void)", entry->declared ? 
symbol : "", symbol); + fprintf(nasm_output, "global %s:function\n", symbol); + } + + + for (s = 0; s < all_symbols.n_entries; ++s) { + entry = all_symbols.entries[s]; + if (!entry) continue; + symbol = entry->symbol; + fprintf(nasm_output, "%s: mov r11, [REAL_%s wrt ..gotpc]\njmp [r11]\n", symbol, symbol); + } } - fprintf(output, "}\n"); + fprintf(c_output, "}\n"); - fclose(output); + fclose(c_output); + fclose(nasm_output); } symbol_hash_table_free(&all_symbols); -- cgit v1.2.3