diff options
author | pommicket <pommicket@gmail.com> | 2023-01-01 12:05:48 -0500 |
---|---|---|
committer | pommicket <pommicket@gmail.com> | 2023-01-01 12:05:48 -0500 |
commit | 8308d6fc53ee339a0b5cac4b9b837179c61efbaa (patch) | |
tree | e4f94a5695e9d5cf035582799738dc2e75c0e779 /util.c | |
parent | 57834dc0b116a62b806781ddec101bca4cda3abb (diff) |
more reorganizing
Diffstat (limited to 'util.c')
-rw-r--r-- | util.c | 295 |
1 files changed, 73 insertions, 222 deletions
@@ -8,48 +8,52 @@ #endif #include "base.h" +#include "util.h" -// Is this character a "word" character? -static bool is_word(char32_t c) { +// on 16-bit systems, this is 16383. on 32/64-bit systems, this is 1073741823 +// it is unusual to have a string that long. +#define STRLEN_SAFE_MAX (UINT_MAX >> 2) + +bool is_word(char32_t c) { return c > WCHAR_MAX || c == '_' || iswalnum((wint_t)c); } -static bool is_digit(char32_t c) { +bool is_digit(char32_t c) { return c < WCHAR_MAX && iswdigit((wint_t)c); } -static bool is_space(char32_t c) { +bool is_space(char32_t c) { return c < WCHAR_MAX && iswspace((wint_t)c); } -static bool is_a_tty(FILE *out) { - return - #if __unix__ - isatty(fileno(out)) +bool is_a_tty(FILE *out) { + #if _WIN32 + int fd = _fileno(out); + return fd >= 0 && _isatty(fd); #else - false + int fd = fileno(out); + return fd >= 0 && isatty(fd); #endif - ; } -static const char *term_italics(FILE *out) { +const char *term_italics(FILE *out) { return is_a_tty(out) ? "\x1b[3m" : ""; } -static const char *term_bold(FILE *out) { +const char *term_bold(FILE *out) { return is_a_tty(out) ? "\x1b[1m" : ""; } -static const char *term_yellow(FILE *out) { +const char *term_yellow(FILE *out) { return is_a_tty(out) ? "\x1b[93m" : ""; } -static const char *term_clear(FILE *out) { +const char *term_clear(FILE *out) { return is_a_tty(out) ? "\x1b[0m" : ""; } -static u8 util_popcount(u64 x) { +u8 util_popcount(u64 x) { #ifdef __GNUC__ return (u8)__builtin_popcountll(x); #else @@ -62,7 +66,7 @@ static u8 util_popcount(u64 x) { #endif } -static u8 util_count_leading_zeroes32(u32 x) { +u8 util_count_leading_zeroes32(u32 x) { if (x == 0) return 32; // GCC's __builtin_clz is undefined for x = 0 #if __GNUC__ && UINT_MAX == 4294967295 return (u8)__builtin_clz(x); @@ -80,12 +84,12 @@ static u8 util_count_leading_zeroes32(u32 x) { #endif } -static bool util_is_power_of_2(u64 x) { +bool util_is_power_of_2(u64 x) { return util_popcount(x) == 1; } // for finding a character in a char32 string -static char32_t *util_mem32chr(char32_t *s, char32_t c, size_t n) { +char32_t *util_mem32chr(char32_t *s, char32_t c, size_t n) { for (size_t i = 0; i < n; ++i) { if (s[i] == c) { return &s[i]; @@ -94,7 +98,7 @@ static char32_t *util_mem32chr(char32_t *s, char32_t c, size_t n) { return NULL; } -static char32_t const *util_mem32chr_const(char32_t const *s, char32_t c, size_t n) { +const char32_t *util_mem32chr_const(const char32_t *s, char32_t c, size_t n) { for (size_t i = 0; i < n; ++i) { if (s[i] == c) { return &s[i]; @@ -103,23 +107,23 @@ static char32_t const *util_mem32chr_const(char32_t const *s, char32_t c, size_t return NULL; } -static bool str_has_prefix(char const *str, char const *prefix) { +bool str_has_prefix(const char *str, const char *prefix) { return strncmp(str, prefix, strlen(prefix)) == 0; } // e.g. "/usr/share/bla" has the path prefix "/usr/share" but not "/usr/sha" -static bool str_has_path_prefix(const char *path, const char *prefix) { +bool str_has_path_prefix(const char *path, const char *prefix) { size_t prefix_len = strlen(prefix); if (strncmp(path, prefix, prefix_len) != 0) return false; return path[prefix_len] == '\0' || strchr(ALL_PATH_SEPARATORS, path[prefix_len]); } -static bool streq(char const *a, char const *b) { +bool streq(const char *a, const char *b) { return strcmp(a, b) == 0; } -static size_t strn_len(const char *src, size_t n) { +size_t strn_len(const char *src, size_t n) { const char *p = src; // in C99 and C++11/14, calling memchr with a size larger than // the size of the object is undefined behaviour. @@ -131,7 +135,7 @@ static size_t strn_len(const char *src, size_t n) { } // duplicates at most n characters from src -static char *strn_dup(char const *src, size_t n) { +char *strn_dup(const char *src, size_t n) { size_t len = strn_len(src, n); if (n > len) n = len; @@ -144,26 +148,12 @@ static char *strn_dup(char const *src, size_t n) { } // duplicates a null-terminated string. the returned string should be passed to free() -static char *str_dup(char const *src) { +char *str_dup(const char *src) { return strn_dup(src, SIZE_MAX); } -// like snprintf, but not screwed up on windows -#define str_printf(str, size, ...) (str)[(size) - 1] = '\0', snprintf((str), (size) - 1, __VA_ARGS__) -// like snprintf, but the size is taken to be the length of the array str. -// first, check that str is actually an array -#define strbuf_printf(str, ...) assert(sizeof str != 4 && sizeof str != 8), \ - str_printf(str, sizeof str, __VA_ARGS__) -#define str_catf(str, size, ...) str_printf((str) + strlen(str), (size) - strlen(str), __VA_ARGS__) -#define strbuf_catf(str, ...) assert(sizeof str != 4 && sizeof str != 8), \ - str_catf(str, sizeof str, __VA_ARGS__) - -// on 16-bit systems, this is 16383. on 32/64-bit systems, this is 1073741823 -// it is unusual to have a string that long. -#define STRLEN_SAFE_MAX (UINT_MAX >> 2) - // safer version of strncat. dst_sz includes a null terminator. -static void strn_cat(char *dst, size_t dst_sz, char const *src, size_t src_len) { +void strn_cat(char *dst, size_t dst_sz, const char *src, size_t src_len) { size_t dst_len = strlen(dst); // make sure dst_len + src_len + 1 doesn't overflow @@ -190,12 +180,12 @@ static void strn_cat(char *dst, size_t dst_sz, char const *src, size_t src_len) } // safer version of strcat. dst_sz includes a null terminator. -static void str_cat(char *dst, size_t dst_sz, char const *src) { +void str_cat(char *dst, size_t dst_sz, const char *src) { strn_cat(dst, dst_sz, src, strlen(src)); } // safer version of strncpy. dst_sz includes a null terminator. -static void strn_cpy(char *dst, size_t dst_sz, char const *src, size_t src_len) { +void strn_cpy(char *dst, size_t dst_sz, const char *src, size_t src_len) { size_t n = src_len; // number of bytes to copy for (size_t i = 0; i < n; ++i) { if (src[i] == '\0') { @@ -216,14 +206,10 @@ static void strn_cpy(char *dst, size_t dst_sz, char const *src, size_t src_len) } // safer version of strcpy. dst_sz includes a null terminator. -static void str_cpy(char *dst, size_t dst_sz, char const *src) { +void str_cpy(char *dst, size_t dst_sz, const char *src) { strn_cpy(dst, dst_sz, src, SIZE_MAX); } -#define strbuf_cpy(dst, src) str_cpy(dst, sizeof dst, src) -#define strbuf_cat(dst, src) str_cat(dst, sizeof dst, src) - - char *a_sprintf(PRINTF_FORMAT_STRING const char *fmt, ...) ATTRIBUTE_PRINTF(1, 2); char *a_sprintf(const char *fmt, ...) { // idk if you can always just pass NULL to vsnprintf @@ -244,7 +230,7 @@ char *a_sprintf(const char *fmt, ...) { // advances str to the start of the next UTF8 character -static void utf8_next_char_const(char const **str) { +static void utf8_next_char_const(const char **str) { if (**str) { do { ++*str; @@ -252,21 +238,16 @@ static void utf8_next_char_const(char const **str) { } } -/* -returns the first instance of needle in haystack, where both are UTF-8 strings, ignoring the case of the characters, -or NULL if the haystack does not contain needle -WARNING: O(strlen(haystack) * strlen(needle)) -*/ -static char *stristr(char const *haystack, char const *needle) { +char *strstr_case_insensitive(const char *haystack, const char *needle) { size_t needle_bytes = strlen(needle), haystack_bytes = strlen(haystack); if (needle_bytes > haystack_bytes) return NULL; - char const *haystack_end = haystack + haystack_bytes; - char const *needle_end = needle + needle_bytes; + const char *haystack_end = haystack + haystack_bytes; + const char *needle_end = needle + needle_bytes; - for (char const *haystack_start = haystack; haystack_start + needle_bytes <= haystack_end; utf8_next_char_const(&haystack_start)) { - char const *p = haystack_start, *q = needle; + for (const char *haystack_start = haystack; haystack_start + needle_bytes <= haystack_end; utf8_next_char_const(&haystack_start)) { + const char *p = haystack_start, *q = needle; bool match = true; // check if p matches q @@ -288,27 +269,14 @@ static char *stristr(char const *haystack, char const *needle) { return NULL; } -static void print_bytes(u8 *bytes, size_t n) { - u8 *b, *end; +void print_bytes(const u8 *bytes, size_t n) { + const u8 *b, *end; for (b = bytes, end = bytes + n; b != end; ++b) - printf("%x ", *b); + printf("%02x ", *b); printf("\n"); } -/* -does this predicate hold for all the characters of s. predicate is int (*)(int) instead -of bool (*)(char) so that you can pass isprint, etc. to it. -*/ -static bool str_satisfies(char const *s, int (*predicate)(int)) { - char const *p; - for (p = s; *p; ++p) - if (!predicate(*p)) - return false; - return true; -} - - -static int strcmp_case_insensitive(char const *a, char const *b) { +int strcmp_case_insensitive(const char *a, const char *b) { #if _WIN32 return _stricmp(a, b); #else @@ -316,25 +284,27 @@ static int strcmp_case_insensitive(char const *a, char const *b) { #endif } -// function to be passed into qsort for case insensitive sorting -static int str_qsort_case_insensitive_cmp(const void *av, const void *bv) { - char const *const *a = av, *const *b = bv; +int str_qsort_case_insensitive_cmp(const void *av, const void *bv) { + const char *const *a = av, *const *b = bv; return strcmp_case_insensitive(*a, *b); } -// imo windows has the argument order right here #if _WIN32 -#define qsort_with_context qsort_s +void qsort_with_context(void *base, size_t nmemb, size_t size, + int (*compar)(void *, const void *, const void *), + void *arg) { + qsort_s(base, nmemb, size, compar, arg); +} #else typedef struct { int (*compar)(void *, const void *, const void *); void *context; } QSortWithContext; -static int qsort_with_context_cmp(const void *a, const void *b, void *context) { +int qsort_with_context_cmp(const void *a, const void *b, void *context) { QSortWithContext *c = context; return c->compar(c->context, a, b); } -static void qsort_with_context(void *base, size_t nmemb, size_t size, +void qsort_with_context(void *base, size_t nmemb, size_t size, int (*compar)(void *, const void *, const void *), void *arg) { QSortWithContext ctx = { @@ -345,17 +315,15 @@ static void qsort_with_context(void *base, size_t nmemb, size_t size, } #endif -// the actual file name part of the path; get rid of the containing directory. -// NOTE: the returned string is part of path, so you don't need to free it or anything. -static char const *path_filename(char const *path) { - char const *last_path_sep = strrchr(path, PATH_SEPARATOR); +const char *path_filename(const char *path) { + const char *last_path_sep = strrchr(path, PATH_SEPARATOR); if (last_path_sep) return last_path_sep + 1; // (a relative path with no path separators) return path; } -static bool path_is_absolute(char const *path) { +bool path_is_absolute(const char *path) { return path[0] == PATH_SEPARATOR #if _WIN32 || path[1] == ':' @@ -363,8 +331,7 @@ static bool path_is_absolute(char const *path) { ; } -// assuming `dir` is an absolute path, returns the absolute path of `relpath`, relative to `dir`. -static void path_full(char const *dir, char const *relpath, char *abspath, size_t abspath_size) { +void path_full(const char *dir, const char *relpath, char *abspath, size_t abspath_size) { assert(abspath_size); assert(dir[0]); abspath[0] = '\0'; @@ -386,7 +353,7 @@ static void path_full(char const *dir, char const *relpath, char *abspath, size_ while (*relpath) { size_t component_len = strcspn(relpath, ALL_PATH_SEPARATORS); - char const *component_end = relpath + component_len; + const char *component_end = relpath + component_len; size_t len = strlen(abspath); if (component_len == 1 && relpath[0] == '.') { @@ -411,18 +378,25 @@ static void path_full(char const *dir, char const *relpath, char *abspath, size_ } } -// returns true if the paths are the same. -// handles the fact that paths are case insensitive on windows. -// treats links as different from the files they point to. -static bool paths_eq(char const *path1, char const *path2) { -#if _WIN32 - return _stricmp(path1, path2) == 0; -#else +bool paths_eq(const char *path1, const char *path2) { +#if __unix__ return streq(path1, path2); +#else + char fixed_path1[8192]; + char fixed_path2[8192]; + strbuf_cpy(fixed_path1, path1); + strbuf_cpy(fixed_path2, path2); + for (size_t i = 0; fixed_path1[i]; ++i) + if (fixed_path1[i] == '/') + fixed_path1[i] = '\\'; + for (size_t i = 0; fixed_path2[i]; ++i) + if (fixed_path2[i] == '/') + fixed_path2[i] = '\\'; + return _stricmp(fixed_path1, fixed_path2) == 0; #endif } -static void change_directory(char const *path) { +void change_directory(const char *path) { #if _WIN32 _chdir(path); #else @@ -430,8 +404,7 @@ static void change_directory(char const *path) { #endif } -// returns true on success -static bool copy_file(char const *src, char const *dst) { +bool copy_file(const char *src, const char *dst) { bool success = false; FILE *src_file = fopen(src, "rb"); if (src_file) { @@ -452,125 +425,3 @@ static bool copy_file(char const *src, char const *dst) { } -static uint64_t str_hash(char const *str, size_t len) { - uint64_t hash = 0; - char const *p = str, *end = str + len; - for (; p < end; ++p) { - hash = ((hash * 1664737020647550361 + 123843) << 8) + 2918635993572506131*(uint64_t)*p; - } - return hash; -} - -typedef struct { - char *str; - size_t len; - uint64_t data[]; -} StrHashTableSlot; - -typedef StrHashTableSlot *StrHashTableSlotPtr; - -typedef struct { - StrHashTableSlot **slots; - size_t data_size; - size_t nentries; /* # of filled slots */ -} StrHashTable; - -static inline void str_hash_table_create(StrHashTable *t, size_t data_size) { - t->slots = NULL; - t->data_size = data_size; - t->nentries = 0; -} - -static StrHashTableSlot **str_hash_table_slot_get(StrHashTableSlot **slots, char const *s, size_t s_len, size_t i) { - StrHashTableSlot **slot; - size_t slots_cap = arr_len(slots); - while (1) { - assert(i < slots_cap); - slot = &slots[i]; - if (!*slot) break; - if (s && (*slot)->str && - s_len == (*slot)->len && memcmp(s, (*slot)->str, s_len) == 0) - break; - i = (i+1) % slots_cap; - } - return slot; -} - -static void str_hash_table_grow(StrHashTable *t) { - size_t slots_cap = arr_len(t->slots); - if (slots_cap <= 2 * t->nentries) { - StrHashTableSlot **new_slots = NULL; - size_t new_slots_cap = slots_cap * 2 + 10; - arr_set_len(new_slots, new_slots_cap); - memset(new_slots, 0, new_slots_cap * sizeof *new_slots); - arr_foreach_ptr(t->slots, StrHashTableSlotPtr, slotp) { - StrHashTableSlot *slot = *slotp; - if (slot) { - uint64_t new_hash = str_hash(slot->str, slot->len); - StrHashTableSlot **new_slot = str_hash_table_slot_get(new_slots, slot->str, slot->len, new_hash % new_slots_cap); - *new_slot = slot; - } - } - arr_clear(t->slots); - t->slots = new_slots; - } -} - -static inline size_t str_hash_table_slot_size(StrHashTable *t) { - return sizeof(StrHashTableSlot) + ((t->data_size + sizeof(uint64_t) - 1) / sizeof(uint64_t)) * sizeof(uint64_t); -} - -static StrHashTableSlot *str_hash_table_insert_(StrHashTable *t, char const *str, size_t len) { - size_t slots_cap; - uint64_t hash; - StrHashTableSlot **slot; - str_hash_table_grow(t); - slots_cap = arr_len(t->slots); - hash = str_hash(str, len); - slot = str_hash_table_slot_get(t->slots, str, len, hash % slots_cap); - if (!*slot) { - *slot = calloc(1, str_hash_table_slot_size(t)); - char *s = (*slot)->str = calloc(1, len + 1); - memcpy(s, str, len); - (*slot)->len = len; - ++t->nentries; - } - return *slot; -} - -// does NOT check for a null byte. -static inline void *str_hash_table_insert_with_len(StrHashTable *t, char const *str, size_t len) { - return str_hash_table_insert_(t, str, len)->data; -} - -static inline void *str_hash_table_insert(StrHashTable *t, char const *str) { - return str_hash_table_insert_(t, str, strlen(str))->data; -} - -static void str_hash_table_clear(StrHashTable *t) { - arr_foreach_ptr(t->slots, StrHashTableSlotPtr, slotp) { - if (*slotp) { - free((*slotp)->str); - } - free(*slotp); - } - arr_clear(t->slots); - t->nentries = 0; -} - -static StrHashTableSlot *str_hash_table_get_(StrHashTable *t, char const *str, size_t len) { - size_t nslots = arr_len(t->slots), slot_index; - if (!nslots) return NULL; - slot_index = str_hash(str, len) % arr_len(t->slots); - return *str_hash_table_slot_get(t->slots, str, len, slot_index); -} - -static inline void *str_hash_table_get_with_len(StrHashTable *t, char const *str, size_t len) { - StrHashTableSlot *slot = str_hash_table_get_(t, str, len); - if (!slot) return NULL; - return slot->data; -} - -static inline void *str_hash_table_get(StrHashTable *t, char const *str) { - return str_hash_table_get_with_len(t, str, strlen(str)); -} |