more reorganizing

author: pommicket <pommicket@gmail.com> 2023-01-01 12:05:48 -0500
committer: pommicket <pommicket@gmail.com> 2023-01-01 12:05:48 -0500
commit: 8308d6fc53ee339a0b5cac4b9b837179c61efbaa (patch)
tree: e4f94a5695e9d5cf035582799738dc2e75c0e779 /util.c
parent: 57834dc0b116a62b806781ddec101bca4cda3abb (diff)
1 files changed, 73 insertions, 222 deletions
diff --git a/util.c b/util.c
index 49e2a20..3da5f30 100644
--- a/util.c
+++ b/util.c
@@ -8,48 +8,52 @@
 #endif
 
 #include "base.h"
+#include "util.h"
 
-// Is this character a "word" character?
-static bool is_word(char32_t c) {
+// on 16-bit systems, this is 16383. on 32/64-bit systems, this is 1073741823
+// it is unusual to have a string that long.
+#define STRLEN_SAFE_MAX (UINT_MAX >> 2)
+
+bool is_word(char32_t c) {
 	return c > WCHAR_MAX || c == '_' || iswalnum((wint_t)c);
 }
 
-static bool is_digit(char32_t c) {
+bool is_digit(char32_t c) {
 	return c < WCHAR_MAX && iswdigit((wint_t)c);
 }
 
-static bool is_space(char32_t c) {
+bool is_space(char32_t c) {
 	return c < WCHAR_MAX && iswspace((wint_t)c);
 }
 
-static bool is_a_tty(FILE *out) {
-	return
-	#if __unix__
-		isatty(fileno(out))
+bool is_a_tty(FILE *out) {
+	#if _WIN32
+	int fd = _fileno(out);
+	return fd >= 0 && _isatty(fd);
 	#else
-		false
+	int fd = fileno(out);
+	return fd >= 0 && isatty(fd);
 	#endif
-		;
 }
 
-static const char *term_italics(FILE *out) {
+const char *term_italics(FILE *out) {
 	return is_a_tty(out) ? "\x1b[3m" : "";
 }
 
-static const char *term_bold(FILE *out) {
+const char *term_bold(FILE *out) {
 	return is_a_tty(out) ? "\x1b[1m" : "";
 }
 
-static const char *term_yellow(FILE *out) {
+const char *term_yellow(FILE *out) {
 	return is_a_tty(out) ? "\x1b[93m" : "";
 }
 
-static const char *term_clear(FILE *out) {
+const char *term_clear(FILE *out) {
 	return is_a_tty(out) ? "\x1b[0m" : "";
 }
 
 
-static u8 util_popcount(u64 x) {
+u8 util_popcount(u64 x) {
 #ifdef __GNUC__
 	return (u8)__builtin_popcountll(x);
 #else
@@ -62,7 +66,7 @@ static u8 util_popcount(u64 x) {
 #endif
 }
 
-static u8 util_count_leading_zeroes32(u32 x) {
+u8 util_count_leading_zeroes32(u32 x) {
 	if (x == 0) return 32; // GCC's __builtin_clz is undefined for x = 0
 #if __GNUC__ && UINT_MAX == 4294967295
 	return (u8)__builtin_clz(x);
@@ -80,12 +84,12 @@ static u8 util_count_leading_zeroes32(u32 x) {
 #endif
 }
 
-static bool util_is_power_of_2(u64 x) {
+bool util_is_power_of_2(u64 x) {
 	return util_popcount(x) == 1;
 }
 
 // for finding a character in a char32 string
-static char32_t *util_mem32chr(char32_t *s, char32_t c, size_t n) {
+char32_t *util_mem32chr(char32_t *s, char32_t c, size_t n) {
 	for (size_t i = 0; i < n; ++i) {
 		if (s[i] == c) {
 			return &s[i];
@@ -94,7 +98,7 @@ static char32_t *util_mem32chr(char32_t *s, char32_t c, size_t n) {
 	return NULL;
 }
 
-static char32_t const *util_mem32chr_const(char32_t const *s, char32_t c, size_t n) {
+const char32_t *util_mem32chr_const(const char32_t *s, char32_t c, size_t n) {
 	for (size_t i = 0; i < n; ++i) {
 		if (s[i] == c) {
 			return &s[i];
@@ -103,23 +107,23 @@ static char32_t const *util_mem32chr_const(char32_t const *s, char32_t c, size_t
 	return NULL;
 }
 
-static bool str_has_prefix(char const *str, char const *prefix) {
+bool str_has_prefix(const char *str, const char *prefix) {
 	return strncmp(str, prefix, strlen(prefix)) == 0;
 }
 
 // e.g. "/usr/share/bla" has the path prefix "/usr/share" but not "/usr/sha"
-static bool str_has_path_prefix(const char *path, const char *prefix) {
+bool str_has_path_prefix(const char *path, const char *prefix) {
 	size_t prefix_len = strlen(prefix);
 	if (strncmp(path, prefix, prefix_len) != 0)
 		return false;
 	return path[prefix_len] == '\0' || strchr(ALL_PATH_SEPARATORS, path[prefix_len]);
 }
 
-static bool streq(char const *a, char const *b) {
+bool streq(const char *a, const char *b) {
 	return strcmp(a, b) == 0;
 }
 
-static size_t strn_len(const char *src, size_t n) {
+size_t strn_len(const char *src, size_t n) {
 	const char *p = src;
 	// in C99 and C++11/14, calling memchr with a size larger than
 	// the size of the object is undefined behaviour.
@@ -131,7 +135,7 @@ static size_t strn_len(const char *src, size_t n) {
 }
 
 // duplicates at most n characters from src
-static char *strn_dup(char const *src, size_t n) {
+char *strn_dup(const char *src, size_t n) {
 	size_t len = strn_len(src, n);
 	if (n > len)
 		n = len;
@@ -144,26 +148,12 @@ static char *strn_dup(char const *src, size_t n) {
 }
 
 // duplicates a null-terminated string. the returned string should be passed to free()
-static char *str_dup(char const *src) {
+char *str_dup(const char *src) {
 	return strn_dup(src, SIZE_MAX);
 }
 
-// like snprintf, but not screwed up on windows
-#define str_printf(str, size, ...) (str)[(size) - 1] = '\0', snprintf((str), (size) - 1, __VA_ARGS__)
-// like snprintf, but the size is taken to be the length of the array str.
-//                              first, check that str is actually an array
-#define strbuf_printf(str, ...) assert(sizeof str != 4 && sizeof str != 8), \
-	str_printf(str, sizeof str, __VA_ARGS__)
-#define str_catf(str, size, ...) str_printf((str) + strlen(str), (size) - strlen(str), __VA_ARGS__)
-#define strbuf_catf(str, ...) assert(sizeof str != 4 && sizeof str != 8), \
-	str_catf(str, sizeof str, __VA_ARGS__)
-
-// on 16-bit systems, this is 16383. on 32/64-bit systems, this is 1073741823
-// it is unusual to have a string that long.
-#define STRLEN_SAFE_MAX (UINT_MAX >> 2)
-
 // safer version of strncat. dst_sz includes a null terminator.
-static void strn_cat(char *dst, size_t dst_sz, char const *src, size_t src_len) {
+void strn_cat(char *dst, size_t dst_sz, const char *src, size_t src_len) {
 	size_t dst_len = strlen(dst);
 
 	// make sure dst_len + src_len + 1 doesn't overflow
@@ -190,12 +180,12 @@ static void strn_cat(char *dst, size_t dst_sz, char const *src, size_t src_len)
 }
 
 // safer version of strcat. dst_sz includes a null terminator.
-static void str_cat(char *dst, size_t dst_sz, char const *src) {
+void str_cat(char *dst, size_t dst_sz, const char *src) {
 	strn_cat(dst, dst_sz, src, strlen(src));
 }
 
 // safer version of strncpy. dst_sz includes a null terminator.
-static void strn_cpy(char *dst, size_t dst_sz, char const *src, size_t src_len) {
+void strn_cpy(char *dst, size_t dst_sz, const char *src, size_t src_len) {
 	size_t n = src_len; // number of bytes to copy
 	for (size_t i = 0; i < n; ++i) {
 		if (src[i] == '\0') {
@@ -216,14 +206,10 @@ static void strn_cpy(char *dst, size_t dst_sz, char const *src, size_t src_len)
 }
 
 // safer version of strcpy. dst_sz includes a null terminator.
-static void str_cpy(char *dst, size_t dst_sz, char const *src) {
+void str_cpy(char *dst, size_t dst_sz, const char *src) {
 	strn_cpy(dst, dst_sz, src, SIZE_MAX);
 }
 
-#define strbuf_cpy(dst, src) str_cpy(dst, sizeof dst, src)
-#define strbuf_cat(dst, src) str_cat(dst, sizeof dst, src)
-
-
 char *a_sprintf(PRINTF_FORMAT_STRING const char *fmt, ...) ATTRIBUTE_PRINTF(1, 2);
 char *a_sprintf(const char *fmt, ...) {
 	// idk if you can always just pass NULL to vsnprintf
@@ -244,7 +230,7 @@ char *a_sprintf(const char *fmt, ...) {
 
 
 // advances str to the start of the next UTF8 character
-static void utf8_next_char_const(char const **str) {
+static void utf8_next_char_const(const char **str) {
 	if (**str) {
 		do {
 			++*str;
@@ -252,21 +238,16 @@ static void utf8_next_char_const(char const **str) {
 	}
 }
 
-/* 
-returns the first instance of needle in haystack, where both are UTF-8 strings, ignoring the case of the characters,
-or NULL if the haystack does not contain needle
-WARNING: O(strlen(haystack) * strlen(needle))
-*/
-static char *stristr(char const *haystack, char const *needle) {
+char *strstr_case_insensitive(const char *haystack, const char *needle) {
 	size_t needle_bytes = strlen(needle), haystack_bytes = strlen(haystack);
 
 	if (needle_bytes > haystack_bytes) return NULL;
 	
-	char const *haystack_end = haystack + haystack_bytes;
-	char const *needle_end = needle + needle_bytes;
+	const char *haystack_end = haystack + haystack_bytes;
+	const char *needle_end = needle + needle_bytes;
 
-	for (char const *haystack_start = haystack; haystack_start + needle_bytes <= haystack_end; utf8_next_char_const(&haystack_start)) {
-		char const *p = haystack_start, *q = needle;
+	for (const char *haystack_start = haystack; haystack_start + needle_bytes <= haystack_end; utf8_next_char_const(&haystack_start)) {
+		const char *p = haystack_start, *q = needle;
 		bool match = true;
 
 		// check if p matches q
@@ -288,27 +269,14 @@ static char *stristr(char const *haystack, char const *needle) {
 	return NULL;
 }
 
-static void print_bytes(u8 *bytes, size_t n) {
-	u8 *b, *end;
+void print_bytes(const u8 *bytes, size_t n) {
+	const u8 *b, *end;
 	for (b = bytes, end = bytes + n; b != end; ++b)
-		printf("%x ", *b);
+		printf("%02x ", *b);
 	printf("\n");
 }
 
-/*
-does this predicate hold for all the characters of s. predicate is int (*)(int) instead
-of bool (*)(char) so that you can pass isprint, etc. to it.
-*/
-static bool str_satisfies(char const *s, int (*predicate)(int)) {
-	char const *p;
-	for (p = s; *p; ++p)
-		if (!predicate(*p))
-			return false;
-	return true;
-}
-
-
-static int strcmp_case_insensitive(char const *a, char const *b) {
+int strcmp_case_insensitive(const char *a, const char *b) {
 #if _WIN32
 	return _stricmp(a, b);
 #else
@@ -316,25 +284,27 @@ static int strcmp_case_insensitive(char const *a, char const *b) {
 #endif
 }
 
-// function to be passed into qsort for case insensitive sorting
-static int str_qsort_case_insensitive_cmp(const void *av, const void *bv) {
-	char const *const *a = av, *const *b = bv;
+int str_qsort_case_insensitive_cmp(const void *av, const void *bv) {
+	const char *const *a = av, *const *b = bv;
 	return strcmp_case_insensitive(*a, *b);
 }
 
-// imo windows has the argument order right here
 #if _WIN32
-#define qsort_with_context qsort_s
+void qsort_with_context(void *base, size_t nmemb, size_t size,
+	int (*compar)(void *, const void *, const void *),
+	void *arg) {
+	qsort_s(base, nmemb, size, compar, arg);
+}
 #else
 typedef struct {
 	int (*compar)(void *, const void *, const void *);
 	void *context;
 } QSortWithContext;
-static int qsort_with_context_cmp(const void *a, const void *b, void *context) {
+int qsort_with_context_cmp(const void *a, const void *b, void *context) {
 	QSortWithContext *c = context;
 	return c->compar(c->context, a, b);
 }
-static void qsort_with_context(void *base, size_t nmemb, size_t size,
+void qsort_with_context(void *base, size_t nmemb, size_t size,
 	int (*compar)(void *, const void *, const void *),
 	void *arg) {
 	QSortWithContext ctx = {
@@ -345,17 +315,15 @@ static void qsort_with_context(void *base, size_t nmemb, size_t size,
 }
 #endif
 
-// the actual file name part of the path; get rid of the containing directory.
-// NOTE: the returned string is part of path, so you don't need to free it or anything.
-static char const *path_filename(char const *path) {
-	char const *last_path_sep = strrchr(path, PATH_SEPARATOR);
+const char *path_filename(const char *path) {
+	const char *last_path_sep = strrchr(path, PATH_SEPARATOR);
 	if (last_path_sep)
 		return last_path_sep + 1;
 	// (a relative path with no path separators)
 	return path;
 }
 
-static bool path_is_absolute(char const *path) {
+bool path_is_absolute(const char *path) {
 	return path[0] == PATH_SEPARATOR
 	#if _WIN32
 		|| path[1] == ':'
@@ -363,8 +331,7 @@ static bool path_is_absolute(char const *path) {
 		;
 }
 
-// assuming `dir` is an absolute path, returns the absolute path of `relpath`, relative to `dir`.
-static void path_full(char const *dir, char const *relpath, char *abspath, size_t abspath_size) {
+void path_full(const char *dir, const char *relpath, char *abspath, size_t abspath_size) {
 	assert(abspath_size);
 	assert(dir[0]);
 	abspath[0] = '\0';
@@ -386,7 +353,7 @@ static void path_full(char const *dir, char const *relpath, char *abspath, size_
 	
 	while (*relpath) {
 		size_t component_len = strcspn(relpath, ALL_PATH_SEPARATORS);
-		char const *component_end = relpath + component_len;
+		const char *component_end = relpath + component_len;
 
 		size_t len = strlen(abspath);
 		if (component_len == 1 && relpath[0] == '.') {
@@ -411,18 +378,25 @@ static void path_full(char const *dir, char const *relpath, char *abspath, size_
 	}
 }
 
-// returns true if the paths are the same.
-// handles the fact that paths are case insensitive on windows.
-// treats links as different from the files they point to.
-static bool paths_eq(char const *path1, char const *path2) {
-#if _WIN32
-	return _stricmp(path1, path2) == 0;
-#else
+bool paths_eq(const char *path1, const char *path2) {
+#if __unix__
 	return streq(path1, path2);
+#else
+	char fixed_path1[8192];
+	char fixed_path2[8192];
+	strbuf_cpy(fixed_path1, path1);
+	strbuf_cpy(fixed_path2, path2);
+	for (size_t i = 0; fixed_path1[i]; ++i)
+		if (fixed_path1[i] == '/')
+			fixed_path1[i] = '\\';
+	for (size_t i = 0; fixed_path2[i]; ++i)
+		if (fixed_path2[i] == '/')
+			fixed_path2[i] = '\\';
+	return _stricmp(fixed_path1, fixed_path2) == 0;
 #endif
 }
 
-static void change_directory(char const *path) {
+void change_directory(const char *path) {
 #if _WIN32
 	_chdir(path);
 #else
@@ -430,8 +404,7 @@ static void change_directory(char const *path) {
 #endif
 }
 
-// returns true on success
-static bool copy_file(char const *src, char const *dst) {
+bool copy_file(const char *src, const char *dst) {
 	bool success = false;
 	FILE *src_file = fopen(src, "rb");
 	if (src_file) {
@@ -452,125 +425,3 @@ static bool copy_file(char const *src, char const *dst) {
 }
 
 
-static uint64_t str_hash(char const *str, size_t len) {
-	uint64_t hash = 0;
-	char const *p = str, *end = str + len;
-	for (; p < end; ++p) {
-		hash = ((hash * 1664737020647550361 + 123843) << 8) + 2918635993572506131*(uint64_t)*p;
-	}
-	return hash;
-}
-
-typedef struct {
-	char *str;
-	size_t len;
-	uint64_t data[];
-} StrHashTableSlot;
-
-typedef StrHashTableSlot *StrHashTableSlotPtr;
-
-typedef struct {
-	StrHashTableSlot **slots;
-	size_t data_size;
-	size_t nentries; /* # of filled slots */
-} StrHashTable;
-
-static inline void str_hash_table_create(StrHashTable *t, size_t data_size) {
-	t->slots = NULL;
-	t->data_size = data_size;
-	t->nentries = 0;
-}
-
-static StrHashTableSlot **str_hash_table_slot_get(StrHashTableSlot **slots, char const *s, size_t s_len, size_t i) {
-	StrHashTableSlot **slot;
-	size_t slots_cap = arr_len(slots);
-	while (1) {
-		assert(i < slots_cap);
-		slot = &slots[i];
-		if (!*slot) break;
-		if (s && (*slot)->str &&
-			s_len == (*slot)->len && memcmp(s, (*slot)->str, s_len) == 0)
-			break;
-		i = (i+1) % slots_cap;
-	}
-	return slot;
-}
-
-static void str_hash_table_grow(StrHashTable *t) {
-	size_t slots_cap = arr_len(t->slots);
-	if (slots_cap <= 2 * t->nentries) {
-		StrHashTableSlot **new_slots = NULL;
-		size_t new_slots_cap = slots_cap * 2 + 10;
-		arr_set_len(new_slots, new_slots_cap);
-		memset(new_slots, 0, new_slots_cap * sizeof *new_slots);
-		arr_foreach_ptr(t->slots, StrHashTableSlotPtr, slotp) {
-			StrHashTableSlot *slot = *slotp;
-			if (slot) {
-				uint64_t new_hash = str_hash(slot->str, slot->len);
-				StrHashTableSlot **new_slot = str_hash_table_slot_get(new_slots, slot->str, slot->len, new_hash % new_slots_cap);
-				*new_slot = slot;
-			}
-		}
-		arr_clear(t->slots);
-		t->slots = new_slots;
-	}
-}
-
-static inline size_t str_hash_table_slot_size(StrHashTable *t) {
-	return sizeof(StrHashTableSlot) + ((t->data_size + sizeof(uint64_t) - 1) / sizeof(uint64_t)) * sizeof(uint64_t);
-}
-
-static StrHashTableSlot *str_hash_table_insert_(StrHashTable *t, char const *str, size_t len) {
-	size_t slots_cap;
-	uint64_t hash;
-	StrHashTableSlot **slot;
-	str_hash_table_grow(t);
-	slots_cap = arr_len(t->slots);
-	hash = str_hash(str, len);
-	slot = str_hash_table_slot_get(t->slots, str, len, hash % slots_cap);
-	if (!*slot) {
-		*slot = calloc(1, str_hash_table_slot_size(t));
-		char *s = (*slot)->str = calloc(1, len + 1);
-		memcpy(s, str, len);
-		(*slot)->len = len;
-		++t->nentries;
-	}
-	return *slot;
-}
-
-// does NOT check for a null byte.
-static inline void *str_hash_table_insert_with_len(StrHashTable *t, char const *str, size_t len) {
-	return str_hash_table_insert_(t, str, len)->data;
-}
-
-static inline void *str_hash_table_insert(StrHashTable *t, char const *str) {
-	return str_hash_table_insert_(t, str, strlen(str))->data;
-}
-
-static void str_hash_table_clear(StrHashTable *t) {
-	arr_foreach_ptr(t->slots, StrHashTableSlotPtr, slotp) {
-		if (*slotp) {
-			free((*slotp)->str);
-		}
-		free(*slotp);
-	}
-	arr_clear(t->slots);
-	t->nentries = 0;
-}
-
-static StrHashTableSlot *str_hash_table_get_(StrHashTable *t, char const *str, size_t len) {
-	size_t nslots = arr_len(t->slots), slot_index;
-	if (!nslots) return NULL;
-	slot_index = str_hash(str, len) % arr_len(t->slots);
-	return *str_hash_table_slot_get(t->slots, str, len, slot_index);
-}
-
-static inline void *str_hash_table_get_with_len(StrHashTable *t, char const *str, size_t len) {
-	StrHashTableSlot *slot = str_hash_table_get_(t, str, len);
-	if (!slot) return NULL;
-	return slot->data;
-}
-
-static inline void *str_hash_table_get(StrHashTable *t, char const *str) {
-	return str_hash_table_get_with_len(t, str, strlen(str));
-}
author	pommicket <pommicket@gmail.com>	2023-01-01 12:05:48 -0500
committer	pommicket <pommicket@gmail.com>	2023-01-01 12:05:48 -0500
commit	8308d6fc53ee339a0b5cac4b9b837179c61efbaa (patch)
tree	e4f94a5695e9d5cf035582799738dc2e75c0e779 /util.c
parent	57834dc0b116a62b806781ddec101bca4cda3abb (diff)