diff options
author | Leo Tenenbaum <pommicket@gmail.com> | 2021-01-21 13:35:18 -0500 |
---|---|---|
committer | Leo Tenenbaum <pommicket@gmail.com> | 2021-01-21 13:36:36 -0500 |
commit | 8fca7beaf35cfc438d5d29f352f80dd18efe7d2e (patch) | |
tree | 1fee5e822db6292be2d1629edf20d6239524f365 /util.c | |
parent | dd91d6c72625cc7ed2ec5954a5cbca35fd7655d4 (diff) |
file selector now actually working
also made stristr work with UTF-8
Diffstat (limited to 'util.c')
-rw-r--r-- | util.c | 62 |
1 file changed, 44 insertions, 18 deletions
@@ -105,29 +105,50 @@ static void str_cpy(char *dst, size_t dst_sz, char const *src) {
 	dst[n] = 0;
 }
 
+// advances str to the start of the next UTF8 character
+static void utf8_next_char_const(char const **str) {
+	if (**str) {
+		do {
+			++*str;
+		} while (((u8)(**str) & 0xC0) == 0x80); // while we are on a continuation byte
+	}
+}
+
 /*
-returns the first instance of needle in haystack, ignoring the case of the characters,
+returns the first instance of needle in haystack, where both are UTF-8 strings, ignoring the case of the characters,
 or NULL if the haystack does not contain needle
 WARNING: O(strlen(haystack) * strlen(needle))
 */
 static char *stristr(char const *haystack, char const *needle) {
-	size_t needle_len = strlen(needle), haystack_len = strlen(haystack), i, j;
+	size_t needle_bytes = strlen(needle), haystack_bytes = strlen(haystack);
+
+	if (needle_bytes > haystack_bytes) return NULL;
 
-	if (needle_len > haystack_len) return NULL; // a larger string can't fit in a smaller string
+	char const *haystack_end = haystack + haystack_bytes;
+	char const *needle_end = needle + needle_bytes;
 
-	for (i = 0; i <= haystack_len - needle_len; ++i) {
-		char const *p = haystack + i, *q = needle;
+	for (char const *haystack_start = haystack; haystack_start + needle_bytes <= haystack_end; utf8_next_char_const(&haystack_start)) {
+		char const *p = haystack_start, *q = needle;
+		mbstate_t pstate = {0}, qstate = {0};
 		bool match = true;
-		for (j = 0; j < needle_len; ++j) {
-			if (tolower(*p) != tolower(*q)) {
-				match = false;
-				break;
-			}
-			++p;
-			++q;
+
+		// check if p matches q
+		while (q < needle_end) {
+			char32_t pchar = 0, qchar = 0;
+			size_t bytes_p = mbrtoc32(&pchar, p, (size_t)(haystack_end - p), &pstate);
+			size_t bytes_q = mbrtoc32(&qchar, q, (size_t)(needle_end - q), &qstate);
+			if (bytes_p == (size_t)-3) bytes_p = 0;
+			if (bytes_q == (size_t)-3) bytes_q = 0;
+			if (bytes_p > (size_t)-3 || bytes_q > (size_t)-3) return NULL; // invalid UTF-8
+			bool same = pchar == qchar;
+			if (pchar < WINT_MAX && qchar < WINT_MAX) // on Windows, there is no way of finding the lower-case version of a codepoint outside the BMP. ):
+				same = towlower((wint_t)pchar) == towlower((wint_t)qchar);
+			if (!same) match = false;
+			p += bytes_p;
+			q += bytes_q;
 		}
 		if (match)
-			return (char *)haystack + i;
+			return (char *)haystack_start;
 	}
 	return NULL;
 }
@@ -151,12 +172,17 @@ static bool str_satisfies(char const *s, int (*predicate)(int)) {
 	return true;
 }
 
-// function to be passed into qsort for case insensitive sorting
-static int str_qsort_case_insensitive_cmp(const void *av, const void *bv) {
-	char const *const *a = av, *const *b = bv;
+
+static int strcmp_case_insensitive(char const *a, char const *b) {
 #if _WIN32
-	return _stricmp(*a, *b);
+	return _stricmp(a, b);
 #else
-	return strcasecmp(*a, *b);
+	return strcasecmp(a, b);
 #endif
 }
+
+// function to be passed into qsort for case insensitive sorting
+static int str_qsort_case_insensitive_cmp(const void *av, const void *bv) {
+	char const *const *a = av, *const *b = bv;
+	return strcmp_case_insensitive(*a, *b);
+}