diff options
author | Leo Tenenbaum <pommicket@gmail.com> | 2021-01-25 18:00:06 -0500 |
---|---|---|
committer | Leo Tenenbaum <pommicket@gmail.com> | 2021-01-25 18:00:06 -0500 |
commit | 795262f69900af674156bed2bcd0fdb57dbbb55e (patch) | |
tree | 03723b919ff498722d7985a93f9ce7f470596abb /string32.c | |
parent | a56f549a266e14cdc00a98e8dc3e154f5ac6c23e (diff) |
Replaced c32rtomb and mbrtoc32 with our own versions.
These are nicer to use since they don't involve mbstate_t, and they should be faster since they don't involve locales.
Diffstat (limited to 'string32.c')
-rw-r--r-- | string32.c | 13 |
1 files changed, 4 insertions, 9 deletions
```diff
@@ -32,19 +32,15 @@ String32 str32_from_utf8(char const *utf8) {
     char32_t *wide_p = widestr;
     char const *utf8_p = utf8;
     char const *utf8_end = utf8_p + len;
-    mbstate_t mbstate = {0};
     while (utf8_p < utf8_end) {
         char32_t c = 0;
-        size_t n = mbrtoc32(&c, utf8_p, (size_t)(utf8_end - utf8_p), &mbstate);
-        if (n == 0// null character. this shouldn't happen.
-            || n == (size_t)(-2) // incomplete character
+        size_t n = unicode_utf8_to_utf32(&c, utf8_p, (size_t)(utf8_end - utf8_p));
+        if (n == 0 // null character. this shouldn't happen.
             || n == (size_t)(-1) // invalid UTF-8
             ) {
             free(widestr);
             widestr = wide_p = NULL;
             break;
-        } else if (n == (size_t)(-3)) { // no bytes consumed, but a character was produced
-            *wide_p++ = c;
         } else {
             // n bytes consumed
             *wide_p++ = c;
@@ -65,11 +61,10 @@ static char *str32_to_utf8_cstr(String32 s) {
     char *utf8 = calloc(4 * s.len + 1, 1); // each codepoint takes up at most 4 bytes in UTF-8, + we need a terminating null byte
     if (utf8) {
         char *p = utf8;
-        mbstate_t mbstate; memset(&mbstate, 0, sizeof mbstate);
         for (size_t i = 0; i < s.len; ++i) {
-            size_t bytes = c32rtomb(p, s.str[i], &mbstate);
+            size_t bytes = unicode_utf32_to_utf8(p, s.str[i]);
             if (bytes == (size_t)-1) {
-                // invalid UTF-32 character
+                // invalid UTF-32 code point
                 free(utf8);
                 return NULL;
             } else {
```