From 16fbe87451b0ca3e8fa35fd04e0afbfab368ea65 Mon Sep 17 00:00:00 2001 From: pommicket Date: Sat, 9 Sep 2023 21:19:26 -0400 Subject: fix handling of TextEdit[] i hate microsoft so much --- unicode.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'unicode.h') diff --git a/unicode.h b/unicode.h index 8765164..cd6a965 100644 --- a/unicode.h +++ b/unicode.h @@ -187,6 +187,22 @@ static size_t unicode_utf16_len(const char *str) { return len; } +/// get the number of UTF-32 codepoints needed to encode the null-terminated UTF-8 string `str`. +/// +/// returns `(size_t)-1` on bad UTF-8 (i.e. when `unicode_utf8_to_utf32` returns `(size_t)-2` or `(size_t)-1`) +static size_t unicode_utf32_len(const char *str) { + size_t len = 0; + uint32_t c = 0; + while (*str) { + size_t n = unicode_utf8_to_utf32(&c, str, 4); + if (n >= (size_t)-2) + return (size_t)-1; + ++len; + str += n; + } + return len; +} + /// returns the UTF-8 offset from `str` which corresponds to a UTF-16 offset of /// `utf16_offset` (rounds down if `utf16_offset` is in the middle of a codepoint). /// -- cgit v1.2.3