summaryrefslogtreecommitdiff
path: root/unicode.h
diff options
context:
space:
mode:
author pommicket <pommicket@gmail.com> 2023-09-09 21:19:26 -0400
committer pommicket <pommicket@gmail.com> 2023-09-09 21:19:26 -0400
commit 16fbe87451b0ca3e8fa35fd04e0afbfab368ea65 (patch)
tree dcd4c1e37b4a89173e23047f644df94a9ca92399 /unicode.h
parent 0dcfd5a4f1fd865c24c01b17b214e1f72e4c06fe (diff)
fix handling of TextEdit[]
i hate microsoft so much
Diffstat (limited to 'unicode.h')
-rw-r--r-- unicode.h 16
1 files changed, 16 insertions, 0 deletions
diff --git a/unicode.h b/unicode.h
index 8765164..cd6a965 100644
--- a/unicode.h
+++ b/unicode.h
@@ -187,6 +187,22 @@ static size_t unicode_utf16_len(const char *str) {
return len;
}
+/// count the UTF-32 code points that the null-terminated UTF-8 string `str` decodes to.
+///
+/// returns `(size_t)-1` on bad UTF-8 (any decoder result >= `(size_t)-2`).
+static size_t unicode_utf32_len(const char *str) {
+	size_t count = 0;
+	for (const char *p = str; *p != '\0'; ++count) {
+		uint32_t codepoint = 0; // decoded value is not needed, only its byte length
+		size_t consumed = unicode_utf8_to_utf32(&codepoint, p, 4);
+		if (consumed >= (size_t)-2) {
+			return (size_t)-1;
+		}
+		p += consumed;
+	}
+	return count;
+}
+
/// returns the UTF-8 offset from `str` which corresponds to a UTF-16 offset of
/// `utf16_offset` (rounds down if `utf16_offset` is in the middle of a codepoint).
///