doxygen documentation

author: pommicket <pommicket@gmail.com> 2023-02-28 19:39:06 -0500
committer: pommicket <pommicket@gmail.com> 2023-02-28 19:39:06 -0500
commit: 52ba910a48a4dd2abb798c3c998ecb0e006c1895 (patch)
tree: 5fa4b3fb1e28ee1027aaa02a78ddf9b9bd7b59f5 /unicode.h
parent: 5e92dac7b8ff9d60bfa3dcc69c3f77663a01aea4 (diff)
1 files changed, 29 insertions, 20 deletions
diff --git a/unicode.h b/unicode.h
index 2b686df..1e517c4 100644
--- a/unicode.h
+++ b/unicode.h
@@ -1,10 +1,14 @@
-// functions for dealing with UTF-8/UTF-16/UTF-32.
-// this file is entirely self-contained.
+/// \file
+/// functions for dealing with UTF-8/UTF-16/UTF-32.
+///
+/// this file is entirely self-contained.
 
 #ifndef UNICODE_H_
 #define UNICODE_H_
+/// useful for "this character couldn't be rendered / is invalid UTF-8"
 #define UNICODE_BOX_CHARACTER 0x2610
-#define UNICODE_CODE_POINTS 0x110000 // number of Unicode code points
+/// number of Unicode code points
+#define UNICODE_CODE_POINTS 0x110000
 
 #include <stddef.h>
 #include <stdint.h>
@@ -19,15 +23,16 @@ static bool unicode_is_continuation_byte(uint8_t byte) {
 	return (byte & 0xC0) == 0x80;
 }
 
-// A lot like mbrtoc32. Doesn't depend on the locale though, for one thing.
-// *c will be filled with the next UTF-8 code point in `str`. `bytes` refers to the maximum
-// number of bytes that can be read from `str` (note: this function will never read past a null
-// byte, even if `bytes` indicates that it could).
-// Returns:
-// 0 - if a null character was encountered or if `bytes == 0`
-// (size_t)-1 - on invalid UTF-8
-// (size_t)-2 - on incomplete code point (str should be longer)
-// other - the number of bytes read from `str`.
+/// A lot like mbrtoc32. Doesn't depend on the locale though, for one thing.
+///
+/// *c will be filled with the next UTF-8 code point in `str`. `bytes` refers to the maximum
+/// number of bytes that can be read from `str` (note: this function will never read past a null
+/// byte, even if `bytes` indicates that it could).
+/// Returns:\n
+/// `0` - if a null character was encountered or if `bytes == 0`\n
+/// `(size_t)-1` - on invalid UTF-8\n
+/// `(size_t)-2` - on incomplete code point (str should be longer)\n
+/// other - the number of bytes read from `str`.
 static size_t unicode_utf8_to_utf32(uint32_t *c, const char *str, size_t bytes) {
 	*c = 0;
 	if (bytes == 0) {
@@ -121,10 +126,11 @@ static size_t unicode_utf8_to_utf32(uint32_t *c, const char *str, size_t bytes)
 	}
 }
 
-// A lot like c32rtomb
-// Converts a UTF-32 codepoint to a UTF-8 string. Writes at most 4 bytes to s.
-// NOTE: It is YOUR JOB to null-terminate your string if the UTF-32 isn't null-terminated!
-// Returns the number of bytes written to s, or (size_t)-1 on invalid UTF-32.
+/// A lot like c32rtomb
+///
+/// Converts a UTF-32 codepoint to a UTF-8 string. Writes at most 4 bytes to `s`.
+/// NOTE: It is YOUR JOB to null-terminate your string if the UTF-32 isn't null-terminated!
+/// Returns the number of bytes written to `s`, or `(size_t)-1` on invalid UTF-32.
 static size_t unicode_utf32_to_utf8(char *s, uint32_t c32) {
 	uint8_t *p = (uint8_t *)s;
 	if (c32 <= 0x7F) {
@@ -162,7 +168,8 @@ static size_t unicode_utf32_to_utf8(char *s, uint32_t c32) {
 
 
 // get the number of UTF-16 codepoints needed to encode `str`.
-// returns (size_t)-1 on bad UTF-8
+///
+/// returns `(size_t)-1` on bad UTF-8
 static size_t unicode_utf16_len(const char *str) {
 	size_t len = 0;
 	uint32_t c = 0;
@@ -179,9 +186,11 @@ static size_t unicode_utf16_len(const char *str) {
 	return len;
 }
 
-// returns the UTF-8 offset from `str` which corresponds to a UTF-16 offset of utf16_offset (rounds down if utf16_offset is in the middle of a codepoint).
-// returns strlen(str) if utf16_offset == unicode_utf16_len(str)
-// returns (size_t)-1 on bad UTF-8, or if utf16_offset > unicode_utf16_len(str)
+/// returns the UTF-8 offset from `str` which corresponds to a UTF-16 offset of
+/// `utf16_offset` (rounds down if `utf16_offset` is in the middle of a codepoint).
+///
+/// returns `strlen(str)` if `utf16_offset == unicode_utf16_len(str)`\n
+/// returns `(size_t)-1` on bad UTF-8, or if `utf16_offset > unicode_utf16_len(str)`
 static size_t unicode_utf16_to_utf8_offset(const char *str, size_t utf16_offset) {
 	size_t offset = 0;
 	uint32_t c = 0;
author	pommicket <pommicket@gmail.com>	2023-02-28 19:39:06 -0500
committer	pommicket <pommicket@gmail.com>	2023-02-28 19:39:06 -0500
commit	52ba910a48a4dd2abb798c3c998ecb0e006c1895 (patch)
tree	5fa4b3fb1e28ee1027aaa02a78ddf9b9bd7b59f5 /unicode.h
parent	5e92dac7b8ff9d60bfa3dcc69c3f77663a01aea4 (diff)