Implement sections

author: pommicket <pommicket@gmail.com> 2025-09-12 15:40:56 -0400
committer: pommicket <pommicket@gmail.com> 2025-09-12 15:41:24 -0400
commit: ca21dcb53e41919fc255ad736986aa6ff1d5bd85 (patch)
tree: 08b0a41be6dacdbd9d1cc22e4931d150ef78e2ce
parent: 802f11352a1f8ab25a222111d2ba254ed9f7113f (diff)
5 files changed, 114 insertions, 25 deletions
diff --git a/Doxyfile b/Doxyfile
index 98c8a1e..eacb913 100644
--- a/Doxyfile
+++ b/Doxyfile
@@ -23,3 +23,4 @@ COLLABORATION_GRAPH    = NO
 WARN_IF_UNDOCUMENTED   = NO
 QUIET                  = YES
 INCLUDE_GRAPH          = NO
+WARN_AS_ERROR          = FAIL_ON_WARNINGS
diff --git a/examples/read_conf.c b/examples/read_conf.c
index 78e4847..f153c15 100644
--- a/examples/read_conf.c
+++ b/examples/read_conf.c
@@ -21,8 +21,8 @@ int main(int argc, char **argv) {
 	}
 	const pom_item *item;
 	pom_item_iter *iter = NULL;
-	while ((item = pom_conf_next_item(conf, &iter))) {
-		printf("Key: %s, Value: %s\n", item->key, item->value);
+	while ((item = pom_conf_next_item(pom_conf_section(conf, "number"), &iter))) {
+		printf("Number: %s, Value: %s\n", item->key, item->value);
 	}
 	pom_unread_key_iter *unread = NULL;
 	const char *key;
diff --git a/pom.c b/pom.c
index 5bb8c74..9cf4f98 100644
--- a/pom.c
+++ b/pom.c
@@ -7,6 +7,7 @@
 #include <errno.h>
 #include <limits.h>
 #include <inttypes.h>
+#include <assert.h>
 
 #if __GNUC__ >= 6
 #define ATTRIBUTE_PRINTF(fmt, args) __attribute__ ((format(printf, fmt, args)))
@@ -35,6 +36,7 @@ struct pom_error {
 struct conf_item {
 	const char *key, *value, *file;
 	uint64_t line;
+	// whether key has been read (for pom_conf_unread_keys)
 #if HAVE_ATOMICS
 	atomic_bool read;
 #else
@@ -42,7 +44,7 @@ struct conf_item {
 #endif
 };
 
-
+// linked list of things we have to free when we free a configuration
 struct to_free {
 	struct to_free *next;
 	// fool's max_align_t
@@ -55,20 +57,28 @@ struct to_free {
 
 struct pom_conf {
 	struct main_conf *main;
+	// length of the key prefix which should be ignored.
+	// (this is set by pom_conf_section(conf, section) to strlen(section) + 1)
 	size_t prefix_len;
+	// items in this configuration
 	struct conf_item *items;
 	size_t items_count;
+	// sections/sub-sections of this configuration
 	struct conf_section *sections;
 	size_t sections_count;
 };
 
 struct conf_section {
 	const char *key;
-	struct pom_conf section;
+	struct pom_conf conf;
 };
 
+// holds the "root" of a configuration
 struct main_conf {
+	// stuff we have to free.
 	struct to_free *to_free_head, *to_free_tail;
+	// can return this from pom_conf_section when the section is empty
+	// (so we don't have to store empty sections)
 	struct pom_conf empty_section;
 };
 
@@ -154,11 +164,22 @@ struct parser {
 		size_t capacity;
 		size_t count;
 	} items;
-	bool short_read, eof, out_of_memory, leftover_cr;
-	// see enum utf8_state -- starting state for future calls to read_func
+	bool
+		// last call to read_func returned <size
+		short_read,
+		// end-of-file reached
+		eof,
+		// memory allocation failed
+		out_of_memory,
+		// last call to read_func had a `\r` at the end
+		leftover_cr;
+	// see enum utf8_state -- starting state for future calls to `read_func`
 	uint8_t utf8_state;
+	// current position in `buf`
 	uint16_t buf_pos;
+	// number of bytes set in `buf`.
 	uint16_t buf_count;
+	// buffers data from `read_func`.
 	char buf[4096];
 };
 
@@ -712,6 +733,30 @@ conf_binary_search(const pom_conf *conf, const char *key, char nxt_char, bool *f
 	return lo;
 }
 
+static size_t
+conf_binary_search_sections(const pom_conf *conf, const char *key, char nxt_char, bool *found) {
+	size_t lo = 0;
+	size_t hi = conf->sections_count;
+	size_t key_len = strlen(key);
+	while (lo < hi) {
+		size_t mid = (lo + hi) / 2;
+		const char *mid_key = conf->sections[mid].key + conf->prefix_len;
+		int cmp = memcmp(key, mid_key, key_len);
+		if (cmp == 0)
+			cmp = nxt_char - mid_key[key_len];
+		if (cmp < 0) {
+			hi = mid;
+		} else if (cmp > 0) {
+			lo = mid + 1;
+		} else {
+			if (found) *found = true;
+			return mid;
+		}
+	}
+	if (found) *found = false;
+	return lo;
+}
+
 pom_conf *
 parser_finish(struct parser *parser) {
 	if (parser->out_of_memory || parser->errors.count) {
@@ -761,30 +806,56 @@ parser_finish(struct parser *parser) {
 	root->prefix_len = 0;
 	root->items = items;
 	root->items_count = items_count;
-	#if 0
-	size_t section_count = 0;
-	for (size_t i = 0; i + 1 < conf->items_count; i++) {
+	size_t sections_count = 0;
+	for (size_t i = 0; i < items_count; i++) {
 		struct conf_item *item = &items[i];
-		struct conf_item *next = &items[i+1];
-		section_count += is_descendant(item->key, next->key);
+		for (const char *p = item->key; p; ) {
+			const char *dot = strchr(p, '.');
+			if (!dot) break;
+			sections_count += i == 0
+				|| strncmp(item->key, items[i-1].key, dot + 1 - item->key) != 0;
+			p = dot + 1;
+		}
 	}
-	pom_conf *sections = conf_calloc(conf, section_count, sizeof *section);
+	struct conf_section *sections = conf_calloc(conf, sections_count, sizeof *sections);
 	if (!sections) goto out_of_memory;
-	for (size_t i = 0; i + 1 < conf->items_count; i++) {
-		struct conf_item *item = &conf->items[i];
-		struct conf_item *next = &conf->items[i+1];
-		if (is_descendant(item->key, next->key)) {
+	root->sections = sections;
+	root->sections_count = sections_count;
+	struct conf_section *section = sections;
+	for (size_t i = 0; i < items_count; i++) {
+		struct conf_item *item = &items[i];
+		for (const char *p = item->key, *dot; p; p = dot + 1) {
+			dot = strchr(p, '.');
+			if (!dot) break;
+			size_t key_len = dot - item->key;
+			if (i && strncmp(item->key, items[i-1].key, key_len + 1) == 0)
+				continue; // section was already created
 			// create section
-			size_t i_start = i + 1;
-			size_t i_end = conf_binary_search(root, item->key, '.' + 1, NULL);
-			section->items = conf->items + i_start;
-			section->items_count = i_end - i_start;
-			section->prefix_len = strlen(item->key) + 1/* dot */;
-			section->main = conf;
+			char *section_key = conf_calloc(conf, key_len + 1, 1);
+			if (!section_key) {
+				conf_free(conf);
+				return NULL;
+			}
+			section->key = section_key;
+			memcpy(section_key, item->key, key_len);
+			section_key[key_len] = 0;
+			//    Note: + (...) is to not include key foo.bar in section(conf, "foo.bar")
+			size_t i_start = i + (item->key[key_len] == 0);
+			size_t i_end = conf_binary_search(root, section_key, '.' + 1, NULL);
+			section->conf.items = items + i_start;
+			section->conf.items_count = i_end - i_start;
+			section->conf.prefix_len = strlen(section_key) + 1/* dot */;
+			section->conf.main = conf;
 			section++;
 		}
 	}
-	#endif
+	assert(section == sections + sections_count);
+	for (size_t i = 0; i < sections_count; i++) {
+		section = &sections[i];
+		// set up sub-sections.
+		section->conf.sections = section;
+		section->conf.sections_count = conf_binary_search_sections(root, section->key, '.' + 1, NULL) - i;
+	}
 	return root;
 }
 
@@ -1073,3 +1144,13 @@ pom_conf_location(const pom_conf *conf, const char *key, const char **file, uint
 		return false;
 	}
 }
+
+const pom_conf *
+pom_conf_section(const pom_conf *conf, const char *key) {
+	bool found;
+	size_t i = conf_binary_search_sections(conf, key, 0, &found);
+	if (found)
+		return &conf->sections[i].conf;
+	else
+		return &conf->main->empty_section;
+}
diff --git a/pom.h b/pom.h
index 7af38c8..4473562 100644
--- a/pom.h
+++ b/pom.h
@@ -106,6 +106,8 @@ typedef struct pom_item {
 /// It must fill out the buffer as much as possible,
 /// and return the number of bytes read.
 /// A return value less than `len` indicates the end of the file was reached.
+/// `read_func` will not be called excessively/with lots of tiny reads—it's
+/// okay to do unbuffered reads in it.
 ///
 /// `filename` is only used for errors.
 POM__MUST_USE_L
diff --git a/pre-commit.sh b/pre-commit.sh
index 2c213bd..6841365 100755
--- a/pre-commit.sh
+++ b/pre-commit.sh
@@ -1,6 +1,11 @@
 #!/bin/sh
 if sed --version | grep -q 'GNU sed'; then
-	# Remove trailing white space
-	sed -i 's/\s\s*$//' pom.c pom.h
+	for file in pom.c pom.h; do
+		# Remove trailing white space
+		# (But only if file actually has trailing white space;
+		#  we don't want to mess up last-modified-times otherwise)
+		grep -q '\s\s*$' $file && sed -i 's/\s\s*$//' $file
+	done
 fi
+which doxygen >/dev/null 2>/dev/null && { doxygen || exit 1; }
 git add -u
author	pommicket <pommicket@gmail.com>	2025-09-12 15:40:56 -0400
committer	pommicket <pommicket@gmail.com>	2025-09-12 15:41:24 -0400
commit	ca21dcb53e41919fc255ad736986aa6ff1d5bd85 (patch)
tree	08b0a41be6dacdbd9d1cc22e4931d150ef78e2ce
parent	802f11352a1f8ab25a222111d2ba254ed9f7113f (diff)