summary refs log tree commit diff
path: root/pom.c
diff options
context:
space:
mode:
author pommicket <pommicket@gmail.com> 2025-09-11 19:17:23 -0400
committer pommicket <pommicket@gmail.com> 2025-09-11 19:17:28 -0400
commit aab7700e2bba9216a8343e8e4e0cd0096026ba1a (patch)
tree b98c5cfc0589335c7aa81fc5bdafb8ca3564668b /pom.c
parent ec9cf4ef9b4ff2a16172e72adf5fca2a94ba3009 (diff)
Reading lines
Diffstat (limited to 'pom.c')
-rw-r--r-- pom.c 247
1 files changed, 242 insertions, 5 deletions
diff --git a/pom.c b/pom.c
index 135e0d9..de698f0 100644
--- a/pom.c
+++ b/pom.c
@@ -1,3 +1,8 @@
+/*
+TODO:
+- check for valid UTF-8
+*/
+
#include "pom.h"
#include <stdio.h> // still needed for sprintf, even if POM_NO_STDIO is defined.
@@ -46,6 +51,37 @@ struct main_conf {
size_t items_count;
};
+// Compact record of one error found while parsing.
+// pom_load turns these into pom_error objects once parsing is finished.
+struct parser_error {
+	// value of parser->line_number at the time the error was recorded
+	uint64_t line;
+	// offset of the message's first character within parser->error_messages.array
+	uint32_t message;
+};
+
+// All state for a single parse; allocated and freed inside pom_load.
+struct parser {
+	// file name stored in every pom_error that is produced
+	const char *filename;
+	// line number attached to errors recorded by parser_error
+	uint64_t line_number;
+	// input callback: (userdata, buffer, buffer size) -> number of bytes read.
+	// a return value of 0 is treated as end of file.
+	size_t (*read_func)(void *, char *, size_t);
+	// passed through to read_func unchanged
+	void *userdata;
+	// error allocated up front by pom_load so that it can still be
+	// reported after memory has run out
+	pom_error *out_of_memory_error;
+	// text of the line currently being processed (filled by parser_read_line)
+	struct {
+		char *array;
+		size_t count;
+		size_t capacity;
+	} line;
+	// errors recorded so far
+	struct {
+		struct parser_error *array;
+		size_t count;
+		size_t capacity;
+	} errors;
+	// null-terminated error messages stored back to back;
+	// parser_error.message is an offset into this array
+	struct {
+		char *array;
+		size_t count;
+		size_t capacity;
+	} error_messages;
+	// set once read_func has returned 0
+	bool eof;
+	// set by parser_out_of_memory when an allocation fails
+	bool out_of_memory;
+	// set when a chunk of input ended with a carriage return
+	bool leftover_cr;
+	// index in buf of the next character to hand out
+	uint16_t buf_pos;
+	// number of usable characters in buf
+	uint16_t buf_count;
+	// most recently read chunk of input, after filtering
+	char buf[4096];
+};
+
#ifdef POM_NO_STDIO
#define fatal_error(...) abort()
#else
@@ -73,8 +109,8 @@ make_error(const char *file, uint64_t line, const char *fmt, ...) {
bool bad_fmt = false;
int len = vsnprintf(NULL, 0, fmt, args);
if (len < 0 || len > INT_MAX - sizeof(pom_error) - 1) {
- // Should probably never happen? Who knows though.
- // In this case, we just use fmt as the error message.
+ // Could technically happen if %s gets a really long string.
+ // Just use fmt as the error in this case.
bad_fmt = true;
len = strlen(fmt);
}
@@ -84,13 +120,14 @@ make_error(const char *file, uint64_t line, const char *fmt, ...) {
if (bad_fmt) {
strcpy(message, fmt);
} else {
- vsnprintf(message, len + 1, fmt, args);
+ vsnprintf(message, len + 1, fmt, args_copy);
}
err->file = file;
err->line = line;
err->message = message;
err->next = NULL;
}
+ va_end(args_copy);
return err;
}
@@ -135,6 +172,152 @@ pom_error_print(const pom_error *error) {
}
#endif
+// Records that an allocation failed.
+// The flag is checked by parser_error (which then records nothing more)
+// and by pom_load (which stops parsing and reports the out-of-memory error).
+static void
+parser_out_of_memory(struct parser *parser) {
+	parser->out_of_memory = true;
+}
+
+// Implementation behind the parser_append macro.
+//
+// ptr       - address of the array pointer (any object pointer type)
+// elem_size - size in bytes of one array element
+// pcount    - address of the element count; increased by need on success
+// pcapacity - address of the capacity; updated if the array is reallocated
+// need      - number of elements to make room for
+//
+// Returns a pointer to the first of the need new elements, or NULL
+// (after calling parser_out_of_memory) if the request is too large
+// or realloc fails. On failure the array, count and capacity are untouched.
+static void *
+parser_append_(struct parser *parser, void *ptr, size_t elem_size, size_t *pcount, size_t *pcapacity,
+	size_t need) {
+	// treating every array pointer as a void * is only OK if all object
+	// pointer types share a representation, which we assume here.
+	void **array_slot = ptr;
+	void *base = *array_slot;
+	const size_t used = *pcount;
+	// deliberately conservative bound so that none of the arithmetic below can overflow.
+	const size_t limit = SIZE_MAX / 8 / elem_size;
+	if (used >= limit || need >= limit) {
+		parser_out_of_memory(parser);
+		return NULL;
+	}
+	const size_t wanted = used + need;
+	if (wanted > *pcapacity) {
+		// grow by about 1.5x to keep the number of reallocations low
+		const size_t grown = wanted * 3 / 2 + 2;
+		void *resized = realloc(base, grown * elem_size);
+		if (!resized) {
+			parser_out_of_memory(parser);
+			return NULL;
+		}
+		base = resized;
+		*array_slot = base;
+		*pcapacity = grown;
+	}
+	*pcount = wanted;
+	return (char *)base + used * elem_size;
+}
+
+// Resizing-array macros.
+// parser_append adds room for need elements to the array parser->field
+// (field must be a struct with array, count and capacity members)
+// and evaluates to a pointer to the first new element,
+// or NULL if memory ran out (see parser_append_).
+// Arguments are parenthesized so that arbitrary expressions can be passed,
+// but note that parser is evaluated more than once.
+#define parser_append(parser, field, need) \
+	parser_append_((parser), &(parser)->field.array, sizeof((parser)->field.array[0]), \
+		&(parser)->field.count, &(parser)->field.capacity, (need))
+// Same as parser_append, for a single element.
+#define parser_append_one(parser, field) \
+	parser_append(parser, field, 1)
+
+static void parser_error(struct parser *parser, PRINTF_FORMAT_STRING const char *fmt, ...) ATTRIBUTE_PRINTF(2, 3);
+// Records an error at parser->line_number with a printf-style message.
+//
+// Does nothing if memory has already run out or if 1000 errors have
+// already been recorded. Messages longer than 1000 characters are truncated.
+// If the message can't be formatted, fmt itself is used as the message.
+static void
+parser_error(struct parser *parser, const char *fmt, ...) {
+	if (parser->out_of_memory) return;
+	if (parser->errors.count >= 1000) return; // don't bother at this point.
+	va_list args, args_copy;
+	va_start(args, fmt);
+	// the arguments are walked twice: once to measure, once to format.
+	va_copy(args_copy, args);
+	int formatted_len = vsnprintf(NULL, 0, fmt, args);
+	va_end(args);
+	// a negative length could happen with a >INT_MAX-sized string, for example
+	bool bad_fmt = formatted_len < 0;
+	size_t error_len = bad_fmt ? strlen(fmt) : (size_t)formatted_len;
+	if (error_len > 1000)
+		error_len = 1000; // truncate very long errors
+	char *message = parser_append(parser, error_messages, error_len + 1);
+	if (message) {
+		if (bad_fmt) {
+			// use fmt as error message.
+			// copy at most error_len characters, since that is all we reserved.
+			memcpy(message, fmt, error_len);
+			message[error_len] = 0;
+		} else {
+			vsnprintf(message, error_len + 1, fmt, args_copy);
+		}
+	}
+	// every va_copy needs a matching va_end, whichever path was taken.
+	va_end(args_copy);
+	if (!message) return;
+	// only compute the offset once we know message points into the array.
+	uint32_t message_idx = (uint32_t)(message - parser->error_messages.array);
+	struct parser_error *error = parser_append_one(parser, errors);
+	if (!error) return;
+	error->line = parser->line_number;
+	error->message = message_idx;
+}
+
+// Reads the next line of input into parser->line.array as a null-terminated
+// string, without the terminating newline.
+//
+// Input is fetched through parser->read_func in chunks. Each chunk is filtered
+// as it arrives: carriage returns are dropped (with an error unless a newline
+// follows), and control characters other than tab and newline are dropped
+// with an error. Sets parser->eof when read_func returns 0.
+// If memory runs out, returns early; parser->line is not valid in that case.
+//
+// NOTE(review): nothing visible here advances parser->line_number when a line
+// is consumed (it is only adjusted temporarily while filtering a chunk) --
+// confirm that it is advanced elsewhere.
+static void
+parser_read_line(struct parser *parser) {
+	parser->line.count = 0;
+	while (true) {
+		// make room for everything that could be in buf, plus a null terminator.
+		char *line_out = parser_append(parser, line, sizeof parser->buf + 1);
+		if (!line_out) return;
+		while (parser->buf_pos < parser->buf_count) {
+			char c = parser->buf[parser->buf_pos++];
+			if (c == '\n') {
+				*line_out = 0;
+				return;
+			}
+			*line_out++ = c;
+		}
+		// ensure next append goes in the right place.
+		parser->line.count = line_out - parser->line.array;
+		// read more data into buf
+		size_t read_count = parser->read_func(parser->userdata, parser->buf, sizeof parser->buf - 1);
+		parser->buf_pos = 0;
+		if (read_count == 0) {
+			// EOF reached.
+			// nothing in buf is valid anymore; don't hand it out again.
+			parser->buf_count = 0;
+			if (parser->leftover_cr) {
+				// the input ended right after a carriage return.
+				parser->leftover_cr = false;
+				parser_error(parser, "Carriage return with no newline after it.");
+			}
+			parser->eof = true;
+			*line_out = 0;
+			return;
+		}
+		if (parser->leftover_cr) {
+			// the previous chunk ended in a carriage return;
+			// this check must only happen once per such carriage return.
+			parser->leftover_cr = false;
+			if (parser->buf[0] != '\n')
+				parser_error(parser, "Carriage return with no newline after it.");
+		}
+		size_t out = 0;
+		// line_number is advanced while filtering so that errors in later
+		// lines of this chunk are reported on the right line, then restored.
+		uint64_t original_line_number = parser->line_number;
+		for (size_t in = 0; in < read_count; in++) {
+			char c = parser->buf[in];
+			if (c == '\r') {
+				if (in == read_count - 1) {
+					// can't tell yet whether a newline follows.
+					parser->leftover_cr = true;
+				} else if (parser->buf[in + 1] != '\n') {
+					parser_error(parser, "Carriage return with no newline after it.");
+				}
+			} else if (c == '\n') {
+				parser->buf[out++] = c;
+				parser->line_number++;
+			} else if (c >= 0 && c < 32 && c != '\t') {
+				parser_error(parser, "Illegal control character (ASCII code %d)", c);
+			} else {
+				parser->buf[out++] = c;
+			}
+		}
+		parser->line_number = original_line_number;
+		parser->buf_count = out;
+	}
+}
+
+// Reads one line of input and processes it.
+// Leading tabs and spaces are removed; blank lines and lines beginning
+// with '#' are ignored. For now, any other line is just printed.
+static void
+parse_line(struct parser *parser) {
+	parser_read_line(parser);
+	// if memory ran out while reading, the line buffer may be NULL
+	// or not null-terminated, so it must not be touched.
+	if (parser->out_of_memory) return;
+	char *line = parser->line.array;
+	{
+		// remove leading white space
+		size_t i = 0;
+		while (line[i] == '\t' || line[i] == ' ')
+			i++;
+		// (+ 1 to move the null terminator as well)
+		memmove(line, line + i, strlen(line) + 1 - i);
+	}
+	if (line[0] == 0 || line[0] == '#') {
+		// blank line/comment
+		return;
+	}
+	printf("|%s\n",line);
+}
+
pom_conf *
pom_load(const char *filename,
size_t (*read_func)(void *userdata, char *buf, size_t len),
@@ -143,9 +326,63 @@ pom_load(const char *filename,
fatal_error("%s called with NULL file name", __func__);
if (!read_func)
fatal_error("%s called with NULL read function", __func__);
- // TODO
if (error) *error = NULL;
- return NULL;
+ // Start by allocating out-of-memory error, so we can just return
+ // it if we run out of memory.
+ pom_error *out_of_memory = make_error(filename, 1, "Out of memory.");
+ if (!out_of_memory) return NULL;
+ struct parser *parser = calloc(1, sizeof *parser);
+ if (!parser) {
+ if (error) *error = out_of_memory;
+ return NULL;
+ }
+ parser->filename = filename;
+ parser->out_of_memory_error = out_of_memory;
+ parser->read_func = read_func;
+ parser->userdata = userdata;
+ parser->line_number = 1;
+
+ while (!(parser->eof || parser->out_of_memory))
+ parse_line(parser);
+
+ bool success = true;
+ if (parser->out_of_memory) {
+ success = false;
+ if (error) *error = parser->out_of_memory_error;
+ } else if (parser->errors.count) {
+ success = false;
+ if (error) {
+ // shouldn't overflow on 32+-bit systems,
+ // given the limits we impose on errors.
+ size_t len = parser->errors.count * sizeof(pom_error) + parser->error_messages.count;
+ // convert parser_errors to pom_error.
+ pom_error *errors = malloc(len);
+ if (errors) {
+ char *messages = (char *)(errors + parser->errors.count);
+ memcpy(messages,
+ parser->error_messages.array,
+ parser->error_messages.count);
+ for (size_t i = 0; i < parser->errors.count; i++) {
+ const struct parser_error *parser_error = &parser->errors.array[i];
+ errors[i].file = parser->filename;
+ errors[i].line = parser_error->line;
+ errors[i].message = messages + parser_error->message;
+ errors[i].next = i == parser->errors.count - 1 ? NULL : &errors[i+1];
+ }
+ *error = errors;
+ } else {
+ *error = parser->out_of_memory_error;
+ }
+ }
+ free(parser->errors.array);
+ free(parser->error_messages.array);
+ }
+ if (!error || *error != out_of_memory) {
+ free(out_of_memory);
+ }
+ free(parser->line.array);
+ free(parser);
+ return success ? NULL : NULL;
}
static size_t