diff options
author | pommicket <pommicket@gmail.com> | 2025-09-11 19:17:23 -0400 |
---|---|---|
committer | pommicket <pommicket@gmail.com> | 2025-09-11 19:17:28 -0400 |
commit | aab7700e2bba9216a8343e8e4e0cd0096026ba1a (patch) | |
tree | b98c5cfc0589335c7aa81fc5bdafb8ca3564668b /pom.c | |
parent | ec9cf4ef9b4ff2a16172e72adf5fca2a94ba3009 (diff) |
Reading lines
Diffstat (limited to 'pom.c')
-rw-r--r-- | pom.c | 247 |
1 files changed, 242 insertions, 5 deletions
@@ -1,3 +1,8 @@ +/* +TODO: +- check for valid UTF-8 +*/ + #include "pom.h" #include <stdio.h> // still needed for sprintf, even if POM_NO_STDIO is defined. @@ -46,6 +51,37 @@ struct main_conf { size_t items_count; }; +// temporary error that is eventually converted to a pom_error +struct parser_error { + uint64_t line; + // index into parser->error_messages.array + uint32_t message; +}; + +struct parser { + const char *filename; + uint64_t line_number; + size_t (*read_func)(void *, char *, size_t); + void *userdata; + pom_error *out_of_memory_error; + struct { + char *array; + size_t count, capacity; + } line; + struct { + struct parser_error *array; + size_t count, capacity; + } errors; + struct { + char *array; + size_t count, capacity; + } error_messages; + bool eof, out_of_memory, leftover_cr; + uint16_t buf_pos; + uint16_t buf_count; + char buf[4096]; +}; + #ifdef POM_NO_STDIO #define fatal_error(...) abort() #else @@ -73,8 +109,8 @@ make_error(const char *file, uint64_t line, const char *fmt, ...) { bool bad_fmt = false; int len = vsnprintf(NULL, 0, fmt, args); if (len < 0 || len > INT_MAX - sizeof(pom_error) - 1) { - // Should probably never happen? Who knows though. - // In this case, we just use fmt as the error message. + // Could technically happen if %s gets a really long string. + // Just use fmt as the error in this case. bad_fmt = true; len = strlen(fmt); } @@ -84,13 +120,14 @@ make_error(const char *file, uint64_t line, const char *fmt, ...) { if (bad_fmt) { strcpy(message, fmt); } else { - vsnprintf(message, len + 1, fmt, args); + vsnprintf(message, len + 1, fmt, args_copy); } err->file = file; err->line = line; err->message = message; err->next = NULL; } + va_end(args_copy); return err; } @@ -135,6 +172,152 @@ pom_error_print(const pom_error *error) { } #endif +static void +parser_out_of_memory(struct parser *parser) { + parser->out_of_memory = true; +} + +static void * +parser_append_(struct parser *parser, void *ptr, size_t elem_size, size_t *pcount, size_t *pcapacity, + size_t need) { + size_t count = *pcount; + size_t capacity = *pcapacity; + // this is bad if not all pointer types have the same representation. + // I really hope we don't have to worry about that in 2025. + void **parray = ptr; + void *array = *parray; + // these checks are overly strict to avoid arithmetic overflow. + if (count >= SIZE_MAX / 8 / elem_size || need >= SIZE_MAX / 8 / elem_size) { + parser_out_of_memory(parser); + return NULL; + } + if (count + need > capacity) { + size_t new_capacity = (count + need) * 3 / 2 + 2; + array = realloc(array, new_capacity * elem_size); + if (!array) { + parser_out_of_memory(parser); + return NULL; + } + *parray = array; + *pcapacity = new_capacity; + } + void *items = (char *)array + count * elem_size; + *pcount += need; + return items; +} + +// Strange resizing-array macro. +// Adds room for need elements to the array parser.field, +// and returns a pointer to the first one. +#define parser_append(parser, field, need) \ + parser_append_(parser, &parser->field.array, sizeof parser->field.array[0], &parser->field.count, &parser->field.capacity, need) +#define parser_append_one(parser, field) \ + parser_append(parser, field, 1) + +static void parser_error(struct parser *parser, PRINTF_FORMAT_STRING const char *fmt, ...) ATTRIBUTE_PRINTF(2, 3); +static void +parser_error(struct parser *parser, const char *fmt, ...) { + if (parser->out_of_memory) return; + if (parser->errors.count >= 1000) return; // don't bother at this point. + va_list args, args_copy; + va_start(args, fmt); + va_copy(args_copy, args); + bool bad_fmt = false; + int error_len = vsnprintf(NULL, 0, fmt, args); + va_end(args); + if (error_len < 0) { + // could happen with a >INT_MAX-sized string, for example + bad_fmt = true; + error_len = strlen(fmt); + va_end(args_copy); + } + if (error_len > 1000) + error_len = 1000; // truncate very long errors + char *message = parser_append(parser, error_messages, error_len + 1); + uint32_t message_idx = message - parser->error_messages.array; + if (bad_fmt) { + // use fmt as error message + if (message) strcpy(message, fmt); + } else { + // (does nothing if message is NULL, but ends args_copy) + vsnprintf(message, message ? error_len + 1 : 0, fmt, args_copy); + } + if (!message) return; + struct parser_error *error = parser_append_one(parser, errors); + if (!error) return; + error->line = parser->line_number; + error->message = message_idx; +} + +// Reads into parser->line_buf. +static void +parser_read_line(struct parser *parser) { + parser->line.count = 0; + while (true) { + char *line_out = parser_append(parser, line, sizeof parser->buf + 1); + if (!line_out) return; + while (parser->buf_pos < parser->buf_count) { + char c = parser->buf[parser->buf_pos++]; + if (c == '\n') { + *line_out = 0; + return; + } + *line_out++ = c; + } + // ensure next append goes in the right place. + parser->line.count = line_out - parser->line.array; + // read more data into buf + size_t read_count = parser->read_func(parser->userdata, parser->buf, sizeof parser->buf - 1); + parser->buf_pos = 0; + if (read_count == 0) { + // EOF reached. + parser->eof = true; + *line_out = 0; + return; + } + if (parser->leftover_cr && parser->buf[0] != '\n') + parser_error(parser, "Carriage return with no newline after it."); + size_t out = 0; + uint64_t original_line_number = parser->line_number; + for (size_t in = 0; in < read_count; in++) { + char c = parser->buf[in]; + if (c == '\r') { + if (in == read_count - 1) { + parser->leftover_cr = true; + } else if (parser->buf[in + 1] != '\n') { + parser_error(parser, "Carriage return with no newline after it."); + } + } else if (c == '\n') { + parser->buf[out++] = c; + parser->line_number++; + } else if (c >= 0 && c < 32 && c != '\t') { + parser_error(parser, "Illegal control character (ASCII code %d)", c); + } else { + parser->buf[out++] = c; + } + } + parser->line_number = original_line_number; + parser->buf_count = out; + } +} + +static void +parse_line(struct parser *parser) { + parser_read_line(parser); + char *line = parser->line.array; + { + // remove leading white space + size_t i; + for (i = 0; line[i] == '\t' || line[i] == ' '; i++); + memmove(line, line + i, strlen(line) + 1 - i); + } + if (line[0] == 0 || line[0] == '#') { + // blank line/comment + return; + } + printf("|%s\n",line); +} + pom_conf * pom_load(const char *filename, size_t (*read_func)(void *userdata, char *buf, size_t len), @@ -143,9 +326,63 @@ pom_load(const char *filename, fatal_error("%s called with NULL file name", __func__); if (!read_func) fatal_error("%s called with NULL read function", __func__); - // TODO if (error) *error = NULL; - return NULL; + // Start by allocating out-of-memory error, so we can just return + // it if we run out of memory. + pom_error *out_of_memory = make_error(filename, 1, "Out of memory."); + if (!out_of_memory) return NULL; + struct parser *parser = calloc(1, sizeof *parser); + if (!parser) { + if (error) *error = out_of_memory; + return NULL; + } + parser->filename = filename; + parser->out_of_memory_error = out_of_memory; + parser->read_func = read_func; + parser->userdata = userdata; + parser->line_number = 1; + + while (!(parser->eof || parser->out_of_memory)) + parse_line(parser); + + bool success = true; + if (parser->out_of_memory) { + success = false; + if (error) *error = parser->out_of_memory_error; + } else if (parser->errors.count) { + success = false; + if (error) { + // shouldn't overflow on 32+-bit systems, + // given the limits we impose on errors. + size_t len = parser->errors.count * sizeof(pom_error) + parser->error_messages.count; + // convert parser_errors to pom_error. + pom_error *errors = malloc(len); + if (errors) { + char *messages = (char *)(errors + parser->errors.count); + memcpy(messages, + parser->error_messages.array, + parser->error_messages.count); + for (size_t i = 0; i < parser->errors.count; i++) { + const struct parser_error *parser_error = &parser->errors.array[i]; + errors[i].file = parser->filename; + errors[i].line = parser_error->line; + errors[i].message = messages + parser_error->message; + errors[i].next = i == parser->errors.count - 1 ? NULL : &errors[i+1]; + } + *error = errors; + } else { + *error = parser->out_of_memory_error; + } + } + free(parser->errors.array); + free(parser->error_messages.array); + } + if (!error || *error != out_of_memory) { + free(out_of_memory); + } + free(parser->line.array); + free(parser); + return success ? NULL : NULL; } static size_t |