From a835e738761238b891847c8303592b504018f017 Mon Sep 17 00:00:00 2001 From: pommicket Date: Fri, 12 Sep 2025 00:47:39 -0400 Subject: Parse section headers --- examples/conf.pom | 4 +- examples/read_conf.c | 1 + pom.c | 190 +++++++++++++++++++++++++++++++++++++++------------ pom.h | 14 +++- 4 files changed, 160 insertions(+), 49 deletions(-) diff --git a/examples/conf.pom b/examples/conf.pom index efff968..1054e41 100644 --- a/examples/conf.pom +++ b/examples/conf.pom @@ -1,7 +1,9 @@ -[0oo] +[0oo.eeee°] bar = 10 # testing # here's another comment +[jibjabjobdsfajkha] + win = yes diff --git a/examples/read_conf.c b/examples/read_conf.c index b18be3a..091255c 100644 --- a/examples/read_conf.c +++ b/examples/read_conf.c @@ -1,4 +1,5 @@ #include +#include #include "pom.h" diff --git a/pom.c b/pom.c index 2179df3..f135b6b 100644 --- a/pom.c +++ b/pom.c @@ -73,14 +73,14 @@ enum utf8_state { }; struct parser { - const char *filename; + char *filename; uint64_t line_number; size_t (*read_func)(void *, char *, size_t); void *userdata; pom_error *out_of_memory_error; struct { char *array; - size_t count, capacity; + size_t capacity; } line; struct { struct parser_error *array; @@ -90,6 +90,10 @@ struct parser { char *array; size_t count, capacity; } error_messages; + struct { + char *array; + size_t capacity; + } current_section; bool short_read, eof, out_of_memory, leftover_cr; // see enum utf8_state -- starting state for future calls to read_func uint8_t utf8_state; @@ -193,37 +197,55 @@ parser_out_of_memory(struct parser *parser) { parser->out_of_memory = true; } -static void * -parser_append_(struct parser *parser, void *ptr, size_t elem_size, size_t *pcount, size_t *pcapacity, - size_t need) { - size_t count = *pcount; +static POM__MUST_USE_L bool parser_realloc_(struct parser *parser, void *ptr, size_t elem_size, size_t *pcapacity, size_t new_capacity) POM__MUST_USE_R; +static bool +parser_realloc_(struct parser *parser, void *ptr, size_t elem_size, size_t *pcapacity, size_t new_capacity) { size_t capacity = *pcapacity; - // this is bad if not all pointer types have the same representation. - // I really hope we don't have to worry about that in 2025. - void **parray = ptr; - void *array = *parray; - // these checks are overly strict to avoid arithmetic overflow. - if (count >= SIZE_MAX / 8 / elem_size || need >= SIZE_MAX / 8 / elem_size) { - parser_out_of_memory(parser); - return NULL; - } - if (count + need > capacity) { - size_t new_capacity = (count + need) * 3 / 2 + 2; + if (new_capacity > capacity) { + // this check is overly strict to avoid arithmetic overflow. + if (new_capacity >= SIZE_MAX / 4 / elem_size) { + parser_out_of_memory(parser); + return false; + } + // this is bad if not all pointer types have the same representation. + // I really hope we don't have to worry about that in 2025. + void **parray = ptr; + void *array = *parray; + new_capacity = new_capacity * 3 / 2 + 2; array = realloc(array, new_capacity * elem_size); if (!array) { parser_out_of_memory(parser); - return NULL; + return false; } *parray = array; *pcapacity = new_capacity; } - void *items = (char *)array + count * elem_size; - *pcount += need; - return items; + return true; } // Strange resizing-array macro. -// Adds room for need elements to the array parser.field, + + +static void * +parser_append_(struct parser *parser, void *ptr, size_t elem_size, size_t *pcount, size_t *pcapacity, + size_t need) { + size_t old_count = *pcount; + // ensure addition below doesn't overflow + if (need >= SIZE_MAX / 8 - old_count) { + parser_out_of_memory(parser); + return NULL; + } + if (parser_realloc_(parser, ptr, elem_size, pcapacity, old_count + need)) { + *pcount += need; + return *(char **)ptr + elem_size * old_count; + } else { + return NULL; + } +} + +#define parser_realloc(parser, field, new_capacity) \ + parser_realloc_(parser, &parser->field.array, sizeof parser->field.array[0], &parser->field.capacity, new_capacity) +// Adds room for `need` elements to the array `parser.field`, // and returns a pointer to the first one. #define parser_append(parser, field, need) \ parser_append_(parser, &parser->field.array, sizeof parser->field.array[0], &parser->field.count, &parser->field.capacity, need) @@ -394,43 +416,110 @@ parser_read_line(struct parser *parser) { return; } parser->line_number += 1; - parser->line.count = 0; + size_t line_count = 0; while (true) { - char *line_out = parser_append(parser, line, sizeof parser->buf + 1); - if (!line_out) return; + // NB: addition will not realistically overflow. + if (!parser_realloc(parser, line, line_count + sizeof parser->buf + 1)) + return; + char *line = parser->line.array; while (parser->buf_pos < parser->buf_count) { char c = parser->buf[parser->buf_pos++]; if (c == '\n') { - *line_out = 0; + line[line_count] = 0; return; } - *line_out++ = c; + line[line_count++] = c; } - // ensure next append goes in the right place. - parser->line.count = line_out - parser->line.array; if (!parser_read_to_buf(parser, false)) { // reached EOF - *line_out = 0; + line[line_count] = 0; return; } } } +static void +strip_leading_accepted_spaces(char *s) { + size_t i; + for (i = 0; s[i] == '\t' || s[i] == ' '; i++); + memmove(s, s + i, strlen(s) + 1 - i); +} + +static void +strip_trailing_accepted_spaces(char *s) { + size_t i = strlen(s); + while (i > 0) { + i--; + if (!(s[i] == '\t' || s[i] == ' ')) break; + s[i] = 0; + } +} + +static void +check_valid_key(struct parser *parser, const char *key) { + uint8_t c; + if (key[0] == '.') + parser_error(parser, "Key shouldn't begin with .: %s", key); + for (size_t i = 0; (c = key[i]); i++) { + bool bad = false; + if (c < 64) { + if (c == '.') { + if (key[i+1] == 0) { + parser_error(parser, "Key shouldn't end with .: %s", key); + } else if (key[i+1] == '.') { + parser_error(parser, "Key shouldn't contain ..: %s", key); + } + } + // bitmask of disallowed ASCII characters 0-63 + bad = (0xfc001bffffffffffU >> c) & 1; + } else if (c < 128) { + // bitmask of disallowed ASCII characters 64-127 + bad = (0xfc0000017c000001U >> c) & 1; + } + if (bad) { + parser_error(parser, "Invalid character in key: '%c' (ASCII %d)", c, c); + } + } +} + static void parse_line(struct parser *parser) { parser_read_line(parser); char *line = parser->line.array; - { - // remove leading white space - size_t i; - for (i = 0; line[i] == '\t' || line[i] == ' '; i++); - memmove(line, line + i, strlen(line) + 1 - i); - } + if (!line) return; // OOM + strip_leading_accepted_spaces(line); if (line[0] == 0 || line[0] == '#') { // blank line/comment return; } - printf("|%s\n",line); + if (line[0] == '[') { + strip_trailing_accepted_spaces(line); + size_t len = strlen(line); + if (line[len-1] != ']') { + parser_error(parser, "Missing ] to match ["); + return; + } + line += 1; + len -= 2; + if (!parser_realloc(parser, current_section, len + 1)) + return; + char *current_section = parser->current_section.array; + memcpy(current_section, line, len); + current_section[len] = 0; + if (len) + check_valid_key(parser, current_section); + return; + } + printf("%s|%s\n",parser->current_section.array,line); +} + +static void +set_error(pom_error **error, pom_error *e) { + if (error) { + *error = e; + } else { + free(e); + } } pom_conf * @@ -446,15 +535,24 @@ pom_load(const char *filename, // it if we run out of memory. pom_error *out_of_memory = make_error(filename, 1, "Out of memory."); if (!out_of_memory) return NULL; - struct parser *parser = calloc(1, sizeof *parser); + char *current_section = calloc(1, 1); + if (!current_section) { + set_error(error, out_of_memory); + return NULL; + } + struct parser *parser = calloc(1, sizeof *parser + strlen(filename) + 1); if (!parser) { - if (error) *error = out_of_memory; + free(current_section); + set_error(error, out_of_memory); return NULL; } - parser->filename = filename; + // store copy of filename just after parser in memory + strcpy((char *)(parser + 1), filename); + parser->filename = (char *)(parser + 1); parser->out_of_memory_error = out_of_memory; parser->read_func = read_func; parser->userdata = userdata; + parser->current_section.array = current_section; // read into parser->buf, and skip initial BOM if present. parser_read_to_buf(parser, true); while (!(parser->eof || parser->out_of_memory)) @@ -463,13 +561,12 @@ pom_load(const char *filename, bool success = true; if (parser->out_of_memory) { success = false; - if (error) *error = parser->out_of_memory_error; + set_error(error, out_of_memory); } else if (parser->errors.count) { success = false; if (error) { - // shouldn't overflow on 32+-bit systems, - // given the limits we impose on errors. - size_t len = parser->errors.count * sizeof(pom_error) + parser->error_messages.count; + // shouldn't overflow + size_t len = parser->errors.count * sizeof(pom_error) + parser->error_messages.count + strlen(filename) + 1; // convert parser_errors to pom_error. pom_error *errors = malloc(len); if (errors) { @@ -477,9 +574,11 @@ pom_load(const char *filename, memcpy(messages, parser->error_messages.array, parser->error_messages.count); + char *filename = (char *)messages + parser->error_messages.count; + strcpy(filename, parser->filename); for (size_t i = 0; i < parser->errors.count; i++) { const struct parser_error *parser_error = &parser->errors.array[i]; - errors[i].file = parser->filename; + errors[i].file = filename; errors[i].line = parser_error->line; errors[i].message = messages + parser_error->message; errors[i].next = i == parser->errors.count - 1 ? NULL : &errors[i+1]; @@ -496,6 +595,7 @@ pom_load(const char *filename, free(out_of_memory); } free(parser->line.array); + free(parser->current_section.array); free(parser); return success ? NULL : NULL; } diff --git a/pom.h b/pom.h index f76c30d..f37c974 100644 --- a/pom.h +++ b/pom.h @@ -3,8 +3,8 @@ /// /// ## Thread-safety /// -/// Of course, you should not free a configuration while -/// another thread is using it (even through a section +/// Of course, you should not free or \ref pom_conf_merge into +/// a configuration while another thread is using it (even through a section /// obtained via \ref pom_conf_section). /// /// Other than that, all these functions are fully thread-safe @@ -32,6 +32,10 @@ #include #include +#ifdef __cplusplus +extern "C" { +#endif + #ifndef POM__MUST_USE_L #if _MSC_VER >= 1700 // supposedly was added in VS2012 #define POM__MUST_USE_L _Check_return_ @@ -167,7 +171,7 @@ pom_error_message(const pom_error *error); /// Get the name of the file where this error occured. /// -/// The returned pointer is valid until \ref pom_conf_free is called. +/// The returned pointer is valid until the error is freed. const char * pom_error_file(const pom_error *error); @@ -445,4 +449,8 @@ pom_conf_print(const pom_conf *conf); void pom_conf_free(pom_conf *conf); +#ifdef __cplusplus +} // extern "C" #endif + +#endif // POM_H_ -- cgit v1.2.3