/*
TODO:
- check for valid UTF-8
*/
#include "pom.h"

// NOTE(review): the system header names were reconstructed from the
// identifiers this file uses -- confirm the list against the build.
#include <stdio.h> // still needed for sprintf, even if POM_NO_STDIO is defined.
#include <stdarg.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>
#include <errno.h>
#include <limits.h>

#if __GNUC__ >= 6
#define ATTRIBUTE_PRINTF(fmt, args) __attribute__ ((format(printf, fmt, args)))
#else
#define ATTRIBUTE_PRINTF(fmt, args)
#endif
#if _MSC_VER >= 1600
#define PRINTF_FORMAT_STRING _Printf_format_string_
#else
#define PRINTF_FORMAT_STRING
#endif

// One entry in a singly linked list of errors.
struct pom_error {
	// next error in the list, or NULL if this is the last one
	const pom_error *next;
	const char *file;
	uint64_t line;
	const char *message;
};

struct main_conf;

struct pom_conf {
	struct main_conf *main;
	size_t prefix_len;
	const struct conf_item *items;
	size_t items_count;
};

// Element type of the `items` arrays in pom_conf and main_conf.
struct conf_item {
	const char *key;
	const char *value;
	const pom_conf *section;
};

struct main_conf {
	struct conf_item *items;
	size_t items_count;
};

// temporary error that is eventually converted to a pom_error
struct parser_error {
	uint64_t line;
	// index into parser->error_messages.array
	uint32_t message;
};

// type for parser::utf8_state
enum utf8_state {
	UTF8_STATE_DEFAULT = 0,
	// want 1 continuation byte
	UTF8_STATE_1CONT = 1,
	// want 2 continuation bytes
	UTF8_STATE_2CONT = 2,
	// want 3 continuation bytes
	UTF8_STATE_3CONT = 3,
	// want 2 continuation bytes, first one must be >=0xA0 (otherwise encoding is overlong)
	UTF8_STATE_2CONT_GTEQ_A0 = 4,
	// want 2 continuation bytes, first one must be <0xA0 (otherwise encodes a UTF-16 surrogate)
	UTF8_STATE_2CONT_LT_A0 = 5,
	// want 3 continuation bytes, first one must be >=0x90 (otherwise encoding is overlong)
	UTF8_STATE_3CONT_GTEQ_90 = 6,
	// want 3 continuation bytes, first one must be <0x90 (otherwise encoding produces oversized code point)
	UTF8_STATE_3CONT_LT_90 = 7,
};

struct parser {
	const char *filename;
	uint64_t line_number;
	size_t (*read_func)(void *, char *, size_t);
	void *userdata;
	pom_error *out_of_memory_error;
	// resizing arrays; grown through parser_append()
	struct {
		char *array;
		size_t count, capacity;
	} line;
	struct {
		struct parser_error *array;
		size_t count, capacity;
	} errors;
	struct {
		char *array;
		size_t count, capacity;
	} error_messages;
	bool short_read, eof, out_of_memory, leftover_cr;
	// see enum utf8_state -- starting state for future calls to read_func
	uint8_t utf8_state;
	uint16_t buf_pos;
	uint16_t buf_count;
	char buf[4096];
};

#ifdef POM_NO_STDIO
#define fatal_error(...) abort()
#else
// fatal_error should only be called when the API is misused
// (e.g. `NULL` argument that shouldn't be `NULL`).
// Prints the formatted message and a newline to stderr, then aborts.
static void fatal_error(PRINTF_FORMAT_STRING const char *fmt, ...) ATTRIBUTE_PRINTF(1, 2);
static void
fatal_error(const char *fmt, ...) {
	va_list args;
	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);
	fprintf(stderr, "\n");
	abort();
}
#endif

// Make an error with no next-error.
// The error and its message live in a single malloc'd allocation.
// `file` is stored as-is (not copied). Returns NULL if allocation fails.
static pom_error *make_error(const char *file, uint64_t line, PRINTF_FORMAT_STRING const char *fmt, ...) ATTRIBUTE_PRINTF(3, 4);
static pom_error *
make_error(const char *file, uint64_t line, const char *fmt, ...) {
	va_list args, args_copy;
	va_start(args, fmt);
	va_copy(args_copy, args);
	// first pass only measures the formatted length
	int len = vsnprintf(NULL, 0, fmt, args);
	va_end(args);
	bool bad_fmt = false;
	size_t message_len;
	if (len < 0 || (size_t)len > INT_MAX - sizeof(pom_error) - 1) {
		// Could technically happen if %s gets a really long string.
		// Just use fmt as the error in this case.
		bad_fmt = true;
		message_len = strlen(fmt);
	} else {
		message_len = (size_t)len;
	}
	pom_error *err = malloc(sizeof(pom_error) + message_len + 1);
	if (err) {
		// message is stored directly after the struct
		char *message = (char *)(err + 1);
		if (bad_fmt) {
			memcpy(message, fmt, message_len + 1);
		} else {
			vsnprintf(message, message_len + 1, fmt, args_copy);
		}
		err->file = file;
		err->line = line;
		err->message = message;
		err->next = NULL;
	}
	va_end(args_copy);
	return err;
}

// Returns the error after `error`, or NULL if there is none (or `error` is NULL).
const pom_error *
pom_error_next(const pom_error *error) {
	if (!error) return NULL;
	return error->next;
}

// Returns the file name recorded in `error`. `error` must not be NULL.
const char *
pom_error_file(const pom_error *error) {
	if (!error)
		fatal_error("%s called with NULL argument", __func__);
	return error->file;
}

// Returns the line number recorded in `error`. `error` must not be NULL.
uint64_t
pom_error_line(const pom_error *error) {
	if (!error)
		fatal_error("%s called with NULL argument", __func__);
	return error->line;
}

// Returns the message recorded in `error`. `error` must not be NULL.
const char *
pom_error_message(const pom_error *error) {
	if (!error)
		fatal_error("%s called with NULL argument", __func__);
	return error->message;
}

#ifndef POM_NO_STDIO
// Prints every error in the list starting at `error` to stderr,
// one "file:line: message" per line. A NULL list prints "No error.".
void
pom_error_print(const pom_error *error) {
	if (!error) {
		fprintf(stderr, "No error.\n");
		return;
	}
	fprintf(stderr, "Error:\n");
	for (; error; error = pom_error_next(error)) {
		fprintf(stderr, "%s:%" PRIu64 ": %s\n", error->file, error->line, error->message);
	}
}
#endif

// Record that an allocation failed.
static void
parser_out_of_memory(struct parser *parser) {
	parser->out_of_memory = true;
}

// Implementation of parser_append(). `ptr` is the address of the array
// pointer; *pcount and *pcapacity are its element count and capacity.
// Grows the array if needed, adds `need` to *pcount, and returns a pointer
// to the first of the `need` new elements. On failure, flags the parser as
// out of memory and returns NULL (the array is left as it was).
static void *
parser_append_(struct parser *parser, void *ptr, size_t elem_size, size_t *pcount, size_t *pcapacity, size_t need) {
	size_t count = *pcount;
	size_t capacity = *pcapacity;
	// this is bad if not all pointer types have the same representation.
	// I really hope we don't have to worry about that in 2025.
	void **parray = ptr;
	void *array = *parray;
	// these checks are overly strict to avoid arithmetic overflow.
	if (count >= SIZE_MAX / 8 / elem_size || need >= SIZE_MAX / 8 / elem_size) {
		parser_out_of_memory(parser);
		return NULL;
	}
	if (count + need > capacity) {
		size_t new_capacity = (count + need) * 3 / 2 + 2;
		array = realloc(array, new_capacity * elem_size);
		if (!array) {
			parser_out_of_memory(parser);
			return NULL;
		}
		*parray = array;
		*pcapacity = new_capacity;
	}
	void *items = (char *)array + count * elem_size;
	*pcount += need;
	return items;
}

// Strange resizing-array macro.
// Adds room for need elements to the array parser.field,
// and returns a pointer to the first one.
#define parser_append(parser, field, need) \
	parser_append_((parser), &(parser)->field.array, sizeof((parser)->field.array[0]), &(parser)->field.count, &(parser)->field.capacity, (need))
#define parser_append_one(parser, field) \
	parser_append(parser, field, 1)

// Record a formatted error at the parser's current line number.
// Does nothing once the parser is out of memory or already holds 1000 errors.
// Messages longer than 1000 bytes are truncated.
static void parser_error(struct parser *parser, PRINTF_FORMAT_STRING const char *fmt, ...) ATTRIBUTE_PRINTF(2, 3);
static void
parser_error(struct parser *parser, const char *fmt, ...) {
	if (parser->out_of_memory) return;
	if (parser->errors.count >= 1000) return; // don't bother at this point.
	va_list args, args_copy;
	va_start(args, fmt);
	va_copy(args_copy, args);
	int error_len = vsnprintf(NULL, 0, fmt, args);
	va_end(args);
	// a negative length could happen with a >INT_MAX-sized string, for example;
	// in that case fmt itself is used as the error message.
	bool bad_fmt = error_len < 0;
	size_t message_len = bad_fmt ? strlen(fmt) : (size_t)error_len;
	if (message_len > 1000) message_len = 1000; // truncate very long errors
	char *message = parser_append(parser, error_messages, message_len + 1);
	if (message) {
		if (bad_fmt) {
			// bounded copy: fmt may be longer than the truncated length
			memcpy(message, fmt, message_len);
			message[message_len] = 0;
		} else {
			vsnprintf(message, message_len + 1, fmt, args_copy);
		}
	}
	va_end(args_copy);
	if (!message) return;
	uint32_t message_idx = (uint32_t)(message - parser->error_messages.array);
	struct parser_error *error = parser_append_one(parser, errors);
	if (!error) return;
	error->line = parser->line_number;
	error->message = message_idx;
}

// Mark the parser as being at end of input, reporting anything that was
// still waiting for another byte. Always returns false.
static bool
parser_reached_eof(struct parser *parser) {
	if (parser->leftover_cr) {
		// input ended right after a carriage return
		parser->leftover_cr = false;
		parser_error(parser, "Carriage return with no newline after it.");
	}
	if (parser->utf8_state) {
		parser->utf8_state = UTF8_STATE_DEFAULT;
		parser_error(parser, "Invalid UTF-8 (want continuation byte, got EOF).");
	}
	parser->buf_count = 0;
	parser->eof = true;
	return false;
}

// read more data into parser->buf. returns false on EOF.
// While copying, carriage returns are dropped, and illegal control
// characters and bytes that break UTF-8 are reported and dropped.
// If skip_bom is set, a UTF-8 byte-order mark at the start of the data is skipped.
static bool
parser_read_to_buf(struct parser *parser, bool skip_bom) {
	if (parser->eof) return false;
	if (parser->short_read) {
		// last read was short, so we're at EOF
		return parser_reached_eof(parser);
	}
	char *buf = parser->buf;
	size_t read_count = parser->read_func(parser->userdata, buf, sizeof parser->buf - 1);
	parser->buf_pos = 0;
	if (read_count == 0)
		return parser_reached_eof(parser);
	if (read_count < sizeof parser->buf - 1)
		parser->short_read = true;
	if (parser->leftover_cr) {
		// previous buffer ended in '\r'; this buffer must start with '\n'
		parser->leftover_cr = false;
		if (buf[0] != '\n')
			parser_error(parser, "Carriage return with no newline after it.");
	}
	// resume any multi-byte sequence split across the previous read
	uint8_t utf8_state = parser->utf8_state;
	size_t in = 0, out = 0;
	// line_number is advanced while scanning so that errors get the line
	// they occur on, then restored before returning.
	uint64_t original_line_number = parser->line_number;
	if (skip_bom && read_count >= 3
		&& (uint8_t)buf[0] == 0xEF
		&& (uint8_t)buf[1] == 0xBB
		&& (uint8_t)buf[2] == 0xBF) {
		// skip byte-order mark
		in = 3;
	}
	for (; in < read_count; in++) {
		uint8_t byte = (uint8_t)buf[in];
		if (utf8_state == UTF8_STATE_DEFAULT) {
			if (byte < 0x80) {
				// ASCII
				if (byte == '\r') {
					if (in == read_count - 1) {
						// can't see the next byte yet; check on the next read
						parser->leftover_cr = true;
					} else if (buf[in + 1] != '\n') {
						parser_error(parser, "Carriage return with no newline after it.");
					}
					continue;
				} else if (byte == '\n') {
					parser->line_number++;
				} else if (byte < 32 && byte != '\t') {
					parser_error(parser, "Illegal control character (ASCII code %d)", byte);
					continue;
				}
			} else if (byte < 0xC2 || byte > 0xF4) {
				parser_error(parser, "Invalid UTF-8 (invalid start byte 0x%02X)", (unsigned)byte);
				continue;
			} else if (byte < 0xE0) {
				// 2-byte sequence
				utf8_state = UTF8_STATE_1CONT;
			} else if (byte == 0xE0) {
				// 3-byte sequence; must check for overlongness
				utf8_state = UTF8_STATE_2CONT_GTEQ_A0;
			} else if (byte == 0xED) {
				// 3-byte sequence; must check for UTF-16 surrogate
				utf8_state = UTF8_STATE_2CONT_LT_A0;
			} else if (byte < 0xF0) {
				// 3-byte sequence
				utf8_state = UTF8_STATE_2CONT;
			} else if (byte == 0xF0) {
				// 4-byte sequence; must check for overlongness
				utf8_state = UTF8_STATE_3CONT_GTEQ_90;
			} else if (byte < 0xF4) {
				// 4-byte sequence
				utf8_state = UTF8_STATE_3CONT;
			} else {
				// byte == 0xF4
				// 4-byte sequence; must check for too-big code points
				utf8_state = UTF8_STATE_3CONT_LT_90;
			}
		} else if (utf8_state == UTF8_STATE_1CONT
			|| utf8_state == UTF8_STATE_2CONT
			|| utf8_state == UTF8_STATE_3CONT) {
			utf8_state -= 1;
			if ((byte & 0xC0) != 0x80) {
				parser_error(parser, "Invalid UTF-8 (want continuation byte, got 0x%02X)", (unsigned)byte);
				continue;
			}
		} else if (utf8_state == UTF8_STATE_2CONT_GTEQ_A0) {
			utf8_state = UTF8_STATE_1CONT;
			if (byte < 0xA0 || (byte & 0xC0) != 0x80) {
				parser_error(parser, "Invalid UTF-8 (want continuation byte >= 0xA0, got 0x%02X)", (unsigned)byte);
				continue;
			}
		} else if (utf8_state == UTF8_STATE_2CONT_LT_A0) {
			utf8_state = UTF8_STATE_1CONT;
			if (byte >= 0xA0 || (byte & 0xC0) != 0x80) {
				parser_error(parser, "Invalid UTF-8 (want continuation byte < 0xA0, got 0x%02X)", (unsigned)byte);
				continue;
			}
		} else if (utf8_state == UTF8_STATE_3CONT_GTEQ_90) {
			utf8_state = UTF8_STATE_2CONT;
			if (byte < 0x90 || (byte & 0xC0) != 0x80) {
				parser_error(parser, "Invalid UTF-8 (want continuation byte >= 0x90, got 0x%02X)", (unsigned)byte);
				continue;
			}
		} else if (utf8_state == UTF8_STATE_3CONT_LT_90) {
			utf8_state = UTF8_STATE_2CONT;
			if (byte >= 0x90 || (byte & 0xC0) != 0x80) {
				parser_error(parser, "Invalid UTF-8 (want continuation byte < 0x90, got 0x%02X)", (unsigned)byte);
				continue;
			}
		} else {
			abort(); // should be unreachable.
		}
		buf[out++] = (char)byte;
	}
	// remember where we are in a multi-byte sequence for the next read
	parser->utf8_state = utf8_state;
	parser->line_number = original_line_number;
	parser->buf_count = (uint16_t)out;
	return true;
}

// Reads the next line into parser->line.
static void parser_read_line(struct parser *parser) { if (parser->eof) { parser->line.array[0] = 0; return; } parser->line_number += 1; parser->line.count = 0; while (true) { char *line_out = parser_append(parser, line, sizeof parser->buf + 1); if (!line_out) return; while (parser->buf_pos < parser->buf_count) { char c = parser->buf[parser->buf_pos++]; if (c == '\n') { *line_out = 0; return; } *line_out++ = c; } // ensure next append goes in the right place. parser->line.count = line_out - parser->line.array; if (!parser_read_to_buf(parser, false)) { // reached EOF *line_out = 0; return; } } } static void parse_line(struct parser *parser) { parser_read_line(parser); char *line = parser->line.array; { // remove leading white space size_t i; for (i = 0; line[i] == '\t' || line[i] == ' '; i++); memmove(line, line + i, strlen(line) + 1 - i); } if (line[0] == 0 || line[0] == '#') { // blank line/comment return; } printf("|%s\n",line); } pom_conf * pom_load(const char *filename, size_t (*read_func)(void *userdata, char *buf, size_t len), void *userdata, pom_error **error) { if (!filename) fatal_error("%s called with NULL file name", __func__); if (!read_func) fatal_error("%s called with NULL read function", __func__); if (error) *error = NULL; // Start by allocating out-of-memory error, so we can just return // it if we run out of memory. pom_error *out_of_memory = make_error(filename, 1, "Out of memory."); if (!out_of_memory) return NULL; struct parser *parser = calloc(1, sizeof *parser); if (!parser) { if (error) *error = out_of_memory; return NULL; } parser->filename = filename; parser->out_of_memory_error = out_of_memory; parser->read_func = read_func; parser->userdata = userdata; // read into parser->buf, and skip initial BOM if present. 
parser_read_to_buf(parser, true); while (!(parser->eof || parser->out_of_memory)) parse_line(parser); bool success = true; if (parser->out_of_memory) { success = false; if (error) *error = parser->out_of_memory_error; } else if (parser->errors.count) { success = false; if (error) { // shouldn't overflow on 32+-bit systems, // given the limits we impose on errors. size_t len = parser->errors.count * sizeof(pom_error) + parser->error_messages.count; // convert parser_errors to pom_error. pom_error *errors = malloc(len); if (errors) { char *messages = (char *)(errors + parser->errors.count); memcpy(messages, parser->error_messages.array, parser->error_messages.count); for (size_t i = 0; i < parser->errors.count; i++) { const struct parser_error *parser_error = &parser->errors.array[i]; errors[i].file = parser->filename; errors[i].line = parser_error->line; errors[i].message = messages + parser_error->message; errors[i].next = i == parser->errors.count - 1 ? NULL : &errors[i+1]; } *error = errors; } else { *error = parser->out_of_memory_error; } } free(parser->errors.array); free(parser->error_messages.array); } if (!error || *error != out_of_memory) { free(out_of_memory); } free(parser->line.array); free(parser); return success ? 
NULL : NULL; } static size_t read_string(void *vpstring, char *buf, size_t len) { const char **pstring = vpstring; const char *string = *pstring; size_t i; for (i = 0; i < len; i++, string++) { if (*string == 0) break; buf[i] = *string; } *pstring = string; return i; } pom_conf * pom_load_string(const char *filename, const char *string, pom_error **error) { return pom_load(filename, read_string, &string, error); } #ifndef POM_NO_STDIO static size_t read_file(void *file, char *buf, size_t len) { return fread(buf, 1, len, file); } pom_conf * pom_load_file(const char *filename, FILE *file, pom_error **error) { if (!filename) fatal_error("%s called with NULL file name", __func__); if (!file) fatal_error("%s called with NULL file", __func__); return pom_load(filename, read_file, file, error); } pom_conf * pom_load_path(const char *path, pom_error **error) { if (!path) fatal_error("%s called with NULL file name", __func__); FILE *fp = fopen(path, "rb"); if (!fp) { if (error) { const char *message = strerror(errno); *error = make_error(path, 1, "Couldn't open file: %s", message); } return NULL; } pom_conf *conf = pom_load_file(path, fp, error); fclose(fp); return conf; } #endif