From e4f62b4ea93b2e1a5fe18366d99d5fa2220eea34 Mon Sep 17 00:00:00 2001 From: pommicket Date: Sat, 13 Sep 2025 23:09:05 -0400 Subject: Parse quoted values --- examples/conf.pom | 3 +- pom.c | 130 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- pre-commit.sh | 11 +---- 3 files changed, 130 insertions(+), 14 deletions(-) diff --git a/examples/conf.pom b/examples/conf.pom index cc8404f..ba10cca 100644 --- a/examples/conf.pom +++ b/examples/conf.pom @@ -11,6 +11,7 @@ Hello = 5 [number] one = 1 two = 2 - three = 3 + three = "é + yippee" [] thing=yup diff --git a/pom.c b/pom.c index 9cf4f98..abeff17 100644 --- a/pom.c +++ b/pom.c @@ -341,6 +341,13 @@ parser_append_(struct parser *parser, void *ptr, size_t elem_size, size_t *pcoun #define parser_append_one(parser, field) \ parser_append(parser, field, 1) +// append a character to parser->string_data (nothing is written if the append returns NULL) +static void +parser_append_char(struct parser *parser, char c) { + char *pc = parser_append_one(parser, string_data); + if (pc) *pc = c; +} + static void parser_error(struct parser *parser, PRINTF_FORMAT_STRING const char *fmt, ...) ATTRIBUTE_PRINTF(2, 3); static void parser_error(struct parser *parser, const char *fmt, ...) { @@ -571,10 +578,123 @@ check_valid_key(struct parser *parser, const char *key) { } } +static int +parse_hex_digit(char c) { // returns the value (0-15) of hex digit c, or -1 if c is not 0-9, a-f or A-F + if (c < '0') return -1; + if (c <= '9') return c - '0'; + c &= 0xdf; // clear bit 0x20 so that 'a'-'f' compare equal to 'A'-'F' + if (c < 'A') return -1; + if (c <= 'F') return c - 'A' + 10; + return -1; +} + +// parse escape sequence in *p_str (the text just after the backslash), appending the decoded bytes to parser->string_data and advancing *p_str past it. on an invalid sequence, an error is reported and *p_str is left unchanged. 
+static void +parse_escape_sequence(struct parser *parser, const char **p_str) { + const char *str = *p_str; + switch (*str++) { + invalid_sequence: { + int len = (int)(str - *p_str); + parser_error(parser, "Invalid escape sequence: \\%.*s", len, *p_str); + return; + } break; + case 'n': + parser_append_char(parser, '\n'); + break; + case 't': + parser_append_char(parser, '\t'); + break; + case 'r': + parser_append_char(parser, '\r'); + break; + case '\\': + parser_append_char(parser, '\\'); + break; + case '"': + parser_append_char(parser, '"'); + break; + case '\'': + parser_append_char(parser, '\''); + break; + case '`': + parser_append_char(parser, '`'); + break; + case ',': + parser_append_char(parser, '\\'); + parser_append_char(parser, ','); + break; + case 'x': { + int dig1 = parse_hex_digit(*str++); + if (dig1 < 0) goto invalid_sequence; + int dig2 = parse_hex_digit(*str++); + if (dig2 < 0) goto invalid_sequence; + int value = dig1 << 4 | dig2; + if (value == 0 || value > 0x7f) goto invalid_sequence; + parser_append_char(parser, value); + } break; + case 'u': { + if (*str++ != '{') goto invalid_sequence; + uint_fast32_t value = 0; + char c; + while ((c = *str++) != '}') { + int digit = parse_hex_digit(c); + if (digit < 0) goto invalid_sequence; + value <<= 4; + value |= digit; + if (value > 0x10ffff) goto invalid_sequence; + } + if (value == 0 || (value >= 0xd800 && value <= 0xdfff)) + goto invalid_sequence; // NUL (from \u{0} or an empty \u{}; it would end the NUL-terminated value early, and \x00 is rejected above too) or utf-16 surrogate + if (value < 0x80) { + // ASCII + parser_append_char(parser, value); + } else if (value < 0x800) { + // two-byte sequence + parser_append_char(parser, 0xc0 | value >> 6); + parser_append_char(parser, 0x80 | (value & 63)); + } else if (value < 0x10000) { + // three-byte sequence + parser_append_char(parser, 0xe0 | value >> 12); + parser_append_char(parser, 0x80 | ((value >> 6) & 63)); + parser_append_char(parser, 0x80 | (value & 63)); + } else { + // four-byte sequence + parser_append_char(parser, 0xf0 | value >> 18); + parser_append_char(parser, 
0x80 | ((value >> 12) & 63)); + parser_append_char(parser, 0x80 | ((value >> 6) & 63)); + parser_append_char(parser, 0x80 | (value & 63)); + } + } break; + default: + goto invalid_sequence; + } + *p_str = str; +} + static void parse_quoted_value(struct parser *parser, const char *first_line) { - // TODO - abort(); + const char *line = first_line; + char delimiter = *line++; + assert(delimiter == '"' || delimiter == '`'); + while (!parser->eof && !parser->out_of_memory) { + char c; + while ((c = *line++)) { + if (c == delimiter) + goto finish; + if (c == '\\') { + parse_escape_sequence(parser, &line); + } else { + parser_append_char(parser, c); + } + } + parser_read_line(parser); + char *newline = parser_append_one(parser, string_data); + if (!newline) return; + *newline = '\n'; + line = parser->line.array; + } +finish:; + parser_append_char(parser, 0); // NUL-terminate the value in string_data } static void @@ -647,10 +767,12 @@ parse_line(struct parser *parser) { char *value = &line[value_start_idx]; strip_trailing_accepted_spaces(value); size_t value_sz = strlen(value) + 1; - memcpy(parser_append(parser, string_data, value_sz), - value, value_sz); + char *value_out = parser_append(parser, string_data, value_sz); + if (!value_out) return; + memcpy(value_out, value, value_sz); } struct parser_item *item = parser_append_one(parser, items); + if (!item) return; item->key = key_idx; item->value = value_idx; item->line = parser->line_number; diff --git a/pre-commit.sh b/pre-commit.sh index 6841365..85253dd 100755 --- a/pre-commit.sh +++ b/pre-commit.sh @@ -1,11 +1,4 @@ #!/bin/sh -if sed --version | grep -q 'GNU sed'; then - for file in pom.c pom.h; do - # Remove trailing white space - # (But only if file actually has trailing white space - # we don't want to mess up last-modified-times otherwise) - grep -q '\s\s*$' $file && sed -i 's/\s\s*$//' $file - done -fi + +# Ensure no doxygen errors which doxygen >/dev/null 2>/dev/null && { doxygen || exit 1; } -git add -u -- cgit v1.2.3