summary refs log tree commit diff
diff options
context:
space:
mode:
author pommicket <pommicket@gmail.com> 2025-09-13 23:09:05 -0400
committer pommicket <pommicket@gmail.com> 2025-09-13 23:09:05 -0400
commit e4f62b4ea93b2e1a5fe18366d99d5fa2220eea34 (patch)
tree 08753822add2cc221b8d3c96bd2242e95f2abfc5
parent ca21dcb53e41919fc255ad736986aa6ff1d5bd85 (diff)
Parse quoted values
-rw-r--r-- examples/conf.pom 3
-rw-r--r-- pom.c 130
-rwxr-xr-x pre-commit.sh 11
3 files changed, 130 insertions, 14 deletions
diff --git a/examples/conf.pom b/examples/conf.pom
index cc8404f..ba10cca 100644
--- a/examples/conf.pom
+++ b/examples/conf.pom
@@ -11,6 +11,7 @@ Hello = 5
[number]
one = 1
two = 2
- three = 3
+ three = "é
+ yippee"
[]
thing=yup
diff --git a/pom.c b/pom.c
index 9cf4f98..abeff17 100644
--- a/pom.c
+++ b/pom.c
@@ -341,6 +341,13 @@ parser_append_(struct parser *parser, void *ptr, size_t elem_size, size_t *pcoun
#define parser_append_one(parser, field) \
parser_append(parser, field, 1)
+// Push a single byte onto the end of parser->string_data.
+// If parser_append_one yields a null pointer, nothing is written.
+static void
+parser_append_char(struct parser *parser, char c) {
+	char *slot = parser_append_one(parser, string_data);
+	if (!slot) {
+		return;
+	}
+	*slot = c;
+}
+
static void parser_error(struct parser *parser, PRINTF_FORMAT_STRING const char *fmt, ...) ATTRIBUTE_PRINTF(2, 3);
static void
parser_error(struct parser *parser, const char *fmt, ...) {
@@ -571,10 +578,123 @@ check_valid_key(struct parser *parser, const char *key) {
}
}
+// Convert one ASCII hexadecimal digit (0-9, a-f, A-F) to its value 0..15.
+// Every other character, including NUL, yields -1.
+static int
+parse_hex_digit(char c) {
+	if (c >= '0' && c <= '9') {
+		return c - '0';
+	}
+	if (c >= 'a' && c <= 'f') {
+		return c - 'a' + 10;
+	}
+	if (c >= 'A' && c <= 'F') {
+		return c - 'A' + 10;
+	}
+	return -1;
+}
+
+// Parse the escape sequence that follows a backslash and append the
+// character(s) it stands for to parser->string_data.
+//
+// *p_str must point just past the backslash. On success *p_str is advanced
+// past the whole sequence. On failure an "Invalid escape sequence" error is
+// reported through parser_error and *p_str is left untouched, so the caller
+// carries on scanning from the character right after the backslash.
+//
+// Accepted sequences: \n \t \r \\ \" \' \` \, \xHH (01 to 7f only) and
+// \u{H...} (any code point up to 10ffff except 0 and the UTF-16 surrogates,
+// stored as UTF-8).
+static void
+parse_escape_sequence(struct parser *parser, const char **p_str) {
+	const char *str = *p_str;
+	switch (*str++) {
+	case 'n':
+		parser_append_char(parser, '\n');
+		break;
+	case 't':
+		parser_append_char(parser, '\t');
+		break;
+	case 'r':
+		parser_append_char(parser, '\r');
+		break;
+	case '\\':
+		parser_append_char(parser, '\\');
+		break;
+	case '"':
+		parser_append_char(parser, '"');
+		break;
+	case '\'':
+		parser_append_char(parser, '\'');
+		break;
+	case '`':
+		parser_append_char(parser, '`');
+		break;
+	case ',':
+		// the backslash is kept: \, is stored as the two characters \ and ,
+		parser_append_char(parser, '\\');
+		parser_append_char(parser, ',');
+		break;
+	case 'x': {
+		// A NUL terminator is not a hex digit, so a truncated sequence
+		// fails here before anything beyond the terminator is read.
+		int dig1 = parse_hex_digit(*str++);
+		if (dig1 < 0) goto invalid_sequence;
+		int dig2 = parse_hex_digit(*str++);
+		if (dig2 < 0) goto invalid_sequence;
+		int value = dig1 << 4 | dig2;
+		// only non-NUL ASCII may be written with \x
+		if (value == 0 || value > 0x7f) goto invalid_sequence;
+		parser_append_char(parser, (char)value);
+	} break;
+	case 'u': {
+		if (*str++ != '{') goto invalid_sequence;
+		uint_fast32_t value = 0;
+		char c;
+		while ((c = *str++) != '}') {
+			int digit = parse_hex_digit(c);
+			if (digit < 0) goto invalid_sequence;
+			value <<= 4;
+			value |= (uint_fast32_t)digit;
+			// checked after every digit, so value can never overflow
+			if (value > 0x10ffff) goto invalid_sequence;
+		}
+		// Reject U+0000 (this also covers the empty form \u{}): just like
+		// \x00, it would put a NUL byte inside the NUL-terminated value.
+		if (value == 0) goto invalid_sequence;
+		if (value >= 0xd800 && value <= 0xdfff)
+			goto invalid_sequence; // utf-16 surrogate
+		if (value < 0x80) {
+			// ASCII
+			parser_append_char(parser, (char)value);
+		} else if (value < 0x800) {
+			// two-byte sequence
+			parser_append_char(parser, (char)(0xc0 | value >> 6));
+			parser_append_char(parser, (char)(0x80 | (value & 63)));
+		} else if (value < 0x10000) {
+			// three-byte sequence
+			parser_append_char(parser, (char)(0xe0 | value >> 12));
+			parser_append_char(parser, (char)(0x80 | ((value >> 6) & 63)));
+			parser_append_char(parser, (char)(0x80 | (value & 63)));
+		} else {
+			// four-byte sequence
+			parser_append_char(parser, (char)(0xf0 | value >> 18));
+			parser_append_char(parser, (char)(0x80 | ((value >> 12) & 63)));
+			parser_append_char(parser, (char)(0x80 | ((value >> 6) & 63)));
+			parser_append_char(parser, (char)(0x80 | (value & 63)));
+		}
+	} break;
+	default:
+		goto invalid_sequence;
+	}
+	*p_str = str;
+	return;
+invalid_sequence:
+	// str is one past the last character examined, so the message shows
+	// exactly the text that was rejected (%.*s stops early at a NUL).
+	parser_error(parser, "Invalid escape sequence: \\%.*s", (int)(str - *p_str), *p_str);
+}
+
+// Parse a quoted value, appending its contents to parser->string_data
+// followed by a NUL terminator.
+//
+// first_line must point at the opening delimiter (" or `). The value runs up
+// to the next occurrence of that delimiter that is not part of an escape
+// sequence, and may span several lines: further lines are fetched with
+// parser_read_line and a '\n' is stored for each line break. Backslash
+// sequences are handled by parse_escape_sequence. Whatever follows the
+// closing delimiter on its line is not examined here.
+//
+// If parser->eof becomes set before the closing delimiter is found, an error
+// is reported through parser_error.
static void
parse_quoted_value(struct parser *parser, const char *first_line) {
- // TODO
- abort();
+	const char *line = first_line;
+	char delimiter = *line++;
+	assert(delimiter == '"' || delimiter == '`');
+	while (!parser->eof && !parser->out_of_memory) {
+		char c;
+		while ((c = *line++)) {
+			if (c == delimiter)
+				goto finish;
+			if (c == '\\') {
+				parse_escape_sequence(parser, &line);
+			} else {
+				parser_append_char(parser, c);
+			}
+		}
+		// end of this line without a closing delimiter: continue on the next one
+		parser_read_line(parser);
+		char *newline = parser_append_one(parser, string_data);
+		if (!newline) return;
+		*newline = '\n';
+		line = parser->line.array;
+	}
+	// Getting here means the loop gave up before seeing the closing delimiter.
+	// Unless that was caused by running out of memory, the value is unterminated.
+	if (!parser->out_of_memory)
+		parser_error(parser, "Missing closing %c for quoted value", delimiter);
+finish:
+	parser_append_char(parser, 0);
}
static void
@@ -647,10 +767,12 @@ parse_line(struct parser *parser) {
char *value = &line[value_start_idx];
strip_trailing_accepted_spaces(value);
size_t value_sz = strlen(value) + 1;
- memcpy(parser_append(parser, string_data, value_sz),
- value, value_sz);
+ char *value_out = parser_append(parser, string_data, value_sz);
+ if (!value_out) return;
+ memcpy(value_out, value, value_sz);
}
struct parser_item *item = parser_append_one(parser, items);
+ if (!item) return;
item->key = key_idx;
item->value = value_idx;
item->line = parser->line_number;
diff --git a/pre-commit.sh b/pre-commit.sh
index 6841365..85253dd 100755
--- a/pre-commit.sh
+++ b/pre-commit.sh
@@ -1,11 +1,4 @@
#!/bin/sh
-if sed --version | grep -q 'GNU sed'; then
- for file in pom.c pom.h; do
- # Remove trailing white space
- # (But only if file actually has trailing white space
- # we don't want to mess up last-modified-times otherwise)
- grep -q '\s\s*$' $file && sed -i 's/\s\s*$//' $file
- done
-fi
-which doxygen >/dev/null 2>/dev/null && { doxygen || exit 1; }
-git add -u
+
+# Ensure no doxygen errors (the check is skipped when doxygen is not installed).
+# Written as an `if` so that a missing doxygen leaves the exit status at 0;
+# a failed `cmd && ...` as the last command would make the script exit non-zero.
+# `command -v` is the POSIX way to look up a command; `which` may not exist.
+if command -v doxygen >/dev/null 2>&1; then
+	doxygen || exit 1
+fi