summaryrefslogtreecommitdiff
path: root/src/lib.rs
diff options
context:
space:
mode:
author pommicket <pommicket@gmail.com> 2025-09-07 22:14:28 -0400
committer pommicket <pommicket@gmail.com> 2025-09-07 22:14:28 -0400
commit 575bb3913b01ec83490d61f0540ccdcb4d861845 (patch)
tree 60b811ae1bab1bff5203211f9b0020d64f28564d /src/lib.rs
parent 2ca33a61f2d9b65e1473fb909924fac80ea21854 (diff)
Write code for reading quoted values (needs testing)
Diffstat (limited to 'src/lib.rs')
-rw-r--r--src/lib.rs215
1 files changed, 159 insertions, 56 deletions
diff --git a/src/lib.rs b/src/lib.rs
index 1b73bed..9e08e06 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -5,6 +5,7 @@ extern crate alloc;
use alloc::collections::BTreeMap as Map;
use alloc::sync::Arc;
use core::fmt;
+use core::mem::take;
#[cfg(feature = "std")]
use std::collections::HashMap as Map;
@@ -52,6 +53,12 @@ pub enum Error {
InvalidLine(Location),
StrayCharsAfterQuotedString(Location),
UnterminatedString(Location, char),
+ InvalidEscapeSequence(Location, Box<str>),
+ /// Used when there is more than one error in a file.
+ ///
+ /// None of the errors in the array will be [`Error::Multiple`]'s,
+ /// and the array will contain at least two elements.
+ Multiple(Box<[Error]>),
}
impl fmt::Display for Error {
@@ -210,65 +217,153 @@ impl Read for &[u8] {
}
}
-fn check_valid_key(location: &Location, s: &str) -> Result<()> {
- if s.bytes().all(|c| {
- c >= 0x80 || c.is_ascii_alphanumeric() || matches!(c, b'.' | b'_' | b'/' | b'*' | b'-')
- }) {
- Ok(())
- } else {
- Err(Error::InvalidKey(location.clone(), s.into()))
- }
+/// State carried while parsing a configuration.
+///
+/// Created with `Parser::default()`, i.e. with no errors recorded yet.
+#[derive(Default)]
+struct Parser {
+ /// Errors that do not stop parsing. They are collected here as they are
+ /// found and turned into the final `Err` result when `load` finishes.
+ nonfatal_errors: Vec<Error>,
}
-/// Returns (unquoted value, new line number)
-fn read_quoted_value(
- quoted: &str,
- reader: &mut dyn Read,
- start_location: &Location,
-) -> Result<(String, u64)> {
- let delimiter: char = quoted.chars().next().unwrap();
- let mut unquoted = String::new();
- let mut line_number = start_location.line;
- let location = |line_number: u64| Location {
- file: start_location.file.clone(),
- line: line_number,
- };
- let mut line_buf = vec![];
- let mut first = true;
- loop {
- let line = if first {
- first = false;
- quoted
- } else {
- line_buf.truncate(0);
- if !reader.read_until_lf(&mut line_buf)? {
- break;
- }
- line_number += 1;
- line_buf.pop_if(|c| *c == b'\r');
- str::from_utf8(&line_buf).map_err(|_| Error::InvalidUtf8(location(line_number)))?
+impl Parser {
+ /// Records a nonfatal [`Error::InvalidKey`] if `s` contains a byte which
+ /// is not allowed in a key.
+ ///
+ /// Allowed bytes are ASCII letters and digits, `.`, `_`, `/`, `*`, `-`,
+ /// and every byte >= 0x80 (i.e. any part of a non-ASCII character).
+ fn check_valid_key(&mut self, location: &Location, s: &str) {
+ let is_key_byte = |byte: u8| {
+ byte >= 0x80 || byte.is_ascii_alphanumeric() || matches!(byte, b'.' | b'_' | b'/' | b'*' | b'-')
+ };
+ if s.bytes().any(|byte| !is_key_byte(byte)) {
+ let error = Error::InvalidKey(location.clone(), s.into());
+ self.nonfatal_errors.push(error);
+ }
+ }
+
+ /// Reads a quoted value, resolving escape sequences.
+ ///
+ /// `quoted` is the rest of the current line, starting at the opening
+ /// delimiter (its first character is taken to be the delimiter). If the
+ /// closing delimiter is not on that line, further lines are read from `reader`.
+ ///
+ /// Recoverable problems (stray characters after the closing delimiter,
+ /// invalid escape sequences, null characters) are pushed to
+ /// `self.nonfatal_errors` and parsing carries on. Errors from `reader` and
+ /// invalid UTF-8 are returned immediately, and [`Error::UnterminatedString`]
+ /// is returned if `read_until_lf` returns `false` before the closing
+ /// delimiter has been found.
+ ///
+ /// Returns (unquoted value, new line number)
+ fn read_quoted_value(
+ &mut self,
+ quoted: &str,
+ reader: &mut dyn Read,
+ start_location: &Location,
+ ) -> Result<(String, u64)> {
+ // NOTE(review): panics if `quoted` is empty. The visible caller only
+ // passes values which start with a quote character.
+ let delimiter: char = quoted.chars().next().unwrap();
+ let mut unquoted = String::new();
+ let mut line_number = start_location.line;
+ let location = |line_number: u64| Location {
+ file: start_location.file.clone(),
+ line: line_number,
};
- let mut chars = line.chars();
- while let Some(c) = chars.next() {
- if c == delimiter {
- if !chars.all(|c| c == ' ' || c == '\t') {
- return Err(Error::StrayCharsAfterQuotedString(location(line_number)));
- }
- return Ok((unquoted, line_number));
- } else if c == '\\' {
- todo!() // parse escape sequence
- } else if c == '\0' {
- return Err(Error::InvalidValue(location(line_number)));
+ let mut line_buf = vec![];
+ let mut first = true;
+ // NOTE(review): nothing is appended to `unquoted` between physical lines,
+ // so unless `read_until_lf` leaves the LF in the buffer, a value spanning
+ // several lines is joined without line breaks. Confirm this is intended.
+ loop {
+ let line = if first {
+ first = false;
+ // Skip the opening delimiter. Otherwise the scan below would
+ // treat it as the closing one and every value would come out empty.
+ &quoted[delimiter.len_utf8()..]
} else {
- unquoted.push(c);
+ line_buf.truncate(0);
+ if !reader.read_until_lf(&mut line_buf)? {
+ break;
+ }
+ line_number += 1;
+ line_buf.pop_if(|c| *c == b'\r');
+ str::from_utf8(&line_buf).map_err(|_| Error::InvalidUtf8(location(line_number)))?
+ };
+ let mut chars = line.chars();
+ // Labelled so that the `\u{...}` digit loop can abandon the escape and
+ // resume scanning the rest of the line.
+ 'chars: while let Some(c) = chars.next() {
+ let invalid_escape =
+ move |s: String| Error::InvalidEscapeSequence(location(line_number), s.into());
+ if c == delimiter {
+ // only spaces and tabs may follow the closing delimiter
+ if !chars.all(|c| c == ' ' || c == '\t') {
+ self.nonfatal_errors
+ .push(Error::StrayCharsAfterQuotedString(location(line_number)));
+ }
+ return Ok((unquoted, line_number));
+ } else if c == '\\' {
+ let Some(c) = chars.next() else {
+ // backslash at the very end of a line
+ self.nonfatal_errors
+ .push(invalid_escape("\\(newline)".into()));
+ break;
+ };
+ /// Value of a single hexadecimal digit, or `None` if `c` isn't one.
+ fn parse_hex_digit(c: char) -> Option<u32> {
+ Some(match c {
+ '0'..='9' => (c as u32) - ('0' as u32),
+ 'a'..='f' => (c as u32) - ('a' as u32) + 10,
+ 'A'..='F' => (c as u32) - ('A' as u32) + 10,
+ _ => return None,
+ })
+ }
+ match c {
+ 'n' => unquoted.push('\n'),
+ 'r' => unquoted.push('\r'),
+ 't' => unquoted.push('\t'),
+ '\\' | '\'' | '"' | '`' => unquoted.push(c),
+ // `\,` is kept as-is, backslash included.
+ // NOTE(review): presumably so that later list splitting can
+ // tell it apart from a separating comma. Confirm.
+ ',' => unquoted.push_str("\\,"),
+ 'x' => {
+ // \xHH: exactly two hex digits
+ let Some(c1) = chars.next() else {
+ self.nonfatal_errors.push(invalid_escape("\\x".into()));
+ break;
+ };
+ let Some(c2) = chars.next() else {
+ self.nonfatal_errors
+ .push(invalid_escape(format!("\\x{c1}")));
+ break;
+ };
+ let (Some(nibble1), Some(nibble2)) =
+ (parse_hex_digit(c1), parse_hex_digit(c2))
+ else {
+ self.nonfatal_errors
+ .push(invalid_escape(format!("\\x{c1}{c2}")));
+ continue;
+ };
+ if nibble1 == 0 && nibble2 == 0 {
+ self.nonfatal_errors
+ .push(Error::InvalidValue(location(line_number)));
+ }
+ // The high nibble occupies bits 4..8, so the value is at most
+ // 0xff, which is always a valid char: the unwrap cannot fail.
+ unquoted.push(char::try_from(nibble1 << 4 | nibble2).unwrap());
+ }
+ 'u' => {
+ // \u{H...}: up to six hex digits between braces
+ let mut c = chars.next();
+ if c != Some('{') {
+ self.nonfatal_errors.push(invalid_escape("\\u".into()));
+ continue;
+ }
+ let mut code = 0u32;
+ for i in 0..7 {
+ c = chars.next();
+ if i == 6 {
+ // the seventh character can only be the closing brace,
+ // which is checked after the loop
+ break;
+ }
+ let Some(c) = c else {
+ break;
+ };
+ if c == '}' {
+ break;
+ }
+ // Nonfatal like every other invalid escape. The message is
+ // built before `code` is shifted, so it shows the digits
+ // accepted so far followed by the offending character.
+ let Some(digit) = parse_hex_digit(c) else {
+ self.nonfatal_errors
+ .push(invalid_escape(format!("\\u{{{code:x}{c}")));
+ continue 'chars;
+ };
+ code = code << 4 | digit;
+ }
+ if c != Some('}') {
+ self.nonfatal_errors
+ .push(invalid_escape("\\u{ has no matching }".into()));
+ continue;
+ }
+ let Ok(c) = char::try_from(code) else {
+ // surrogate or beyond U+10FFFF
+ self.nonfatal_errors
+ .push(invalid_escape(format!("\\u{{{code:x}}}")));
+ continue;
+ };
+ if code == 0 {
+ // same treatment as `\x00` and a literal null character
+ self.nonfatal_errors
+ .push(Error::InvalidValue(location(line_number)));
+ }
+ unquoted.push(c);
+ }
+ _ => {
+ self.nonfatal_errors.push(invalid_escape(format!("\\{c}")));
+ }
+ }
+ } else if c == '\0' {
+ self.nonfatal_errors
+ .push(Error::InvalidValue(location(line_number)));
+ } else {
+ unquoted.push(c);
+ }
}
}
+ Err(Error::UnterminatedString(start_location.clone(), delimiter))
}
- Err(Error::UnterminatedString(start_location.clone(), delimiter))
-}
-impl Configuration {
- fn load_dyn(filename: &str, reader: &mut dyn Read) -> Result<Self> {
+ fn load(&mut self, filename: &str, reader: &mut dyn Read) -> Result<Configuration> {
let mut config = Configuration::default();
let mut line = vec![];
let mut line_number = 0;
@@ -304,7 +399,7 @@ impl Configuration {
}
let new_section = line[1..line.len() - 1].into();
current_section = &mut config;
- check_valid_key(&location, new_section)?;
+ self.check_valid_key(&location, new_section);
if !new_section.is_empty() {
for component in new_section.split('.') {
current_section = Arc::get_mut(&mut current_section.children)
@@ -317,7 +412,7 @@ impl Configuration {
let (mut relative_key, mut value) = line
.split_once('=')
.ok_or_else(|| Error::InvalidLine(location.clone()))?;
- check_valid_key(&location, relative_key)?;
+ self.check_valid_key(&location, relative_key);
relative_key = relative_key.trim_end_matches(['\t', ' ']);
value = value.trim_start_matches(['\t', ' ']);
@@ -345,7 +440,8 @@ impl Configuration {
);
}
if value.starts_with(['`', '"', '\'']) {
- let (value, new_line_number) = read_quoted_value(value, reader, &location)?;
+ let (value, new_line_number) =
+ self.read_quoted_value(value, reader, &location)?;
insert(current_section, location, relative_key, &value);
line_number = new_line_number;
} else {
@@ -357,12 +453,19 @@ impl Configuration {
}
}
}
- Ok(config)
+ match self.nonfatal_errors.len() {
+ 0 => Ok(config),
+ 1 => Err(self.nonfatal_errors.pop().unwrap()),
+ 2.. => Err(Error::Multiple(take(&mut self.nonfatal_errors).into())),
+ }
}
+}
+
+impl Configuration {
+ /// Loads a configuration by parsing everything read from `reader`.
+ ///
+ /// `filename` is handed to the parser unchanged.
+ /// NOTE(review): presumably it is only used to label error locations. Confirm.
+ ///
+ /// If parsing produced exactly one nonfatal error, that error is returned;
+ /// if it produced several, they are returned together as [`Error::Multiple`].
pub fn load<R: Read>(filename: &str, mut reader: R) -> Result<Self> {
// avoid big code size by using dyn reference.
// the impact on performance is not really important.
- Configuration::load_dyn(filename, &mut reader)
+ Parser::default().load(filename, &mut reader)
}
#[cfg(feature = "std")]
pub fn load_path<P: AsRef<std::path::Path>>(path: P) -> Result<Self> {