diff options
author | pommicket <pommicket@gmail.com> | 2025-09-07 21:39:02 -0400 |
---|---|---|
committer | pommicket <pommicket@gmail.com> | 2025-09-07 21:39:39 -0400 |
commit | 2ca33a61f2d9b65e1473fb909924fac80ea21854 (patch) | |
tree | eb0ff539dee8327231527110e490bac4f83f1708 /src/lib.rs | |
parent | 8253cbea32f431aa9c5cd077690b2166704c989d (diff) |
More work on parser
Diffstat (limited to 'src/lib.rs')
-rw-r--r-- | src/lib.rs | 258 |
1 files changed, 246 insertions, 12 deletions
@@ -4,6 +4,7 @@ extern crate alloc;
 #[cfg(not(feature = "std"))]
 use alloc::collections::BTreeMap as Map;
 use alloc::sync::Arc;
+use core::fmt;
 #[cfg(feature = "std")]
 use std::collections::HashMap as Map;
 
@@ -14,8 +15,8 @@ pub struct Location {
     line: u64,
 }
 
-impl core::fmt::Display for Location {
-    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+impl fmt::Display for Location {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         write!(f, "{}:{}", self.file, self.line)
     }
 }
@@ -35,19 +36,27 @@ pub struct Configuration {
 }
 
 #[non_exhaustive]
+#[derive(Debug)]
 pub enum Error {
     #[cfg(feature = "std")]
-    IO(std::io::Error),
+    IO(Box<str>, std::io::Error),
+    IllegalCharacter(Location, char),
+    InvalidUtf8(Location),
     BadInt(Location, Box<str>),
     BadUInt(Location, Box<str>),
     BadFloat(Location, Box<str>),
     BadBool(Location, Box<str>),
+    UnmatchedLeftBrace(Location),
+    InvalidKey(Location, Box<str>),
+    InvalidValue(Location),
+    InvalidLine(Location),
+    StrayCharsAfterQuotedString(Location),
+    UnterminatedString(Location, char),
 }
 
-#[cfg(feature = "std")]
-impl From<std::io::Error> for Error {
-    fn from(value: std::io::Error) -> Self {
-        Self::IO(value)
+impl fmt::Display for Error {
+    fn fmt(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        todo!()
     }
 }
 
@@ -129,13 +138,238 @@ fn parse_list(_location: &Location, _string: &str) -> Vec<String> {
     todo!()
 }
 
+/// Trait for reading configurations.
+///
+/// Ordinarily you won't need to implement this trait, since it is
+/// already implemented by any `T: `[`std::io::BufRead`] (or else `&str` and `&[u8]`,
+/// if the `std` feature is not enabled).
+pub trait Read {
+    /// Read up to the next line feed (or EOF), not including the line feed itself.
+    ///
+    /// Puts the line in `line` and returns `Ok(true)`. If the end of file has been reached,
+    /// `line` is unmodified and `Ok(false)` is returned.
+    ///
+    /// You don't need to check for valid UTF-8 here — that is already done in the code which uses
+    /// this trait.
+    fn read_until_lf(&mut self, line: &mut Vec<u8>) -> Result<bool>;
+}
+
+#[cfg(feature = "std")]
+impl<R: std::io::BufRead> Read for R {
+    fn read_until_lf(&mut self, line: &mut Vec<u8>) -> Result<bool> {
+        self.read_until(b'\n', line)
+            .map_err(|e| Error::IO("read error".into(), e))?;
+        if line.ends_with(b"\n") {
+            line.pop();
+            Ok(true)
+        } else {
+            Ok(!line.is_empty())
+        }
+    }
+}
+
+#[cfg(not(feature = "std"))]
+impl Read for &str {
+    fn read_until_lf(&mut self, line: &mut Vec<u8>) -> Result<bool> {
+        match self.split_once('\n') {
+            Some((pre, post)) => {
+                *self = post;
+                line.extend_from_slice(pre.as_bytes());
+                Ok(true)
+            }
+            None => {
+                if self.is_empty() {
+                    return Ok(false);
+                }
+                line.extend_from_slice(self.as_bytes());
+                *self = "";
+                Ok(true)
+            }
+        }
+    }
+}
+
+#[cfg(not(feature = "std"))]
+impl Read for &[u8] {
+    fn read_until_lf(&mut self, line: &mut Vec<u8>) -> Result<bool> {
+        match self.iter().position(|&c| c == b'\n') {
+            Some(i) => {
+                line.extend_from_slice(&self[..i]);
+                *self = &self[i + 1..];
+                Ok(true)
+            }
+            None => {
+                if self.is_empty() {
+                    return Ok(false);
+                }
+                line.extend_from_slice(self);
+                *self = b"";
+                Ok(true)
+            }
+        }
+    }
+}
+
+fn check_valid_key(location: &Location, s: &str) -> Result<()> {
+    if s.bytes().all(|c| {
+        c >= 0x80 || c.is_ascii_alphanumeric() || matches!(c, b'.' | b'_' | b'/' | b'*' | b'-')
+    }) {
+        Ok(())
+    } else {
+        Err(Error::InvalidKey(location.clone(), s.into()))
+    }
+}
+
+/// Returns (unquoted value, new line number)
+fn read_quoted_value(
+    quoted: &str,
+    reader: &mut dyn Read,
+    start_location: &Location,
+) -> Result<(String, u64)> {
+    let delimiter: char = quoted.chars().next().unwrap();
+    let mut unquoted = String::new();
+    let mut line_number = start_location.line;
+    let location = |line_number: u64| Location {
+        file: start_location.file.clone(),
+        line: line_number,
+    };
+    let mut line_buf = vec![];
+    let mut first = true;
+    loop {
+        let line = if first {
+            first = false;
+            &quoted[delimiter.len_utf8()..] // skip the opening delimiter so it is not read as the closing one
+        } else {
+            line_buf.truncate(0);
+            if !reader.read_until_lf(&mut line_buf)? {
+                break;
+            }
+            line_number += 1; // NOTE(review): no '\n' is pushed between joined lines — confirm this is intended
+            line_buf.pop_if(|c| *c == b'\r');
+            str::from_utf8(&line_buf).map_err(|_| Error::InvalidUtf8(location(line_number)))?
+        };
+        let mut chars = line.chars();
+        while let Some(c) = chars.next() {
+            if c == delimiter {
+                if !chars.all(|c| c == ' ' || c == '\t') {
+                    return Err(Error::StrayCharsAfterQuotedString(location(line_number)));
+                }
+                return Ok((unquoted, line_number));
+            } else if c == '\\' {
+                todo!() // parse escape sequence
+            } else if c == '\0' {
+                return Err(Error::InvalidValue(location(line_number)));
+            } else {
+                unquoted.push(c);
+            }
+        }
+    }
+    Err(Error::UnterminatedString(start_location.clone(), delimiter))
+}
+
 impl Configuration {
-    #[cfg(feature = "std")]
-    pub fn load<R: std::io::BufRead>(_reader: R) -> Result<Configuration> {
-        todo!()
+    fn load_dyn(filename: &str, reader: &mut dyn Read) -> Result<Self> {
+        let mut config = Configuration::default();
+        let mut line = vec![];
+        let mut line_number = 0;
+        let mut current_section = &mut config;
+        let filename: Arc<str> = filename.into();
+        loop {
+            line.truncate(0);
+            if !reader.read_until_lf(&mut line)? {
+                break;
+            }
+            line_number += 1;
+            let location = Location {
+                file: filename.clone(),
+                line: line_number,
+            };
+            line.pop_if(|c| *c == b'\r');
+            for c in &line {
+                if (0..0x20).contains(c) && *c != b'\t' { // every control byte 0x00..=0x1F except tab
+                    return Err(Error::IllegalCharacter(location, char::from(*c)));
+                }
+            }
+            let mut line =
+                str::from_utf8(&line).map_err(|_| Error::InvalidUtf8(location.clone()))?;
+            line = line.trim_start_matches(['\t', ' ']);
+            if line.is_empty() || line.starts_with('#') {
+                // comment/blank line
+                continue;
+            }
+            if line.starts_with('[') {
+                line = line.trim_end_matches(['\t', ' ']);
+                if !line.ends_with(']') {
+                    return Err(Error::UnmatchedLeftBrace(location));
+                }
+                let new_section = line[1..line.len() - 1].into();
+                current_section = &mut config;
+                check_valid_key(&location, new_section)?;
+                if !new_section.is_empty() {
+                    for component in new_section.split('.') {
+                        current_section = Arc::get_mut(&mut current_section.children)
+                            .unwrap()
+                            .entry(component.into())
+                            .or_default();
+                    }
+                }
+            } else {
+                let (mut relative_key, mut value) = line
+                    .split_once('=')
+                    .ok_or_else(|| Error::InvalidLine(location.clone()))?;
+                relative_key = relative_key.trim_end_matches(['\t', ' ']); // trim first so `key = value` validates
+                check_valid_key(&location, relative_key)?;
+                value = value.trim_start_matches(['\t', ' ']);
+
+                fn insert(
+                    mut section: &mut Configuration,
+                    location: Location,
+                    mut key: &str,
+                    value: &str,
+                ) {
+                    if let Some(last_dot) = key.rfind('.') {
+                        for component in key[..last_dot].split('.') {
+                            section = Arc::get_mut(&mut section.children)
+                                .unwrap()
+                                .entry(component.into())
+                                .or_default()
+                        }
+                        key = &key[last_dot + 1..];
+                    }
+                    Arc::get_mut(&mut section.values).unwrap().insert(
+                        key.into(),
+                        Value {
+                            value: value.into(),
+                            defined_at: location,
+                        },
+                    );
+                }
+                if value.starts_with(['`', '"', '\'']) {
+                    let (value, new_line_number) = read_quoted_value(value, reader, &location)?;
+                    insert(current_section, location, relative_key, &value);
+                    line_number = new_line_number;
+                } else {
+                    value = value.trim_end_matches(['\t', ' ']);
+                    if value.contains('\0') {
+                        return Err(Error::InvalidValue(location));
+                    }
+                    insert(current_section, location, relative_key, value);
+                }
+            }
+        }
+        Ok(config)
     }
-    pub fn load_str(_s: &str) -> Result<Configuration> {
-        todo!()
+    pub fn load<R: Read>(filename: &str, mut reader: R) -> Result<Self> {
+        // avoid big code size by using dyn reference.
+        // the impact on performance is not really important.
+        Configuration::load_dyn(filename, &mut reader)
+    }
+    #[cfg(feature = "std")]
+    pub fn load_path<P: AsRef<std::path::Path>>(path: P) -> Result<Self> {
+        let p = path.as_ref();
+        let filename = p.to_string_lossy();
+        let file = std::fs::File::open(p).map_err(|e| Error::IO(filename.clone().into(), e))?;
+        Configuration::load(&filename, std::io::BufReader::new(file))
     }
     pub fn section(&self, key: &str) -> Configuration {
         let mut node = self;