summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorpommicket <pommicket@gmail.com>2025-09-07 21:39:02 -0400
committerpommicket <pommicket@gmail.com>2025-09-07 21:39:39 -0400
commit2ca33a61f2d9b65e1473fb909924fac80ea21854 (patch)
treeeb0ff539dee8327231527110e490bac4f83f1708
parent8253cbea32f431aa9c5cd077690b2166704c989d (diff)
More work on parser
-rw-r--r--Cargo.lock2
-rw-r--r--Cargo.toml2
-rwxr-xr-xpre-commit.sh4
-rw-r--r--src/lib.rs258
4 files changed, 252 insertions, 14 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 61170a9..5e5481f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3,5 +3,5 @@
version = 4
[[package]]
-name = "pom-rs"
+name = "pom-parser"
version = "0.1.0"
diff --git a/Cargo.toml b/Cargo.toml
index fbe4df6..f9b48bb 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,5 +1,5 @@
[package]
-name = "pom-rs"
+name = "pom-parser"
version = "0.1.0"
edition = "2024"
diff --git a/pre-commit.sh b/pre-commit.sh
new file mode 100755
index 0000000..c1abd05
--- /dev/null
+++ b/pre-commit.sh
@@ -0,0 +1,4 @@
+#!/bin/sh
+cargo fmt || exit 1
+cargo clippy || exit 1
+git add -u || exit 1
diff --git a/src/lib.rs b/src/lib.rs
index 465f927..1b73bed 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -4,6 +4,7 @@ extern crate alloc;
#[cfg(not(feature = "std"))]
use alloc::collections::BTreeMap as Map;
use alloc::sync::Arc;
+use core::fmt;
#[cfg(feature = "std")]
use std::collections::HashMap as Map;
@@ -14,8 +15,8 @@ pub struct Location {
line: u64,
}
-impl core::fmt::Display for Location {
- fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+impl fmt::Display for Location {
+ /// Writes the location as `file:line`.
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}:{}", self.file, self.line)
}
}
@@ -35,19 +36,27 @@ pub struct Configuration {
}
#[non_exhaustive]
+#[derive(Debug)]
pub enum Error {
#[cfg(feature = "std")]
- IO(std::io::Error),
+ /// An I/O operation failed. The string gives context: the file name when opening
+ /// a path fails, or a short description such as `"read error"`.
+ IO(Box<str>, std::io::Error),
+ /// A disallowed control character (tab is permitted) was found on a line.
+ IllegalCharacter(Location, char),
+ /// A line is not valid UTF-8.
+ InvalidUtf8(Location),
BadInt(Location, Box<str>),
BadUInt(Location, Box<str>),
BadFloat(Location, Box<str>),
BadBool(Location, Box<str>),
+ /// A line starting with `[` does not end with `]`.
+ UnmatchedLeftBrace(Location),
+ /// A key or section name contains a byte other than an ASCII letter or digit,
+ /// `.`, `_`, `/`, `*`, `-`, or a non-ASCII byte. Holds the offending key.
+ InvalidKey(Location, Box<str>),
+ /// A value contains a NUL character.
+ InvalidValue(Location),
+ /// A line that is not blank, a comment, or a section header has no `=`.
+ InvalidLine(Location),
+ /// Something other than spaces or tabs follows the closing delimiter of a quoted string.
+ StrayCharsAfterQuotedString(Location),
+ /// The input ended before a quoted string was closed. Holds the location where
+ /// the string started and its delimiter character.
+ UnterminatedString(Location, char),
}
-#[cfg(feature = "std")]
-impl From<std::io::Error> for Error {
- fn from(value: std::io::Error) -> Self {
- Self::IO(value)
+impl fmt::Display for Error {
+ // NOTE: still a stub — formatting any `Error` with `{}` panics via `todo!()`.
+ fn fmt(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ todo!()
}
}
@@ -129,13 +138,238 @@ fn parse_list(_location: &Location, _string: &str) -> Vec<String> {
todo!()
}
+/// Trait for reading configurations.
+///
+/// Ordinarily you won't need to implement this trait, since it is
+/// already implemented for any `T: `[`std::io::BufRead`] (or for `&str` and `&[u8]`
+/// when the `std` feature is not enabled).
+pub trait Read {
+ /// Reads up to the next line feed (or EOF), not including the line feed itself.
+ ///
+ /// On success, the line's bytes are appended to `line` and `Ok(true)` is returned.
+ /// If the end of the input has already been reached, `line` is left unmodified
+ /// and `Ok(false)` is returned.
+ ///
+ /// Implementations don't need to check for valid UTF-8 — that is already done
+ /// by the code which uses this trait.
+ fn read_until_lf(&mut self, line: &mut Vec<u8>) -> Result<bool>;
+}
+
+#[cfg(feature = "std")]
+impl<R: std::io::BufRead> Read for R {
+    /// Reads one line from the underlying [`std::io::BufRead`], appending it to `line`
+    /// without its trailing line feed.
+    ///
+    /// Returns `Ok(false)` only when no bytes at all could be read (end of input), in
+    /// which case `line` is left untouched. Read failures are reported as [`Error::IO`]
+    /// with the context string `"read error"`.
+    fn read_until_lf(&mut self, line: &mut Vec<u8>) -> Result<bool> {
+        let bytes_read = self
+            .read_until(b'\n', line)
+            .map_err(|e| Error::IO("read error".into(), e))?;
+        // Decide EOF from the byte count rather than from the contents of `line`:
+        // the caller's buffer may already hold data (possibly ending in '\n').
+        if bytes_read == 0 {
+            return Ok(false);
+        }
+        // `read_until` keeps the delimiter; the final line of the input may lack one.
+        if line.ends_with(b"\n") {
+            line.pop();
+        }
+        Ok(true)
+    }
+}
+
+#[cfg(not(feature = "std"))]
+impl Read for &str {
+    /// Splits the next line off the front of the string and appends its bytes to `line`.
+    ///
+    /// Returns `Ok(false)` once the string has been fully consumed.
+    fn read_until_lf(&mut self, line: &mut Vec<u8>) -> Result<bool> {
+        let (current, rest) = match self.split_once('\n') {
+            Some(parts) => parts,
+            // Nothing left at all: end of input.
+            None if self.is_empty() => return Ok(false),
+            // Final line without a trailing line feed.
+            None => (*self, ""),
+        };
+        line.extend_from_slice(current.as_bytes());
+        *self = rest;
+        Ok(true)
+    }
+}
+
+#[cfg(not(feature = "std"))]
+impl Read for &[u8] {
+    /// Splits the next line off the front of the slice and appends it to `line`.
+    ///
+    /// Returns `Ok(false)` once the slice has been fully consumed.
+    fn read_until_lf(&mut self, line: &mut Vec<u8>) -> Result<bool> {
+        let input: &[u8] = *self;
+        if input.is_empty() {
+            // End of input.
+            return Ok(false);
+        }
+        let (current, rest) = match input.iter().position(|&byte| byte == b'\n') {
+            Some(lf) => (&input[..lf], &input[lf + 1..]),
+            // Final line without a trailing line feed.
+            None => (input, &b""[..]),
+        };
+        line.extend_from_slice(current);
+        *self = rest;
+        Ok(true)
+    }
+}
+
+/// Checks that `s` consists only of bytes permitted in keys and section names.
+///
+/// Permitted bytes are ASCII letters and digits, `.`, `_`, `/`, `*`, `-`, and every
+/// non-ASCII byte (0x80 and above). An empty string is accepted.
+///
+/// Returns [`Error::InvalidKey`] carrying `location` and a copy of `s` otherwise.
+fn check_valid_key(location: &Location, s: &str) -> Result<()> {
+    let is_key_byte = |byte: u8| match byte {
+        b'.' | b'_' | b'/' | b'*' | b'-' => true,
+        _ => !byte.is_ascii() || byte.is_ascii_alphanumeric(),
+    };
+    if s.bytes().any(|byte| !is_key_byte(byte)) {
+        return Err(Error::InvalidKey(location.clone(), s.into()));
+    }
+    Ok(())
+}
+
+/// Reads a quoted value, which may continue over several lines of `reader`.
+///
+/// `quoted` is the remainder of the current line, beginning with the opening
+/// delimiter. Further lines are pulled from `reader` until the closing delimiter
+/// is found; only spaces and tabs may follow it on its line.
+///
+/// Returns (unquoted value, new line number).
+///
+/// # Errors
+///
+/// - [`Error::StrayCharsAfterQuotedString`] if anything but spaces or tabs follows
+///   the closing delimiter.
+/// - [`Error::InvalidValue`] if the string contains a NUL character.
+/// - [`Error::InvalidUtf8`] if a continuation line is not valid UTF-8.
+/// - [`Error::UnterminatedString`] if the input ends before the closing delimiter.
+///
+/// # Panics
+///
+/// Panics if `quoted` is empty, or (for now) if the string contains a backslash,
+/// since escape sequences are not implemented yet.
+fn read_quoted_value(
+    quoted: &str,
+    reader: &mut dyn Read,
+    start_location: &Location,
+) -> Result<(String, u64)> {
+    let mut opening = quoted.chars();
+    let delimiter: char = opening
+        .next()
+        .expect("quoted value must start with its delimiter");
+    let mut unquoted = String::new();
+    let mut line_number = start_location.line;
+    let location = |line_number: u64| Location {
+        file: start_location.file.clone(),
+        line: line_number,
+    };
+    let mut line_buf = vec![];
+    let mut first = true;
+    loop {
+        let line = if first {
+            first = false;
+            // Skip the opening delimiter; otherwise it would immediately be
+            // mistaken for the closing one and every value would come out empty.
+            opening.as_str()
+        } else {
+            line_buf.clear();
+            if !reader.read_until_lf(&mut line_buf)? {
+                break;
+            }
+            line_number += 1;
+            line_buf.pop_if(|c| *c == b'\r');
+            // The reader strips the line feed; put it back so that the lines of a
+            // multi-line string are not glued together.
+            unquoted.push('\n');
+            str::from_utf8(&line_buf).map_err(|_| Error::InvalidUtf8(location(line_number)))?
+        };
+        let mut chars = line.chars();
+        while let Some(c) = chars.next() {
+            if c == delimiter {
+                // Only trailing whitespace may follow the closing delimiter.
+                if !chars.all(|c| c == ' ' || c == '\t') {
+                    return Err(Error::StrayCharsAfterQuotedString(location(line_number)));
+                }
+                return Ok((unquoted, line_number));
+            } else if c == '\\' {
+                todo!() // parse escape sequence
+            } else if c == '\0' {
+                return Err(Error::InvalidValue(location(line_number)));
+            } else {
+                unquoted.push(c);
+            }
+        }
+    }
+    // Ran out of input without seeing the closing delimiter.
+    Err(Error::UnterminatedString(start_location.clone(), delimiter))
+}
+
impl Configuration {
- #[cfg(feature = "std")]
- pub fn load<R: std::io::BufRead>(_reader: R) -> Result<Configuration> {
- todo!()
+    /// Parses a configuration from `reader`, line by line.
+    ///
+    /// `filename` is only used to build the [`Location`]s attached to values and errors.
+    ///
+    /// Each line is one of:
+    /// - blank, or a comment starting with `#` (ignored);
+    /// - a section header `[a.b.c]`, which makes later keys relative to that section
+    ///   (`[]` returns to the root);
+    /// - `key = value`, where a value starting with `` ` ``, `"` or `'` is a quoted
+    ///   string that may span several lines.
+    ///
+    /// # Errors
+    ///
+    /// Returns the first error encountered: a reader failure, a control character
+    /// other than tab, invalid UTF-8, a section header without a closing `]`, an
+    /// invalid key, a line without `=`, or a malformed quoted string.
+    fn load_dyn(filename: &str, reader: &mut dyn Read) -> Result<Self> {
+        /// Stores `value` under `key` relative to `section`, creating the intermediate
+        /// sections named by the dotted prefix of `key` as needed.
+        fn insert(
+            mut section: &mut Configuration,
+            location: Location,
+            mut key: &str,
+            value: &str,
+        ) {
+            if let Some(last_dot) = key.rfind('.') {
+                for component in key[..last_dot].split('.') {
+                    section = Arc::get_mut(&mut section.children)
+                        .unwrap()
+                        .entry(component.into())
+                        .or_default();
+                }
+                key = &key[last_dot + 1..];
+            }
+            Arc::get_mut(&mut section.values).unwrap().insert(
+                key.into(),
+                Value {
+                    value: value.into(),
+                    defined_at: location,
+                },
+            );
+        }
+
+        let mut config = Configuration::default();
+        let mut line = vec![];
+        let mut line_number = 0;
+        let mut current_section = &mut config;
+        let filename: Arc<str> = filename.into();
+        loop {
+            line.truncate(0);
+            if !reader.read_until_lf(&mut line)? {
+                break;
+            }
+            line_number += 1;
+            let location = Location {
+                file: filename.clone(),
+                line: line_number,
+            };
+            // Tolerate CRLF line endings.
+            line.pop_if(|c| *c == b'\r');
+            // Reject every ASCII control character (0x00..=0x1f) except tab.
+            // The bound is 0x20 so that 0x1f itself is covered.
+            if let Some(&c) = line.iter().find(|&&c| c < 0x20 && c != b'\t') {
+                return Err(Error::IllegalCharacter(location, char::from(c)));
+            }
+            let mut line =
+                str::from_utf8(&line).map_err(|_| Error::InvalidUtf8(location.clone()))?;
+            line = line.trim_start_matches(['\t', ' ']);
+            if line.is_empty() || line.starts_with('#') {
+                // comment/blank line
+                continue;
+            }
+            if line.starts_with('[') {
+                line = line.trim_end_matches(['\t', ' ']);
+                if !line.ends_with(']') {
+                    return Err(Error::UnmatchedLeftBrace(location));
+                }
+                // Strip the surrounding brackets.
+                let new_section = &line[1..line.len() - 1];
+                // Section names are absolute, so always restart from the root.
+                current_section = &mut config;
+                check_valid_key(&location, new_section)?;
+                if !new_section.is_empty() {
+                    for component in new_section.split('.') {
+                        current_section = Arc::get_mut(&mut current_section.children)
+                            .unwrap()
+                            .entry(component.into())
+                            .or_default();
+                    }
+                }
+            } else {
+                let (mut relative_key, mut value) = line
+                    .split_once('=')
+                    .ok_or_else(|| Error::InvalidLine(location.clone()))?;
+                // Trim before validating: whitespace between the key and `=` is not
+                // part of the key and must not make `key = value` an invalid key.
+                relative_key = relative_key.trim_end_matches(['\t', ' ']);
+                check_valid_key(&location, relative_key)?;
+                value = value.trim_start_matches(['\t', ' ']);
+
+                if value.starts_with(['`', '"', '\'']) {
+                    // A quoted value may consume further lines from the reader.
+                    let (value, new_line_number) = read_quoted_value(value, reader, &location)?;
+                    insert(current_section, location, relative_key, &value);
+                    line_number = new_line_number;
+                } else {
+                    value = value.trim_end_matches(['\t', ' ']);
+                    if value.contains('\0') {
+                        return Err(Error::InvalidValue(location));
+                    }
+                    insert(current_section, location, relative_key, value);
+                }
+            }
+        }
+        Ok(config)
     }
- pub fn load_str(_s: &str) -> Result<Configuration> {
- todo!()
+    /// Loads a configuration from `reader`.
+    ///
+    /// `filename` is passed through to the parser together with the reader.
+    pub fn load<R: Read>(filename: &str, mut reader: R) -> Result<Self> {
+        // Go through a `dyn` reference so the parser is compiled only once instead
+        // of once per reader type; the performance cost does not matter here.
+        let reader: &mut dyn Read = &mut reader;
+        Self::load_dyn(filename, reader)
+    }
+    /// Opens the file at `path` and loads a configuration from it.
+    ///
+    /// The lossily UTF-8-converted path is used as the file name. If the file cannot
+    /// be opened, [`Error::IO`] is returned with that name as its context string.
+    #[cfg(feature = "std")]
+    pub fn load_path<P: AsRef<std::path::Path>>(path: P) -> Result<Self> {
+        let path = path.as_ref();
+        let display_name = path.to_string_lossy();
+        match std::fs::File::open(path) {
+            Ok(file) => Self::load(&display_name, std::io::BufReader::new(file)),
+            Err(e) => Err(Error::IO(display_name.into(), e)),
+        }
     }
pub fn section(&self, key: &str) -> Configuration {
let mut node = self;