diff options
-rwxr-xr-x | pre-commit.sh | 2 | ||||
-rw-r--r-- | src/lib.rs | 187 |
2 files changed, 158 insertions, 31 deletions
diff --git a/pre-commit.sh b/pre-commit.sh index c1abd05..bcffdca 100755 --- a/pre-commit.sh +++ b/pre-commit.sh @@ -1,4 +1,4 @@ #!/bin/sh cargo fmt || exit 1 -cargo clippy || exit 1 +cargo clippy -- -D warnings || exit 1 git add -u || exit 1 @@ -1,4 +1,8 @@ +//! Parser for the [POM file format](https://www.pom.computer/index.html). #![cfg_attr(not(feature = "std"), no_std)] +#![deny(missing_docs)] +#![warn(clippy::semicolon_if_nothing_returned)] +#![warn(clippy::redundant_closure_for_method_calls)] extern crate alloc; #[cfg(not(feature = "std"))] @@ -16,6 +20,20 @@ pub struct Location { line: u64, } +impl Location { + /// File name + #[must_use] + pub fn file(&self) -> &str { + &self.file + } + + /// Line number + #[must_use] + pub fn line(&self) -> u64 { + self.line + } +} + impl fmt::Display for Location { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{}:{}", self.file, self.line) @@ -29,6 +47,7 @@ struct Value { defined_at: Location, } +/// A parsed POM configuration. #[derive(Clone, Debug, Default)] pub struct Configuration { // wrap in an Arc for cheap cloning @@ -38,7 +57,6 @@ pub struct Configuration { impl fmt::Display for Configuration { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let mut lines = vec![]; fn add_lines(lines: &mut Vec<String>, prefix: &str, conf: &Configuration) { for (key, val) in conf.values.iter() { lines.push(format!("{prefix}{key} = {:?}", val.value)); @@ -47,6 +65,7 @@ impl fmt::Display for Configuration { add_lines(lines, &format!("{prefix}{key}."), child); } } + let mut lines = vec![]; add_lines(&mut lines, "", self); lines.sort(); for line in lines { @@ -56,23 +75,50 @@ impl fmt::Display for Configuration { } } +/// A parsing or schema error. #[non_exhaustive] #[derive(Debug)] pub enum Error { + /// I/O error + /// + /// The first field is a description of what led to the error. 
#[cfg(feature = "std")] IO(Box<str>, std::io::Error), + /// Illegal character in POM file + /// + /// Specifically, an ASCII control character other than LF, CR immediately followed by LF, and tab. IllegalCharacter(Location, char), + /// Invalid UTF-8 in POM file InvalidUtf8(Location), + /// Couldn't parse signed integer. BadInt(Location, Box<str>), + /// Couldn't parse unsigned integer. BadUInt(Location, Box<str>), + /// Couldn't parse floating-point number. BadFloat(Location, Box<str>), + /// Couldn't parse boolean. BadBool(Location, Box<str>), + /// Opening \[ without matching \]. UnmatchedLeftBrace(Location), + /// Key contains invalid characters. + /// + /// The valid characters are anything outside of ASCII, + /// as well as `a`–`z`, `A`–`Z`, `0`-`9`, and each of `/.-*_`. InvalidKey(Location, Box<str>), + /// Value contains a null character. + /// + /// These are not allowed for interoperability with languages + /// with null-terminated strings (C). InvalidValue(Location), + /// Line is not a `[section-header]` or `key = value`. InvalidLine(Location), - StrayCharsAfterQuotedString(Location), + /// Characters appear after a quoted value. + /// + /// e.g. `key = "value" foo` + StrayCharsAfterString(Location), + /// String opened but never closed with a matching character. UnterminatedString(Location, char), + /// Invalid escape sequence appears in a quoted value. InvalidEscapeSequence(Location, Box<str>), /// Used when there is more than one error in a file. /// @@ -111,7 +157,7 @@ impl fmt::Display for Error { f, "{location}: line should either start with [ or contain =" ), - Self::StrayCharsAfterQuotedString(location) => { + Self::StrayCharsAfterString(location) => { write!(f, "{location}: stray characters after string value") } Self::UnterminatedString(location, delimiter) => { @@ -146,6 +192,7 @@ impl core::error::Error for Error { } } +/// Type alias for [`std::result::Result`] with [`Error`] as the error. 
pub type Result<T> = std::result::Result<T, Error>; fn parse_int(location: &Location, string: &str) -> Result<i64> { @@ -227,7 +274,7 @@ fn parse_list(_location: &Location, _string: &str) -> Vec<String> { /// Trait for reading configurations. /// /// Ordinarily you won't need to implement this trait, since it is -/// already implemented by any `T: `[`std::io::BufRead`] (or else `&str` and `&[u8]`, +/// already implemented by any `T` implementing [`std::io::BufRead`] (or else `&str` and `&[u8]`, /// if the `std` feature is not enabled). pub trait Read { /// Read up to the next line feed (or EOF), not including the line feed itself. @@ -296,6 +343,15 @@ impl Read for &[u8] { } } +fn parse_hex_digit(c: char) -> Option<u32> { + Some(match c { + '0'..='9' => (c as u32) - ('0' as u32), + 'a'..='f' => (c as u32) - ('a' as u32) + 10, + 'A'..='F' => (c as u32) - ('A' as u32) + 10, + _ => return None, + }) +} + #[derive(Default)] struct Parser { nonfatal_errors: Vec<Error>, @@ -307,7 +363,7 @@ impl Parser { c >= 0x80 || c.is_ascii_alphanumeric() || matches!(c, b'.' 
| b'_' | b'/' | b'*' | b'-') }) { self.nonfatal_errors - .push(Error::InvalidKey(location.clone(), s.into())) + .push(Error::InvalidKey(location.clone(), s.into())); } } @@ -347,7 +403,7 @@ impl Parser { if c == delimiter { if !chars.all(|c| c == ' ' || c == '\t') { self.nonfatal_errors - .push(Error::StrayCharsAfterQuotedString(location(line_number))); + .push(Error::StrayCharsAfterString(location(line_number))); } return Ok((unquoted, line_number)); } else if c == '\\' { @@ -356,14 +412,6 @@ impl Parser { .push(invalid_escape("\\(newline)".into())); break; }; - fn parse_hex_digit(c: char) -> Option<u32> { - Some(match c { - '0'..='9' => (c as u32) - ('0' as u32), - 'a'..='f' => (c as u32) - ('a' as u32) + 10, - 'A'..='F' => (c as u32) - ('A' as u32) + 10, - _ => return None, - }) - } match c { 'n' => unquoted.push('\n'), 'r' => unquoted.push('\r'), @@ -489,13 +537,6 @@ impl Parser { } } } else { - let (mut relative_key, mut value) = line - .split_once('=') - .ok_or_else(|| Error::InvalidLine(location.clone()))?; - relative_key = relative_key.trim_end_matches(['\t', ' ']); - self.check_valid_key(&location, relative_key); - value = value.trim_start_matches(['\t', ' ']); - fn insert( mut section: &mut Configuration, location: Location, @@ -507,7 +548,7 @@ impl Parser { section = Arc::get_mut(&mut section.children) .unwrap() .entry(component.into()) - .or_default() + .or_default(); } key = &key[last_dot + 1..]; } @@ -519,6 +560,13 @@ impl Parser { }, ); } + + let (mut relative_key, mut value) = line + .split_once('=') + .ok_or_else(|| Error::InvalidLine(location.clone()))?; + relative_key = relative_key.trim_end_matches(['\t', ' ']); + self.check_valid_key(&location, relative_key); + value = value.trim_start_matches(['\t', ' ']); if value.starts_with(['`', '"', '\'']) { let (value, new_line_number) = self.read_quoted_value(value, reader, &location)?; @@ -542,11 +590,16 @@ impl Parser { } impl Configuration { + /// Load a configuration. 
+ /// + /// `reader` can be `&str`, `&[u8]`, or anything that implements [`std::io::BufRead`] + /// (if the `std` feature is enabled) such as `std::io::BufReader<std::fs::File>`. pub fn load<R: Read>(filename: &str, mut reader: R) -> Result<Self> { // avoid big code size by using dyn reference. // the impact on performance is not really important. Parser::default().load(filename, &mut reader) } + /// Load a configuration from a file path. #[cfg(feature = "std")] pub fn load_path<P: AsRef<std::path::Path>>(path: P) -> Result<Self> { let p = path.as_ref(); @@ -554,6 +607,11 @@ impl Configuration { let file = std::fs::File::open(p).map_err(|e| Error::IO(filename.clone().into(), e))?; Configuration::load(&filename, std::io::BufReader::new(file)) } + /// Extract a section out of a configuration. + /// + /// More specifically, this will give you the configuration consisting of all + /// keys starting with `key.` in `self`, together with their values. + #[must_use] pub fn section(&self, key: &str) -> Configuration { let mut node = self; for component in key.split('.') { @@ -564,6 +622,14 @@ impl Configuration { } node.clone() } + /// Get the list of all “direct keys” in this configuration. + /// + /// More specifically, this returns an iterator of all unique + /// first components of keys in `self`. + /// + /// (So if there were keys `sheep.age`, `sheep.colour`, and `farmer-name`, + /// this would give an iterator yielding + /// `"farmer-name"` and `"sheep"` in some order.) pub fn keys(&self) -> impl '_ + Iterator<Item = &str> { self.values .keys() @@ -572,7 +638,7 @@ impl Configuration { .keys() .filter(|&k| !self.values.contains_key(k)), ) - .map(|x| x.as_ref()) + .map(AsRef::as_ref) } fn get_val(&self, key: &str) -> Option<&Value> { let Some((path, last_component)) = key.rsplit_once('.') else { @@ -584,56 +650,109 @@ impl Configuration { } node.values.get(last_component) } + /// Get value associated with `key`, if any. 
+ #[must_use] pub fn get(&self, key: &str) -> Option<&str> { Some(self.get_val(key)?.value.as_ref()) } + /// Get location in the configuration file where `key` is defined, if any. + #[must_use] pub fn location(&self, key: &str) -> Option<Location> { Some(self.get_val(key)?.defined_at.clone()) } + /// Returns `true` if `key` is defined in this configuration. + #[must_use] pub fn has(&self, key: &str) -> bool { self.get(key).is_some() } + /// Get value associated with `key`, or else use `default` if it isn't defined. + #[must_use] pub fn get_or_default<'a>(&'a self, key: &str, default: &'a str) -> &'a str { self.get(key).unwrap_or(default) } + /// Get value associated with `key`, and parse it as an integer. + /// + /// Returns `None` if `key` is not defined, + /// and `Some(Err(…))` if `key` is defined but not an integer. + #[must_use] pub fn get_int(&self, key: &str) -> Option<Result<i64>> { let Value { value, defined_at } = self.get_val(key)?; Some(parse_int(defined_at, value.as_ref())) } + /// Get value associated with `key`, and parse it as an integer, or else use `default`. + /// + /// Returns `Err(…)` if `key` is defined but not an integer. pub fn get_int_or_default(&self, key: &str, default: i64) -> Result<i64> { self.get_int(key).unwrap_or(Ok(default)) } + /// Get value associated with `key`, and parse it as an unsigned integer. + /// + /// Returns `None` if `key` is not defined, + /// and `Some(Err(…))` if `key` is defined but not an unsigned integer. + #[must_use] pub fn get_uint(&self, key: &str) -> Option<Result<u64>> { let Value { value, defined_at } = self.get_val(key)?; Some(parse_uint(defined_at, value.as_ref())) } + /// Get value associated with `key`, and parse it as an unsigned integer, or else use `default`. + /// + /// Returns `Err(…)` if `key` is defined but not an unsigned integer. 
pub fn get_uint_or_default(&self, key: &str, default: u64) -> Result<u64> { self.get_uint(key).unwrap_or(Ok(default)) } + /// Get value associated with `key`, and parse it as a float. + /// + /// Returns `None` if `key` is not defined, + /// and `Some(Err(…))` if `key` is defined but not a float. + #[must_use] pub fn get_float(&self, key: &str) -> Option<Result<f64>> { let Value { value, defined_at } = self.get_val(key)?; Some(parse_float(defined_at, value.as_ref())) } + /// Get value associated with `key`, and parse it as a float, or else use `default`. + /// + /// Returns `Err(…)` if `key` is defined but not a float. pub fn get_float_or_default(&self, key: &str, default: f64) -> Result<f64> { self.get_float(key).unwrap_or(Ok(default)) } + /// Get value associated with `key`, and parse it as a boolean. + /// + /// Returns `None` if `key` is not defined, + /// and `Some(Err(…))` if `key` is defined but not equal to one of + /// `off`, `no`, `false`, `on`, `yes`, `true`. + #[must_use] pub fn get_bool(&self, key: &str) -> Option<Result<bool>> { let Value { value, defined_at } = self.get_val(key)?; Some(parse_bool(defined_at, value.as_ref())) } + /// Get value associated with `key`, and parse it as a boolean, or else use `default`. + /// + /// Returns `Err(…)` if `key` is defined but not equal to one of + /// `off`, `no`, `false`, `on`, `yes`, `true`. pub fn get_bool_or_default(&self, key: &str, default: bool) -> Result<bool> { self.get_bool(key).unwrap_or(Ok(default)) } + /// Get value associated with `key`, and parse it as a comma-separated list. + /// + /// Commas in list entries can be escaped with `\,`. 
+ #[must_use] pub fn get_list(&self, key: &str) -> Option<Vec<String>> { let Value { value, defined_at } = self.get_val(key)?; Some(parse_list(defined_at, value.as_ref())) } - pub fn get_list_or_default( - &self, - key: &str, - default: impl FnOnce() -> Vec<String>, - ) -> Vec<String> { - self.get_list(key).unwrap_or_else(default) + /// Get value associated with `key`, and parse it as a comma-separated list, or else use `default`. + /// + /// Commas in list entries can be escaped with `\,`. + /// + /// `default` can be anything that can be converted into an iterator of strings, + /// e.g. `Vec<&str>`, `Vec<String>`, `&[&str]`, etc. + pub fn get_list_or_default<L>(&self, key: &str, default: L) -> Vec<String> + where + L: IntoIterator, + L::Item: AsRef<str>, + { + self.get_list(key) + .unwrap_or_else(|| default.into_iter().map(|s| s.as_ref().to_owned()).collect()) } /// Merge `conf` into `self`, preferring values in `conf`. pub fn merge(&mut self, conf: &Configuration) { @@ -645,7 +764,15 @@ impl Configuration { // merge conf.children into self.children let new_children = Arc::make_mut(&mut self.children); for (key, child) in conf.children.iter() { - new_children.entry(key.clone()).or_default().merge(child) + new_children.entry(key.clone()).or_default().merge(child); } } + /// Check that `self` follows the given schema. + /// + /// See the [POM specification](https://www.pom.computer/spec.html) for a description + /// of schemas. + pub fn check_against_schema(&self, schema: &Configuration) -> Result<()> { + _ = schema; + todo!() + } } |