summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorpommicket <pommicket@gmail.com>2025-09-08 11:53:33 -0400
committerpommicket <pommicket@gmail.com>2025-09-08 11:53:33 -0400
commit20417bbb3fe8a75d648d20c0d2cf6c112bfdf114 (patch)
treef278a6c443ca69dd3252691f6b4134246bb510fe
parent21e1a3e05e5f687b31aa69e7d628c050c8876673 (diff)
Cleanup, various fixes
-rw-r--r--src/lib.rs376
1 files changed, 196 insertions, 180 deletions
diff --git a/src/lib.rs b/src/lib.rs
index 7815571..935fa96 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -5,13 +5,10 @@
#![warn(clippy::redundant_closure_for_method_calls)]
extern crate alloc;
-#[cfg(not(feature = "std"))]
-use alloc::collections::BTreeMap as Map;
use alloc::sync::Arc;
+use alloc::vec::Vec;
use core::fmt;
use core::mem::take;
-#[cfg(feature = "std")]
-use std::collections::HashMap as Map;
/// File and line information
#[derive(Clone, Debug)]
@@ -50,26 +47,14 @@ struct Value {
/// A parsed POM configuration.
#[derive(Clone, Debug, Default)]
pub struct Configuration {
- // wrap in an Arc for cheap cloning
- children: Arc<Map<Box<str>, Configuration>>,
- values: Arc<Map<Box<str>, Value>>,
+ /// List of items in configuration, sorted by key.
+ items: Vec<(Box<str>, Value)>,
}
impl fmt::Display for Configuration {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
- fn add_lines(lines: &mut Vec<String>, prefix: &str, conf: &Configuration) {
- for (key, val) in conf.values.iter() {
- lines.push(format!("{prefix}{key} = {:?}", val.value));
- }
- for (key, child) in conf.children.iter() {
- add_lines(lines, &format!("{prefix}{key}."), child);
- }
- }
- let mut lines = vec![];
- add_lines(&mut lines, "", self);
- lines.sort();
- for line in lines {
- writeln!(f, "{line}")?;
+ for (key, value) in &self.items {
+ writeln!(f, "{key} = {value:?}")?;
}
Ok(())
}
@@ -120,6 +105,8 @@ pub enum Error {
UnterminatedString(Location, char),
/// Invalid escape sequence appears in a quoted value.
InvalidEscapeSequence(Location, Box<str>),
+ /// Key is defined twice in a file
+ DuplicateKey(Box<(Box<str>, Location, Location)>),
/// Used when there is more than one error in a file.
///
/// None of the errors in the array will be [`Error::Multiple`]'s,
@@ -167,6 +154,10 @@ impl fmt::Display for Error {
f,
"{location}: invalid escape sequence {sequence:?} (try using \\\\ instead of \\ maybe?)",
),
+ Self::DuplicateKey(info) => {
+ let (key, loc1, loc2) = info.as_ref();
+ write!(f, "{loc2}: key {key} was already defined at {loc1}")
+ }
Self::Multiple(errs) => {
let mut first = true;
for err in errs {
@@ -343,6 +334,20 @@ impl Read for &[u8] {
}
}
+fn is_illegal_byte(c: u8) -> bool {
+ (0..0x1f).contains(&c) && c != b'\t'
+}
+
+fn process_line(line_buf: &mut Vec<u8>, location: impl Fn() -> Location) -> Result<&str> {
+ line_buf.pop_if(|c| *c == b'\r');
+ for c in line_buf.iter() {
+ if is_illegal_byte(*c) {
+ return Err(Error::IllegalCharacter(location(), char::from(*c)));
+ }
+ }
+ str::from_utf8(line_buf).map_err(|_| Error::InvalidUtf8(location()))
+}
+
fn parse_hex_digit(c: char) -> Option<u32> {
Some(match c {
'0'..='9' => (c as u32) - ('0' as u32),
@@ -359,14 +364,102 @@ struct Parser {
impl Parser {
fn check_valid_key(&mut self, location: &Location, s: &str) {
- if !s.bytes().all(|c| {
- c >= 0x80 || c.is_ascii_alphanumeric() || matches!(c, b'.' | b'_' | b'/' | b'*' | b'-')
- }) {
+ if s.starts_with('.')
+ || s.ends_with('.')
+ || s.contains("..")
+ || !s.bytes().all(|c| {
+ c >= 0x80
+ || c.is_ascii_alphanumeric()
+ || matches!(c, b'.' | b'_' | b'/' | b'*' | b'-')
+ }) {
self.nonfatal_errors
.push(Error::InvalidKey(location.clone(), s.into()));
}
}
+ fn parse_escape_sequence(
+ &mut self,
+ chars: &mut core::str::Chars,
+ value: &mut String,
+ location: impl Fn() -> Location,
+ ) {
+ let invalid_escape = |s: String| Error::InvalidEscapeSequence(location(), s.into());
+ let Some(c) = chars.next() else {
+ self.nonfatal_errors
+ .push(invalid_escape("\\(newline)".into()));
+ return;
+ };
+ match c {
+ 'n' => value.push('\n'),
+ 'r' => value.push('\r'),
+ 't' => value.push('\t'),
+ '\\' | '\'' | '"' | '`' => value.push(c),
+ ',' => value.push_str("\\,"),
+ 'x' => {
+ let Some(c1) = chars.next() else {
+ self.nonfatal_errors.push(invalid_escape("\\x".into()));
+ return;
+ };
+ let Some(c2) = chars.next() else {
+ self.nonfatal_errors
+ .push(invalid_escape(format!("\\x{c1}")));
+ return;
+ };
+ let (Some(nibble1), Some(nibble2)) = (parse_hex_digit(c1), parse_hex_digit(c2))
+ else {
+ self.nonfatal_errors
+ .push(invalid_escape(format!("\\x{c1}{c2}")));
+ return;
+ };
+ if nibble1 == 0 && nibble2 == 0 {
+ self.nonfatal_errors.push(Error::InvalidValue(location()));
+ }
+ value.push(char::try_from(nibble1 << 4 | nibble2).unwrap());
+ }
+ 'u' => {
+ let mut c = chars.next();
+ if c != Some('{') {
+ self.nonfatal_errors.push(invalid_escape("\\u".into()));
+ return;
+ }
+ let mut code = 0u32;
+ for i in 0..7 {
+ c = chars.next();
+ if i == 6 {
+ break;
+ }
+ let Some(c) = c else {
+ break;
+ };
+ if c == '}' {
+ break;
+ }
+ code <<= 4;
+ let Some(digit) = parse_hex_digit(c) else {
+ self.nonfatal_errors
+ .push(invalid_escape(format!("\\u{{{code:x}{c}")));
+ return;
+ };
+ code |= digit;
+ }
+ if c != Some('}') {
+ self.nonfatal_errors
+ .push(invalid_escape("\\u{ has no matching }".into()));
+ return;
+ }
+ let Ok(c) = char::try_from(code) else {
+ self.nonfatal_errors
+ .push(invalid_escape(format!("\\u{{{code:x}}}")));
+ return;
+ };
+ value.push(c);
+ }
+ _ => {
+ self.nonfatal_errors.push(invalid_escape(format!("\\{c}")));
+ }
+ }
+ }
+
/// Returns (unquoted value, new line number)
fn read_quoted_value(
&mut self,
@@ -393,13 +486,10 @@ impl Parser {
break;
}
line_number += 1;
- line_buf.pop_if(|c| *c == b'\r');
- str::from_utf8(&line_buf).map_err(|_| Error::InvalidUtf8(location(line_number)))?
+ process_line(&mut line_buf, || location(line_number))?
};
let mut chars = line.chars();
while let Some(c) = chars.next() {
- let invalid_escape =
- move |s: String| Error::InvalidEscapeSequence(location(line_number), s.into());
if c == delimiter {
if !chars.all(|c| c == ' ' || c == '\t') {
self.nonfatal_errors
@@ -407,78 +497,7 @@ impl Parser {
}
return Ok((unquoted, line_number));
} else if c == '\\' {
- let Some(c) = chars.next() else {
- self.nonfatal_errors
- .push(invalid_escape("\\(newline)".into()));
- break;
- };
- match c {
- 'n' => unquoted.push('\n'),
- 'r' => unquoted.push('\r'),
- 't' => unquoted.push('\t'),
- '\\' | '\'' | '"' | '`' => unquoted.push(c),
- ',' => unquoted.push_str("\\,"),
- 'x' => {
- let Some(c1) = chars.next() else {
- self.nonfatal_errors.push(invalid_escape("\\x".into()));
- break;
- };
- let Some(c2) = chars.next() else {
- self.nonfatal_errors
- .push(invalid_escape(format!("\\x{c1}")));
- break;
- };
- let (Some(nibble1), Some(nibble2)) =
- (parse_hex_digit(c1), parse_hex_digit(c2))
- else {
- self.nonfatal_errors
- .push(invalid_escape(format!("\\x{c1}{c2}")));
- continue;
- };
- if nibble1 == 0 && nibble2 == 0 {
- self.nonfatal_errors
- .push(Error::InvalidValue(location(line_number)));
- }
- unquoted.push(char::try_from(nibble1 << 4 | nibble2).unwrap());
- }
- 'u' => {
- let mut c = chars.next();
- if c != Some('{') {
- self.nonfatal_errors.push(invalid_escape("\\u".into()));
- continue;
- }
- let mut code = 0u32;
- for i in 0..7 {
- c = chars.next();
- if i == 6 {
- break;
- }
- let Some(c) = c else {
- break;
- };
- if c == '}' {
- break;
- }
- code <<= 4;
- code |= parse_hex_digit(c)
- .ok_or_else(|| invalid_escape(format!("\\u{{{code:x}{c}")))?;
- }
- if c != Some('}') {
- self.nonfatal_errors
- .push(invalid_escape("\\u{ has no matching }".into()));
- continue;
- }
- let Ok(c) = char::try_from(code) else {
- self.nonfatal_errors
- .push(invalid_escape(format!("\\u{{{code:x}}}")));
- continue;
- };
- unquoted.push(c);
- }
- _ => {
- self.nonfatal_errors.push(invalid_escape(format!("\\{c}")));
- }
- }
+ self.parse_escape_sequence(&mut chars, &mut unquoted, || location(line_number));
} else if c == '\0' {
self.nonfatal_errors
.push(Error::InvalidValue(location(line_number)));
@@ -492,29 +511,25 @@ impl Parser {
}
fn load(&mut self, filename: &str, reader: &mut dyn Read) -> Result<Configuration> {
- let mut config = Configuration::default();
- let mut line = vec![];
- let mut line_number = 0;
- let mut current_section = &mut config;
+ let mut items: Vec<(Box<str>, Value)> = vec![];
+ let mut line: Vec<u8> = vec![];
+ let mut line_number: u64 = 0;
+ let mut current_section = String::new();
let filename: Arc<str> = filename.into();
loop {
line.truncate(0);
if !reader.read_until_lf(&mut line)? {
break;
}
+ if line_number == 0 && line.starts_with(b"\xEF\xBB\xBF") {
+ line.drain(..3);
+ }
line_number += 1;
let location = Location {
file: filename.clone(),
line: line_number,
};
- line.pop_if(|c| *c == b'\r');
- for c in &line {
- if (0..0x1f).contains(c) && *c != b'\t' {
- return Err(Error::IllegalCharacter(location, char::from(*c)));
- }
- }
- let mut line =
- str::from_utf8(&line).map_err(|_| Error::InvalidUtf8(location.clone()))?;
+ let mut line = process_line(&mut line, || location.clone())?;
line = line.trim_start_matches(['\t', ' ']);
if line.is_empty() || line.starts_with('#') {
// comment/blank line
@@ -525,64 +540,59 @@ impl Parser {
if !line.ends_with(']') {
return Err(Error::UnmatchedLeftBrace(location));
}
- let new_section = line[1..line.len() - 1].into();
- current_section = &mut config;
- self.check_valid_key(&location, new_section);
- if !new_section.is_empty() {
- for component in new_section.split('.') {
- current_section = Arc::get_mut(&mut current_section.children)
- .unwrap()
- .entry(component.into())
- .or_default();
- }
- }
+ current_section = line[1..line.len() - 1].into();
+ self.check_valid_key(&location, &current_section);
} else {
- fn insert(
- mut section: &mut Configuration,
- location: Location,
- mut key: &str,
- value: &str,
- ) {
- if let Some(last_dot) = key.rfind('.') {
- for component in key[..last_dot].split('.') {
- section = Arc::get_mut(&mut section.children)
- .unwrap()
- .entry(component.into())
- .or_default();
- }
- key = &key[last_dot + 1..];
- }
- Arc::get_mut(&mut section.values).unwrap().insert(
- key.into(),
- Value {
- value: value.into(),
- defined_at: location,
- },
- );
- }
-
let (mut relative_key, mut value) = line
.split_once('=')
.ok_or_else(|| Error::InvalidLine(location.clone()))?;
relative_key = relative_key.trim_end_matches(['\t', ' ']);
self.check_valid_key(&location, relative_key);
+ let key: String = if current_section.is_empty() {
+ relative_key.into()
+ } else {
+ format!("{current_section}.{relative_key}")
+ };
value = value.trim_start_matches(['\t', ' ']);
if value.starts_with(['`', '"', '\'']) {
let (value, new_line_number) =
self.read_quoted_value(value, reader, &location)?;
- insert(current_section, location, relative_key, &value);
+ items.push((
+ key.into(),
+ Value {
+ value: value.into(),
+ defined_at: location,
+ },
+ ));
line_number = new_line_number;
} else {
value = value.trim_end_matches(['\t', ' ']);
if value.contains('\0') {
return Err(Error::InvalidValue(location));
}
- insert(current_section, location, relative_key, value);
+ items.push((
+ key.into(),
+ Value {
+ value: value.into(),
+ defined_at: location,
+ },
+ ));
}
}
}
+ items.sort_by(|(k1, _), (k2, _)| k1.cmp(k2));
+ for window in items.windows(2) {
+ if window[0].0 == window[1].0 {
+ // duplicate key
+ self.nonfatal_errors.push(Error::DuplicateKey(Box::new((
+ window[0].0.clone(),
+ window[0].1.defined_at.clone(),
+ window[1].1.defined_at.clone(),
+ ))));
+ }
+ }
match self.nonfatal_errors.len() {
- 0 => Ok(config),
+ 0 => Ok(Configuration { items }),
1 => Err(self.nonfatal_errors.pop().unwrap()),
2.. => Err(Error::Multiple(take(&mut self.nonfatal_errors).into())),
}
@@ -613,14 +623,20 @@ impl Configuration {
/// keys starting with `key.` in `self`, together with their values.
#[must_use]
pub fn section(&self, key: &str) -> Configuration {
- let mut node = self;
- for component in key.split('.') {
- node = match self.children.get(component) {
- Some(x) => x,
- None => return Configuration::default(),
- };
+ let key_dot = format!("{key}.");
+ let start_idx = self
+ .items
+ .binary_search_by(|(k, _)| k.as_ref().cmp(&key_dot))
+ .expect_err("items should not contain a key ending in .");
+ // NB: / is the next ASCII character after .
+ let key_slash = format!("{key}/");
+ let end_idx = self
+ .items
+ .binary_search_by(|(k, _)| k.as_ref().cmp(&key_slash))
+ .unwrap_or_else(|i| i);
+ Configuration {
+ items: self.items[start_idx..end_idx].to_owned(),
}
- node.clone()
}
/// Get the list of all “direct keys” in this configuration.
///
@@ -631,24 +647,22 @@ impl Configuration {
/// this would give an iterator yielding
/// `"farmer-name"` and `"sheep"` in some order.)
pub fn keys(&self) -> impl '_ + Iterator<Item = &str> {
- self.values
- .keys()
- .chain(
- self.children
- .keys()
- .filter(|&k| !self.values.contains_key(k)),
- )
- .map(AsRef::as_ref)
+ let mut prev = None;
+ self.items.iter().filter_map(move |(key, _)| {
+ let first_component: &str = key.split_once('.').map_or(key.as_ref(), |(c, _)| c);
+ if prev == Some(first_component) {
+ return None;
+ }
+ prev = Some(first_component);
+ Some(first_component)
+ })
}
fn get_val(&self, key: &str) -> Option<&Value> {
- let Some((path, last_component)) = key.rsplit_once('.') else {
- return self.values.get(key);
- };
- let mut node = self;
- for component in path.split('.') {
- node = node.children.get(component)?;
- }
- node.values.get(last_component)
+ let idx = self
+ .items
+ .binary_search_by(|(k, _)| k.as_ref().cmp(key))
+ .ok()?;
+ Some(&self.items[idx].1)
}
/// Get value associated with `key`, if any.
#[must_use]
@@ -756,15 +770,17 @@ impl Configuration {
}
/// Merge `conf` into `self`, preferring values in `conf`.
pub fn merge(&mut self, conf: &Configuration) {
- let new_values = Arc::make_mut(&mut self.values);
- // merge conf.values into self.values
- for (key, val) in conf.values.iter() {
- new_values.insert(key.clone(), val.clone());
+ let mut must_sort = false;
+ for (key, value) in &conf.items {
+ if let Ok(i) = self.items.binary_search_by(|(k, _)| k.cmp(key)) {
+ self.items[i].1 = value.clone();
+ } else {
+ self.items.push((key.clone(), value.clone()));
+ must_sort = true;
+ }
}
- // merge conf.children into self.children
- let new_children = Arc::make_mut(&mut self.children);
- for (key, child) in conf.children.iter() {
- new_children.entry(key.clone()).or_default().merge(child);
+ if must_sort {
+ self.items.sort_by(|(k1, _), (k2, _)| k1.cmp(k2));
}
}
/// Check that `self` follows the given schema.