From 20417bbb3fe8a75d648d20c0d2cf6c112bfdf114 Mon Sep 17 00:00:00 2001 From: pommicket Date: Mon, 8 Sep 2025 11:53:33 -0400 Subject: Cleanup, various fixes --- src/lib.rs | 376 ++++++++++++++++++++++++++++++++----------------------------- 1 file changed, 196 insertions(+), 180 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 7815571..935fa96 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,13 +5,10 @@ #![warn(clippy::redundant_closure_for_method_calls)] extern crate alloc; -#[cfg(not(feature = "std"))] -use alloc::collections::BTreeMap as Map; use alloc::sync::Arc; +use alloc::vec::Vec; use core::fmt; use core::mem::take; -#[cfg(feature = "std")] -use std::collections::HashMap as Map; /// File and line information #[derive(Clone, Debug)] @@ -50,26 +47,14 @@ struct Value { /// A parsed POM configuration. #[derive(Clone, Debug, Default)] pub struct Configuration { - // wrap in an Arc for cheap cloning - children: Arc, Configuration>>, - values: Arc, Value>>, + /// List of items in configuration, sorted by key. + items: Vec<(Box, Value)>, } impl fmt::Display for Configuration { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fn add_lines(lines: &mut Vec, prefix: &str, conf: &Configuration) { - for (key, val) in conf.values.iter() { - lines.push(format!("{prefix}{key} = {:?}", val.value)); - } - for (key, child) in conf.children.iter() { - add_lines(lines, &format!("{prefix}{key}."), child); - } - } - let mut lines = vec![]; - add_lines(&mut lines, "", self); - lines.sort(); - for line in lines { - writeln!(f, "{line}")?; + for (key, value) in &self.items { + writeln!(f, "{key} = {value:?}")?; } Ok(()) } @@ -120,6 +105,8 @@ pub enum Error { UnterminatedString(Location, char), /// Invalid escape sequence appears in a quoted value. InvalidEscapeSequence(Location, Box), + /// Key is defined twice in a file + DuplicateKey(Box<(Box, Location, Location)>), /// Used when there is more than one error in a file. /// /// None of the errors in the array will be [`Error::Multiple`]'s, @@ -167,6 +154,10 @@ impl fmt::Display for Error { f, "{location}: invalid escape sequence {sequence:?} (try using \\\\ instead of \\ maybe?)", ), + Self::DuplicateKey(info) => { + let (key, loc1, loc2) = info.as_ref(); + write!(f, "{loc2}: key {key} was already defined at {loc1}") + } Self::Multiple(errs) => { let mut first = true; for err in errs { @@ -343,6 +334,20 @@ impl Read for &[u8] { } } +fn is_illegal_byte(c: u8) -> bool { + (0..0x1f).contains(&c) && c != b'\t' +} + +fn process_line(line_buf: &mut Vec, location: impl Fn() -> Location) -> Result<&str> { + line_buf.pop_if(|c| *c == b'\r'); + for c in line_buf.iter() { + if is_illegal_byte(*c) { + return Err(Error::IllegalCharacter(location(), char::from(*c))); + } + } + str::from_utf8(line_buf).map_err(|_| Error::InvalidUtf8(location())) +} + fn parse_hex_digit(c: char) -> Option { Some(match c { '0'..='9' => (c as u32) - ('0' as u32), @@ -359,14 +364,102 @@ struct Parser { impl Parser { fn check_valid_key(&mut self, location: &Location, s: &str) { - if !s.bytes().all(|c| { - c >= 0x80 || c.is_ascii_alphanumeric() || matches!(c, b'.' | b'_' | b'/' | b'*' | b'-') - }) { + if s.starts_with('.') + || s.ends_with('.') + || s.contains("..") + || !s.bytes().all(|c| { + c >= 0x80 + || c.is_ascii_alphanumeric() + || matches!(c, b'.' | b'_' | b'/' | b'*' | b'-') + }) { self.nonfatal_errors .push(Error::InvalidKey(location.clone(), s.into())); } } + fn parse_escape_sequence( + &mut self, + chars: &mut core::str::Chars, + value: &mut String, + location: impl Fn() -> Location, + ) { + let invalid_escape = |s: String| Error::InvalidEscapeSequence(location(), s.into()); + let Some(c) = chars.next() else { + self.nonfatal_errors + .push(invalid_escape("\\(newline)".into())); + return; + }; + match c { + 'n' => value.push('\n'), + 'r' => value.push('\r'), + 't' => value.push('\t'), + '\\' | '\'' | '"' | '`' => value.push(c), + ',' => value.push_str("\\,"), + 'x' => { + let Some(c1) = chars.next() else { + self.nonfatal_errors.push(invalid_escape("\\x".into())); + return; + }; + let Some(c2) = chars.next() else { + self.nonfatal_errors + .push(invalid_escape(format!("\\x{c1}"))); + return; + }; + let (Some(nibble1), Some(nibble2)) = (parse_hex_digit(c1), parse_hex_digit(c2)) + else { + self.nonfatal_errors + .push(invalid_escape(format!("\\x{c1}{c2}"))); + return; + }; + if nibble1 == 0 && nibble2 == 0 { + self.nonfatal_errors.push(Error::InvalidValue(location())); + } + value.push(char::try_from(nibble1 << 4 | nibble2).unwrap()); + } + 'u' => { + let mut c = chars.next(); + if c != Some('{') { + self.nonfatal_errors.push(invalid_escape("\\u".into())); + return; + } + let mut code = 0u32; + for i in 0..7 { + c = chars.next(); + if i == 6 { + break; + } + let Some(c) = c else { + break; + }; + if c == '}' { + break; + } + code <<= 4; + let Some(digit) = parse_hex_digit(c) else { + self.nonfatal_errors + .push(invalid_escape(format!("\\u{{{code:x}{c}"))); + return; + }; + code |= digit; + } + if c != Some('}') { + self.nonfatal_errors + .push(invalid_escape("\\u{ has no matching }".into())); + return; + } + let Ok(c) = char::try_from(code) else { + self.nonfatal_errors + .push(invalid_escape(format!("\\u{{{code:x}}}"))); + return; + }; + value.push(c); + } + _ => { + self.nonfatal_errors.push(invalid_escape(format!("\\{c}"))); + } + } + } + /// Returns (unquoted value, new line number) fn read_quoted_value( &mut self, @@ -393,13 +486,10 @@ impl Parser { break; } line_number += 1; - line_buf.pop_if(|c| *c == b'\r'); - str::from_utf8(&line_buf).map_err(|_| Error::InvalidUtf8(location(line_number)))? + process_line(&mut line_buf, || location(line_number))? }; let mut chars = line.chars(); while let Some(c) = chars.next() { - let invalid_escape = - move |s: String| Error::InvalidEscapeSequence(location(line_number), s.into()); if c == delimiter { if !chars.all(|c| c == ' ' || c == '\t') { self.nonfatal_errors @@ -407,78 +497,7 @@ impl Parser { } return Ok((unquoted, line_number)); } else if c == '\\' { - let Some(c) = chars.next() else { - self.nonfatal_errors - .push(invalid_escape("\\(newline)".into())); - break; - }; - match c { - 'n' => unquoted.push('\n'), - 'r' => unquoted.push('\r'), - 't' => unquoted.push('\t'), - '\\' | '\'' | '"' | '`' => unquoted.push(c), - ',' => unquoted.push_str("\\,"), - 'x' => { - let Some(c1) = chars.next() else { - self.nonfatal_errors.push(invalid_escape("\\x".into())); - break; - }; - let Some(c2) = chars.next() else { - self.nonfatal_errors - .push(invalid_escape(format!("\\x{c1}"))); - break; - }; - let (Some(nibble1), Some(nibble2)) = - (parse_hex_digit(c1), parse_hex_digit(c2)) - else { - self.nonfatal_errors - .push(invalid_escape(format!("\\x{c1}{c2}"))); - continue; - }; - if nibble1 == 0 && nibble2 == 0 { - self.nonfatal_errors - .push(Error::InvalidValue(location(line_number))); - } - unquoted.push(char::try_from(nibble1 << 4 | nibble2).unwrap()); - } - 'u' => { - let mut c = chars.next(); - if c != Some('{') { - self.nonfatal_errors.push(invalid_escape("\\u".into())); - continue; - } - let mut code = 0u32; - for i in 0..7 { - c = chars.next(); - if i == 6 { - break; - } - let Some(c) = c else { - break; - }; - if c == '}' { - break; - } - code <<= 4; - code |= parse_hex_digit(c) - .ok_or_else(|| invalid_escape(format!("\\u{{{code:x}{c}")))?; - } - if c != Some('}') { - self.nonfatal_errors - .push(invalid_escape("\\u{ has no matching }".into())); - continue; - } - let Ok(c) = char::try_from(code) else { - self.nonfatal_errors - .push(invalid_escape(format!("\\u{{{code:x}}}"))); - continue; - }; - unquoted.push(c); - } - _ => { - self.nonfatal_errors.push(invalid_escape(format!("\\{c}"))); - } - } + self.parse_escape_sequence(&mut chars, &mut unquoted, || location(line_number)); } else if c == '\0' { self.nonfatal_errors .push(Error::InvalidValue(location(line_number))); @@ -492,29 +511,25 @@ impl Parser { } fn load(&mut self, filename: &str, reader: &mut dyn Read) -> Result { - let mut config = Configuration::default(); - let mut line = vec![]; - let mut line_number = 0; - let mut current_section = &mut config; + let mut items: Vec<(Box, Value)> = vec![]; + let mut line: Vec = vec![]; + let mut line_number: u64 = 0; + let mut current_section = String::new(); let filename: Arc = filename.into(); loop { line.truncate(0); if !reader.read_until_lf(&mut line)? { break; } + if line_number == 0 && line.starts_with(b"\xEF\xBB\xBF") { + line.drain(..3); + } line_number += 1; let location = Location { file: filename.clone(), line: line_number, }; - line.pop_if(|c| *c == b'\r'); - for c in &line { - if (0..0x1f).contains(c) && *c != b'\t' { - return Err(Error::IllegalCharacter(location, char::from(*c))); - } - } - let mut line = - str::from_utf8(&line).map_err(|_| Error::InvalidUtf8(location.clone()))?; + let mut line = process_line(&mut line, || location.clone())?; line = line.trim_start_matches(['\t', ' ']); if line.is_empty() || line.starts_with('#') { // comment/blank line @@ -525,64 +540,59 @@ impl Parser { if !line.ends_with(']') { return Err(Error::UnmatchedLeftBrace(location)); } - let new_section = line[1..line.len() - 1].into(); - current_section = &mut config; - self.check_valid_key(&location, new_section); - if !new_section.is_empty() { - for component in new_section.split('.') { - current_section = Arc::get_mut(&mut current_section.children) - .unwrap() - .entry(component.into()) - .or_default(); - } - } + current_section = line[1..line.len() - 1].into(); + self.check_valid_key(&location, ¤t_section); } else { - fn insert( - mut section: &mut Configuration, - location: Location, - mut key: &str, - value: &str, - ) { - if let Some(last_dot) = key.rfind('.') { - for component in key[..last_dot].split('.') { - section = Arc::get_mut(&mut section.children) - .unwrap() - .entry(component.into()) - .or_default(); - } - key = &key[last_dot + 1..]; - } - Arc::get_mut(&mut section.values).unwrap().insert( - key.into(), - Value { - value: value.into(), - defined_at: location, - }, - ); - } - let (mut relative_key, mut value) = line .split_once('=') .ok_or_else(|| Error::InvalidLine(location.clone()))?; relative_key = relative_key.trim_end_matches(['\t', ' ']); self.check_valid_key(&location, relative_key); + let key: String = if current_section.is_empty() { + relative_key.into() + } else { + format!("{current_section}.{relative_key}") + }; value = value.trim_start_matches(['\t', ' ']); if value.starts_with(['`', '"', '\'']) { let (value, new_line_number) = self.read_quoted_value(value, reader, &location)?; - insert(current_section, location, relative_key, &value); + items.push(( + key.into(), + Value { + value: value.into(), + defined_at: location, + }, + )); line_number = new_line_number; } else { value = value.trim_end_matches(['\t', ' ']); if value.contains('\0') { return Err(Error::InvalidValue(location)); } - insert(current_section, location, relative_key, value); + items.push(( + key.into(), + Value { + value: value.into(), + defined_at: location, + }, + )); } } } + items.sort_by(|(k1, _), (k2, _)| k1.cmp(k2)); + for window in items.windows(2) { + if window[0].0 == window[1].0 { + // duplicate key + self.nonfatal_errors.push(Error::DuplicateKey(Box::new(( + window[0].0.clone(), + window[0].1.defined_at.clone(), + window[1].1.defined_at.clone(), + )))); + } + } match self.nonfatal_errors.len() { - 0 => Ok(config), + 0 => Ok(Configuration { items }), 1 => Err(self.nonfatal_errors.pop().unwrap()), 2.. => Err(Error::Multiple(take(&mut self.nonfatal_errors).into())), } @@ -613,14 +623,20 @@ impl Configuration { /// keys starting with `key.` in `self`, together with their values. #[must_use] pub fn section(&self, key: &str) -> Configuration { - let mut node = self; - for component in key.split('.') { - node = match self.children.get(component) { - Some(x) => x, - None => return Configuration::default(), - }; + let key_dot = format!("{key}."); + let start_idx = self + .items + .binary_search_by(|(k, _)| k.as_ref().cmp(&key_dot)) + .expect_err("items should not contain a key ending in ."); + // NB: / is the next ASCII character after . + let key_slash = format!("{key}/"); + let end_idx = self + .items + .binary_search_by(|(k, _)| k.as_ref().cmp(&key_slash)) + .unwrap_or_else(|i| i); + Configuration { + items: self.items[start_idx..end_idx].to_owned(), } - node.clone() } /// Get the list of all “direct keys” in this configuration. /// @@ -631,24 +647,22 @@ impl Configuration { /// this would give an iterator yielding /// `"farmer-name"` and `"sheep"` in some order.) pub fn keys(&self) -> impl '_ + Iterator { - self.values - .keys() - .chain( - self.children - .keys() - .filter(|&k| !self.values.contains_key(k)), - ) - .map(AsRef::as_ref) + let mut prev = None; + self.items.iter().filter_map(move |(key, _)| { + let first_component: &str = key.split_once('.').map_or(key.as_ref(), |(c, _)| c); + if prev == Some(first_component) { + return None; + } + prev = Some(first_component); + Some(first_component) + }) } fn get_val(&self, key: &str) -> Option<&Value> { - let Some((path, last_component)) = key.rsplit_once('.') else { - return self.values.get(key); - }; - let mut node = self; - for component in path.split('.') { - node = node.children.get(component)?; - } - node.values.get(last_component) + let idx = self + .items + .binary_search_by(|(k, _)| k.as_ref().cmp(key)) + .ok()?; + Some(&self.items[idx].1) } /// Get value associated with `key`, if any. #[must_use] @@ -756,15 +770,17 @@ impl Configuration { } /// Merge `conf` into `self`, preferring values in `conf`. pub fn merge(&mut self, conf: &Configuration) { - let new_values = Arc::make_mut(&mut self.values); - // merge conf.values into self.values - for (key, val) in conf.values.iter() { - new_values.insert(key.clone(), val.clone()); + let mut must_sort = false; + for (key, value) in &conf.items { + if let Ok(i) = self.items.binary_search_by(|(k, _)| k.cmp(key)) { + self.items[i].1 = value.clone(); + } else { + self.items.push((key.clone(), value.clone())); + must_sort = true; + } } - // merge conf.children into self.children - let new_children = Arc::make_mut(&mut self.children); - for (key, child) in conf.children.iter() { - new_children.entry(key.clone()).or_default().merge(child); + if must_sort { + self.items.sort_by(|(k1, _), (k2, _)| k1.cmp(k2)); } } /// Check that `self` follows the given schema. -- cgit v1.2.3