diff options
-rw-r--r-- | src/lib.rs | 122 | ||||
-rw-r--r-- | src/schema.rs | 396 | ||||
-rw-r--r-- | src/tests/mod.rs | 19 |
3 files changed, 70 insertions, 467 deletions
@@ -13,9 +13,8 @@ use alloc::vec::Vec; use alloc::{format, vec}; use core::fmt; use core::mem::take; +use core::sync::atomic::{AtomicBool, Ordering}; -mod schema; -pub use schema::Schema; #[cfg(test)] mod tests; @@ -38,19 +37,6 @@ impl Location { pub fn line(&self) -> u64 { self.line } - - /// Dummy location for internal use - fn dummy() -> Self { - Self { - file: Arc::from(""), - line: 0, - } - } - - fn is_dummy(&self) -> bool { - // we never use line number of 0 ordinarily - self.line == 0 - } } impl fmt::Display for Location { @@ -60,17 +46,18 @@ impl fmt::Display for Location { } /// A string value, together with location information about where it is defined. -#[derive(Clone, Debug)] +#[derive(Debug)] struct Value { value: Box<str>, defined_at: Location, + read: AtomicBool, } /// A parsed POM configuration. #[derive(Clone, Debug, Default)] pub struct Configuration { /// List of items in configuration, sorted by key. - items: Vec<(Box<str>, Value)>, + items: Vec<(Box<str>, Arc<Value>)>, } impl fmt::Display for Configuration { @@ -82,7 +69,7 @@ impl fmt::Display for Configuration { } } -/// A parsing or schema error. +/// A parsing error. #[non_exhaustive] #[derive(Debug)] pub enum Error { @@ -134,18 +121,6 @@ pub enum Error { /// None of the errors in the array will be [`Error::Multiple`]'s, /// and the array will contain at least two elements. Multiple(Box<[Error]>), - /// Bad type in schema - SchemaBadType(Location, Box<str>), - /// Bad maxlength in schema - SchemaBadMaxLength(Location, Box<str>), - /// Invalid schema key - SchemaBadKey(Location, Box<str>), - /// Value is less than the schema-imposed minimum - SchemaValueLessThanMin(Location, Box<str>, f64, f64), - /// Value is greater than the schema-imposed maximum - SchemaValueGreaterThanMax(Location, Box<str>, f64, f64), - /// Value is greater than the schema-imposed maxlength - SchemaValueTooLong(Location, Box<str>, usize, usize), } impl fmt::Display for Error { @@ -204,21 +179,6 @@ impl fmt::Display for Error { } Ok(()) } - Self::SchemaBadType(l, t) => write!(f, "{l}: invalid type: {t:?}"), - Self::SchemaBadMaxLength(l, m) => write!(f, "{l}: invalid maxlength: {m:?}"), - Self::SchemaBadKey(l, k) => write!(f, "{l}: invalid schema key: {k}"), - Self::SchemaValueLessThanMin(l, key, val, min) => write!( - f, - "{l}: {key}'s value of {val} is less than the minimum ({min})" - ), - Self::SchemaValueGreaterThanMax(l, key, val, max) => write!( - f, - "{l}: {key}'s value of {val} is greater than the maximum ({max})" - ), - Self::SchemaValueTooLong(l, key, len, maxlen) => write!( - f, - "{l}: {key}'s value has length {len}, which exceeds the maximum of {maxlen}" - ), } } } @@ -621,7 +581,7 @@ impl Parser { } fn load(&mut self, filename: &str, reader: &mut dyn Read) -> Result<Configuration> { - let mut items: Vec<(Box<str>, Value)> = vec![]; + let mut items: Vec<(Box<str>, Arc<Value>)> = vec![]; let mut line: Vec<u8> = vec![]; let mut line_number: u64 = 0; let mut current_section = String::new(); @@ -673,10 +633,11 @@ impl Parser { self.read_quoted_value(value, reader, &location)?; items.push(( key.into(), - Value { + Arc::new(Value { value: value.into(), defined_at: location, - }, + read: AtomicBool::new(false), + }), )); line_number = new_line_number; } else { @@ -686,10 +647,11 @@ impl Parser { } items.push(( key.into(), - Value { + Arc::new(Value { value: value.into(), defined_at: location, - }, + read: AtomicBool::new(false), + }), )); } } @@ -801,29 +763,25 @@ impl Configuration { self.into_iter() } - /// Same as `iter()` (for now), but explicitly marks that keys should be in sorted order. - /// - /// This is used internally so that we know what we have to fix if `iter()` - /// is ever changed to return a non-sorted iterator. - fn iter_sorted(&self) -> ConfigurationIter<'_> { - self.iter() - } - - fn get_val(&self, key: &str) -> Option<&Value> { + fn get_val(&self, key: &str, mark_read: bool) -> Option<&Value> { let idx = self.binary_search_for(key).ok()?; - Some(&self.items[idx].1) + let v = &self.items[idx].1; + if mark_read { + v.read.store(true, Ordering::Relaxed); + } + Some(v) } /// Get value associated with `key`, if any. #[must_use] pub fn get(&self, key: &str) -> Option<&str> { - Some(self.get_val(key)?.value.as_ref()) + Some(self.get_val(key, true)?.value.as_ref()) } /// Get location in the configuration file where `key` is defined, if any. #[must_use] pub fn location(&self, key: &str) -> Option<Location> { - if let Some(val) = self.get_val(key) { + if let Some(val) = self.get_val(key, false) { Some(val.defined_at.clone()) } else { // Check if `key` has any defined subkeys @@ -855,7 +813,9 @@ impl Configuration { /// and `Some(Err(…))` if `key` is defined but not an integer. #[must_use] pub fn get_int(&self, key: &str) -> Option<Result<i64>> { - let Value { value, defined_at } = self.get_val(key)?; + let Value { + value, defined_at, .. + } = self.get_val(key, true)?; Some(parse_int(defined_at, value.as_ref())) } @@ -872,7 +832,9 @@ impl Configuration { /// and `Some(Err(…))` if `key` is defined but not an unsigned integer. #[must_use] pub fn get_uint(&self, key: &str) -> Option<Result<u64>> { - let Value { value, defined_at } = self.get_val(key)?; + let Value { + value, defined_at, .. + } = self.get_val(key, true)?; Some(parse_uint(defined_at, value.as_ref())) } @@ -889,7 +851,9 @@ impl Configuration { /// and `Some(Err(…))` if `key` is defined but not a float. #[must_use] pub fn get_float(&self, key: &str) -> Option<Result<f64>> { - let Value { value, defined_at } = self.get_val(key)?; + let Value { + value, defined_at, .. + } = self.get_val(key, true)?; Some(parse_float(defined_at, value.as_ref())) } @@ -907,7 +871,9 @@ impl Configuration { /// `off`, `no`, `false`, `on`, `yes`, `true`. #[must_use] pub fn get_bool(&self, key: &str) -> Option<Result<bool>> { - let Value { value, defined_at } = self.get_val(key)?; + let Value { + value, defined_at, .. + } = self.get_val(key, true)?; Some(parse_bool(defined_at, value.as_ref())) } @@ -924,7 +890,7 @@ impl Configuration { /// Commas in list entries can be escaped with `\,`. #[must_use] pub fn get_list(&self, key: &str) -> Option<Vec<String>> { - let value = &self.get_val(key)?.value; + let value = &self.get_val(key, true)?.value; Some(parse_list(value.as_ref())) } @@ -967,15 +933,29 @@ impl Configuration { } } - /// Parse `self` as a [`Schema`]. - pub fn to_schema(&self) -> Result<Schema> { - Schema::try_from(self) + /// Returns an iterator over all keys whose values have not been read. + /// + /// This includes getting them through [`Self::get`], [`Self::get_or_default`], [`Self::get_int`], etc. + /// It also includes getting them through [`Self::get`] called on a section obtained via [`Self::section`]. + /// + /// The order of the items returned is arbitrary and may change in future versions without notice. + /// + /// Beware of race conditions when using this function in a multithreaded program + /// (you should wait for all threads to finish reading the configuration before calling this). + pub fn unread_keys(&self) -> impl '_ + Iterator<Item = &str> { + self.items.iter().filter_map(|(k, v)| { + if !v.read.load(Ordering::Relaxed) { + Some(k.as_ref()) + } else { + None + } + }) } } /// Opaque type returned by [`Configuration::iter`]. #[derive(Clone, Debug)] -pub struct ConfigurationIter<'a>(core::slice::Iter<'a, (Box<str>, Value)>); +pub struct ConfigurationIter<'a>(core::slice::Iter<'a, (Box<str>, Arc<Value>)>); impl<'a> Iterator for ConfigurationIter<'a> { type Item = (&'a str, &'a str); diff --git a/src/schema.rs b/src/schema.rs deleted file mode 100644 index ba7c440..0000000 --- a/src/schema.rs +++ /dev/null @@ -1,396 +0,0 @@ -use crate::{Arc, Box, Configuration, Error, Location, Result, Vec, vec}; -use core::num::NonZeroU32; -use core::ops::Range; - -/// Like `Option<f64>` but uses NaN as its "`None`" (to save memory). -/// -/// hopefully this will be added to rust std eventually… -#[derive(Clone, Copy)] -struct OptionF64(f64); - -impl OptionF64 { - fn is_some(self) -> bool { - !self.0.is_nan() - } - fn or(self, other: OptionF64) -> Self { - if self.is_some() { self } else { other } - } -} - -impl Default for OptionF64 { - fn default() -> Self { - Self(f64::NAN) - } -} - -impl core::fmt::Debug for OptionF64 { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - write!(f, "{:?}", Option::<f64>::from(*self)) - } -} - -impl From<OptionF64> for Option<f64> { - fn from(value: OptionF64) -> Self { - if value.0.is_nan() { - None - } else { - Some(value.0) - } - } -} -impl From<Option<f64>> for OptionF64 { - fn from(value: Option<f64>) -> Self { - match value { - Some(x) if x.is_nan() => { - // just panic-- this isn't a public API - panic!("OptionF64 can't contain NaN") - } - Some(x) => Self(x), - None => Self::default(), - } - } -} - -#[derive(Clone, Debug)] -#[allow(dead_code)] // TODO -enum Type { - Any, - None, - Bool, - UInt, - Int, - Float, - Literal(Arc<str>), - Optional(Arc<Type>), - List(Arc<Type>), - Union(Arc<[Type]>), -} - -impl Type { - fn parse(location: &Location, s: &str) -> Result<Self> { - let s = s.trim_matches(['\t', ' ']); - match s { - "Int" => return Ok(Self::Int), - "None" => return Ok(Self::None), - "Float" => return Ok(Self::Float), - "UInt" => return Ok(Self::UInt), - "Any" | "String" => return Ok(Self::Any), - "Bool" => return Ok(Self::Bool), - "Empty" => return Ok(Self::Literal("".into())), - _ => {} - } - let mut in_string = false; - let mut brackets = 0isize; - let mut ors = vec![]; - for (i, c) in s.char_indices() { - if c == '[' { - brackets += 1; - } - if c == ']' { - brackets -= 1; - } - if c == '\'' { - in_string = !in_string; - } - if c == '|' && brackets == 0 { - ors.push(i + 1); - } - } - if ors.len() > 1 { - ors.push(s.len() + 1); - let mut subtypes: Vec<Type> = Vec::with_capacity(ors.len() - 1); - for w in ors.windows(2) { - let [start, end] = w else { unreachable!() }; - subtypes.push(Self::parse(location, &s[*start..*end - 1])?); - } - return Ok(Self::Union(subtypes.into())); - } - if let Some(lit) = s.strip_prefix('\'').and_then(|s| s.strip_suffix('\'')) { - return Ok(Self::Literal(lit.into())); - } - if let Some(opt) = s - .strip_prefix("Optional[") - .and_then(|s| s.strip_suffix(']')) - { - return Ok(Self::Optional(Arc::new(Self::parse(location, opt)?))); - } - if let Some(of) = s.strip_prefix("List[").and_then(|s| s.strip_suffix(']')) { - return Ok(Self::Optional(Arc::new(Self::parse(location, of)?))); - } - Err(Error::SchemaBadType(location.clone(), s.into())) - } -} - -#[derive(Debug)] -enum Rule { - Type(Type), - MaxLength(NonZeroU32), - Min(f64), - Max(f64), - Default(Arc<str>), - AllowUnknown(bool), -} - -impl Rule { - fn parse(location: &Location, key: &str, val: &str) -> Result<Rule> { - Ok(if key.ends_with(".type") { - let r#type = Type::parse(location, val)?; - Rule::Type(r#type) - } else if key.ends_with(".min") { - let max = crate::parse_float(location, val)?; - Rule::Min(max) - } else if key.ends_with(".max") { - let max = crate::parse_float(location, val)?; - Rule::Max(max) - } else if key.ends_with(".maxlength") { - let max = val - .parse::<NonZeroU32>() - .map_err(|_| Error::SchemaBadMaxLength(location.clone(), val.into()))?; - if max.get() > 0x7fff_ffff { - return Err(Error::SchemaBadMaxLength(location.clone(), val.into())); - } - Rule::MaxLength(max) - } else if key.ends_with(".default") { - Rule::Default(val.into()) - } else if key.ends_with(".allow_unknown") { - Rule::AllowUnknown(crate::parse_bool(location, val)?) - } else { - return Err(Error::SchemaBadKey(location.clone(), key.into())); - }) - } -} - -#[derive(Debug, Clone)] -struct RuleSet { - location: Location, - r#type: Option<Type>, - maxlength: Option<NonZeroU32>, - min: OptionF64, - max: OptionF64, - default: Option<Arc<str>>, - allow_unknown: Option<bool>, -} - -impl RuleSet { - fn new(location: Location) -> Self { - Self { - location, - r#type: None, - maxlength: None, - min: OptionF64::default(), - max: OptionF64::default(), - default: None, - allow_unknown: None, - } - } -} - -impl RuleSet { - fn add_rule(&mut self, rule: Rule) { - match rule { - Rule::Type(t) => self.r#type = Some(t), - Rule::MaxLength(m) => self.maxlength = Some(m), - Rule::Default(d) => self.default = Some(d), - Rule::AllowUnknown(a) => self.allow_unknown = Some(a), - Rule::Min(m) => self.min = OptionF64(m), - Rule::Max(m) => self.max = OptionF64(m), - } - } - // merge rule sets, giving `self` precedence - fn fall_back_on(&mut self, other: &RuleSet) { - let RuleSet { - location, - allow_unknown, - default, - r#type, - maxlength, - min, - max, - } = other; - if self.location.is_dummy() { - self.location = location.clone(); - } - self.allow_unknown = self.allow_unknown.or(*allow_unknown); - if self.r#type.is_none() { - self.r#type = r#type.clone(); - } - self.min = self.min.or(*min); - self.max = self.max.or(*max); - self.maxlength = self.maxlength.or(*maxlength); - if self.default.is_none() { - self.default = default.clone(); - } - } -} - -/// A POM schema. -/// -/// See the [POM specification](https://www.pom.computer/spec.html) for a description -/// of schemas. -#[allow(dead_code)] // TODO -#[derive(Default)] -pub struct Schema { - rules: Vec<(Box<str>, RuleSet)>, -} - -impl Schema { - /// Load a schema from a file path. - #[cfg(feature = "std")] - pub fn load_path<P: AsRef<std::path::Path>>(path: P) -> Result<Self> { - Self::try_from(&Configuration::load_path(path)?) - } - - /// Load a schema. - /// - /// See [`Configuration::load`] for more information. - pub fn load<R: crate::Read>(filename: &str, reader: R) -> Result<Self> { - Self::try_from(&Configuration::load(filename, reader)?) - } - - fn binary_search_range( - &self, - range: Range<usize>, - key: &str, - ) -> core::result::Result<usize, usize> { - self.rules[range.clone()] - .binary_search_by(|(k, _)| k.as_ref().cmp(key)) - .map(|i| i + range.start) - .map_err(|i| i + range.start) - } - fn get_rule_set_(&self, prefix: &str, key: &str, range: Range<usize>) -> RuleSet { - // definitely overwrought. but it was fun to write. - if range.is_empty() { - return RuleSet::new(Location::dummy()); - } - if let Some((first_component, rest)) = key.split_once('.') { - let exact_key_dot = format!("{prefix}{first_component}."); - // NB: / is the first ASCII character after . - let exact_key_slash = format!("{prefix}{first_component}/"); - let star_key_dot = format!("{prefix}*."); - let star_key_slash = format!("{prefix}*/"); - let exact_keys_start = self - .binary_search_range(range.clone(), &exact_key_dot) - .expect_err("key should not end in ."); - let exact_keys_end = self - .binary_search_range(range.clone(), &exact_key_slash) - .unwrap_or_else(|x| x); - let mut exact_rule_set = - self.get_rule_set_(&exact_key_dot, rest, exact_keys_start..exact_keys_end); - let star_keys_start = self - .binary_search_range(range.clone(), &star_key_dot) - .expect_err("key should not end in ."); - let star_keys_end = self - .binary_search_range(range.clone(), &star_key_slash) - .unwrap_or_else(|x| x); - let star_rule_set = - self.get_rule_set_(&star_key_dot, rest, star_keys_start..star_keys_end); - exact_rule_set.fall_back_on(&star_rule_set); - exact_rule_set - } else { - let mut rule_set = RuleSet::new(Location::dummy()); - if let Ok(exact_key_rule_idx) = - self.binary_search_range(range.clone(), &format!("{prefix}{key}")) - { - rule_set = self.rules[exact_key_rule_idx].1.clone(); - } - if let Ok(star_key_rule_idx) = - self.binary_search_range(range.clone(), &format!("{prefix}*")) - { - rule_set.fall_back_on(&self.rules[star_key_rule_idx].1); - } - rule_set - } - } - fn get_rule_set(&self, key: &str) -> RuleSet { - self.get_rule_set_("", key, 0..self.rules.len()) - } - /// Check that `conf` follows this schema, and fill in default values. - pub fn check_and_fill_defaults(&self, conf: &mut Configuration) -> Result<()> { - let mut errors = vec![]; - for (key, val) in conf.iter() { - let location = || conf.location(key).unwrap(); - let rule_set = self.get_rule_set(key); - if (rule_set.min.is_some() || rule_set.max.is_some()) - && let Ok(val) = crate::parse_float(&Location::dummy(), val) - { - if let Some(min) = rule_set.min.into() - && val < min - { - errors.push(Error::SchemaValueLessThanMin( - location(), - key.into(), - val, - min, - )) - } - if let Some(max) = rule_set.max.into() - && val > max - { - errors.push(Error::SchemaValueGreaterThanMax( - location(), - key.into(), - val, - max, - )); - } - } - if let Some(max_length) = rule_set - .maxlength - .and_then(|x| usize::try_from(x.get()).ok()) - { - if val.len() > max_length { - errors.push(Error::SchemaValueTooLong( - location(), - key.into(), - val.len(), - max_length, - )); - } - } - // TODO: type, allow_unknown - } - // TODO: replace default values, report missing values - crate::check_error_vec(errors) - } -} - -impl TryFrom<&Configuration> for Schema { - type Error = crate::Error; - fn try_from(conf: &Configuration) -> Result<Self> { - let mut errors = vec![]; - let mut rules: Vec<(&str, Location, Rule)> = vec![]; - for (key, val) in conf.iter_sorted() { - let location = conf.location(key).unwrap(); - let rule = match Rule::parse(&location, key, val) { - Ok(r) => r, - Err(e) => { - errors.push(e); - continue; - } - }; - let Some((affected_key, _rule_name)) = key.rsplit_once('.') else { - unreachable!("Rule::parse shouldn't accept this key"); - }; - rules.push((affected_key, location, rule)); - } - let mut rule_sets: Vec<(Box<str>, RuleSet)> = vec![]; - for (key, location, rule) in rules { - if rule_sets.last().is_none_or(|(k, _)| k.as_ref() != key) { - // add new rule set - rule_sets.push((key.into(), RuleSet::new(location))); - } - rule_sets.last_mut().unwrap().1.add_rule(rule); - } - for (_, rule_set) in &rule_sets { - if let Err(e) = rule_set.check_consistency() { - errors.push(e); - } - } - if errors.len() == 1 { - return Err(errors.pop().unwrap()); - } else if !errors.is_empty() { - return Err(Error::Multiple(errors.into())); - } - Ok(Schema { rules: rule_sets }) - } -} diff --git a/src/tests/mod.rs b/src/tests/mod.rs index 6c40f6f..b7f9d3a 100644 --- a/src/tests/mod.rs +++ b/src/tests/mod.rs @@ -70,4 +70,23 @@ bar.y = 6 let mut keys: Vec<_> = conf.keys().collect(); keys.sort(); assert_eq!(keys, ["a", "foo"]); + + let conf = Configuration::load( + "<test configuration 2>", + " +x = 5 +x.y = 6 +x.y.z = 7 +foo = 12 +bar = 16 +" + .as_bytes(), + ) + .unwrap(); + conf.get_int("foo").unwrap().unwrap(); + conf.get_int("x").unwrap().unwrap(); + conf.section("x.y").get_int("z").unwrap().unwrap(); + let mut unread: Vec<&str> = conf.unread_keys().collect(); + unread.sort(); + assert_eq!(unread, ["bar", "x.y"]); } |