summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorpommicket <pommicket@gmail.com>2025-09-10 11:57:03 -0400
committerpommicket <pommicket@gmail.com>2025-09-10 11:57:03 -0400
commitf1767908837d0e6a0da2aa73009ce11de6fb359d (patch)
tree8cb03ac742d3e7d2f36ccf38a319b5bc13cf4685
parented0182736a20e0987c6dc9c5e086a30fd1b02f8b (diff)
Remove schemas, add unread_keys
-rw-r--r--src/lib.rs122
-rw-r--r--src/schema.rs396
-rw-r--r--src/tests/mod.rs19
3 files changed, 70 insertions, 467 deletions
diff --git a/src/lib.rs b/src/lib.rs
index 0763fa7..33d1d2d 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -13,9 +13,8 @@ use alloc::vec::Vec;
use alloc::{format, vec};
use core::fmt;
use core::mem::take;
+use core::sync::atomic::{AtomicBool, Ordering};
-mod schema;
-pub use schema::Schema;
#[cfg(test)]
mod tests;
@@ -38,19 +37,6 @@ impl Location {
pub fn line(&self) -> u64 {
self.line
}
-
- /// Dummy location for internal use
- fn dummy() -> Self {
- Self {
- file: Arc::from(""),
- line: 0,
- }
- }
-
- fn is_dummy(&self) -> bool {
- // we never use line number of 0 ordinarily
- self.line == 0
- }
}
impl fmt::Display for Location {
@@ -60,17 +46,18 @@ impl fmt::Display for Location {
}
/// A string value, together with location information about where it is defined.
-#[derive(Clone, Debug)]
+#[derive(Debug)]
struct Value {
value: Box<str>,
defined_at: Location,
+ read: AtomicBool,
}
/// A parsed POM configuration.
#[derive(Clone, Debug, Default)]
pub struct Configuration {
/// List of items in configuration, sorted by key.
- items: Vec<(Box<str>, Value)>,
+ items: Vec<(Box<str>, Arc<Value>)>,
}
impl fmt::Display for Configuration {
@@ -82,7 +69,7 @@ impl fmt::Display for Configuration {
}
}
-/// A parsing or schema error.
+/// A parsing error.
#[non_exhaustive]
#[derive(Debug)]
pub enum Error {
@@ -134,18 +121,6 @@ pub enum Error {
/// None of the errors in the array will be [`Error::Multiple`]'s,
/// and the array will contain at least two elements.
Multiple(Box<[Error]>),
- /// Bad type in schema
- SchemaBadType(Location, Box<str>),
- /// Bad maxlength in schema
- SchemaBadMaxLength(Location, Box<str>),
- /// Invalid schema key
- SchemaBadKey(Location, Box<str>),
- /// Value is less than the schema-imposed minimum
- SchemaValueLessThanMin(Location, Box<str>, f64, f64),
- /// Value is greater than the schema-imposed maximum
- SchemaValueGreaterThanMax(Location, Box<str>, f64, f64),
- /// Value is greater than the schema-imposed maxlength
- SchemaValueTooLong(Location, Box<str>, usize, usize),
}
impl fmt::Display for Error {
@@ -204,21 +179,6 @@ impl fmt::Display for Error {
}
Ok(())
}
- Self::SchemaBadType(l, t) => write!(f, "{l}: invalid type: {t:?}"),
- Self::SchemaBadMaxLength(l, m) => write!(f, "{l}: invalid maxlength: {m:?}"),
- Self::SchemaBadKey(l, k) => write!(f, "{l}: invalid schema key: {k}"),
- Self::SchemaValueLessThanMin(l, key, val, min) => write!(
- f,
- "{l}: {key}'s value of {val} is less than the minimum ({min})"
- ),
- Self::SchemaValueGreaterThanMax(l, key, val, max) => write!(
- f,
- "{l}: {key}'s value of {val} is greater than the maximum ({max})"
- ),
- Self::SchemaValueTooLong(l, key, len, maxlen) => write!(
- f,
- "{l}: {key}'s value has length {len}, which exceeds the maximum of {maxlen}"
- ),
}
}
}
@@ -621,7 +581,7 @@ impl Parser {
}
fn load(&mut self, filename: &str, reader: &mut dyn Read) -> Result<Configuration> {
- let mut items: Vec<(Box<str>, Value)> = vec![];
+ let mut items: Vec<(Box<str>, Arc<Value>)> = vec![];
let mut line: Vec<u8> = vec![];
let mut line_number: u64 = 0;
let mut current_section = String::new();
@@ -673,10 +633,11 @@ impl Parser {
self.read_quoted_value(value, reader, &location)?;
items.push((
key.into(),
- Value {
+ Arc::new(Value {
value: value.into(),
defined_at: location,
- },
+ read: AtomicBool::new(false),
+ }),
));
line_number = new_line_number;
} else {
@@ -686,10 +647,11 @@ impl Parser {
}
items.push((
key.into(),
- Value {
+ Arc::new(Value {
value: value.into(),
defined_at: location,
- },
+ read: AtomicBool::new(false),
+ }),
));
}
}
@@ -801,29 +763,25 @@ impl Configuration {
self.into_iter()
}
- /// Same as `iter()` (for now), but explicitly marks that keys should be in sorted order.
- ///
- /// This is used internally so that we know what we have to fix if `iter()`
- /// is ever changed to return a non-sorted iterator.
- fn iter_sorted(&self) -> ConfigurationIter<'_> {
- self.iter()
- }
-
- fn get_val(&self, key: &str) -> Option<&Value> {
+ fn get_val(&self, key: &str, mark_read: bool) -> Option<&Value> {
let idx = self.binary_search_for(key).ok()?;
- Some(&self.items[idx].1)
+ let v = &self.items[idx].1;
+ if mark_read {
+ v.read.store(true, Ordering::Relaxed);
+ }
+ Some(v)
}
/// Get value associated with `key`, if any.
#[must_use]
pub fn get(&self, key: &str) -> Option<&str> {
- Some(self.get_val(key)?.value.as_ref())
+ Some(self.get_val(key, true)?.value.as_ref())
}
/// Get location in the configuration file where `key` is defined, if any.
#[must_use]
pub fn location(&self, key: &str) -> Option<Location> {
- if let Some(val) = self.get_val(key) {
+ if let Some(val) = self.get_val(key, false) {
Some(val.defined_at.clone())
} else {
// Check if `key` has any defined subkeys
@@ -855,7 +813,9 @@ impl Configuration {
/// and `Some(Err(…))` if `key` is defined but not an integer.
#[must_use]
pub fn get_int(&self, key: &str) -> Option<Result<i64>> {
- let Value { value, defined_at } = self.get_val(key)?;
+ let Value {
+ value, defined_at, ..
+ } = self.get_val(key, true)?;
Some(parse_int(defined_at, value.as_ref()))
}
@@ -872,7 +832,9 @@ impl Configuration {
/// and `Some(Err(…))` if `key` is defined but not an unsigned integer.
#[must_use]
pub fn get_uint(&self, key: &str) -> Option<Result<u64>> {
- let Value { value, defined_at } = self.get_val(key)?;
+ let Value {
+ value, defined_at, ..
+ } = self.get_val(key, true)?;
Some(parse_uint(defined_at, value.as_ref()))
}
@@ -889,7 +851,9 @@ impl Configuration {
/// and `Some(Err(…))` if `key` is defined but not a float.
#[must_use]
pub fn get_float(&self, key: &str) -> Option<Result<f64>> {
- let Value { value, defined_at } = self.get_val(key)?;
+ let Value {
+ value, defined_at, ..
+ } = self.get_val(key, true)?;
Some(parse_float(defined_at, value.as_ref()))
}
@@ -907,7 +871,9 @@ impl Configuration {
/// `off`, `no`, `false`, `on`, `yes`, `true`.
#[must_use]
pub fn get_bool(&self, key: &str) -> Option<Result<bool>> {
- let Value { value, defined_at } = self.get_val(key)?;
+ let Value {
+ value, defined_at, ..
+ } = self.get_val(key, true)?;
Some(parse_bool(defined_at, value.as_ref()))
}
@@ -924,7 +890,7 @@ impl Configuration {
/// Commas in list entries can be escaped with `\,`.
#[must_use]
pub fn get_list(&self, key: &str) -> Option<Vec<String>> {
- let value = &self.get_val(key)?.value;
+ let value = &self.get_val(key, true)?.value;
Some(parse_list(value.as_ref()))
}
@@ -967,15 +933,29 @@ impl Configuration {
}
}
- /// Parse `self` as a [`Schema`].
- pub fn to_schema(&self) -> Result<Schema> {
- Schema::try_from(self)
+ /// Returns an iterator over all keys whose values have not been read.
+ ///
+ /// This includes getting them through [`Self::get`], [`Self::get_or_default`], [`Self::get_int`], etc.
+ /// It also includes getting them through [`Self::get`] called on a section obtained via [`Self::section`].
+ ///
+ /// The order of the items returned is arbitrary and may change in future versions without notice.
+ ///
+ /// Beware of race conditions when using this function in a multithreaded program
+ /// (you should wait for all threads to finish reading the configuration before calling this).
+ pub fn unread_keys(&self) -> impl '_ + Iterator<Item = &str> {
+ self.items.iter().filter_map(|(k, v)| {
+ if !v.read.load(Ordering::Relaxed) {
+ Some(k.as_ref())
+ } else {
+ None
+ }
+ })
}
}
/// Opaque type returned by [`Configuration::iter`].
#[derive(Clone, Debug)]
-pub struct ConfigurationIter<'a>(core::slice::Iter<'a, (Box<str>, Value)>);
+pub struct ConfigurationIter<'a>(core::slice::Iter<'a, (Box<str>, Arc<Value>)>);
impl<'a> Iterator for ConfigurationIter<'a> {
type Item = (&'a str, &'a str);
diff --git a/src/schema.rs b/src/schema.rs
deleted file mode 100644
index ba7c440..0000000
--- a/src/schema.rs
+++ /dev/null
@@ -1,396 +0,0 @@
-use crate::{Arc, Box, Configuration, Error, Location, Result, Vec, vec};
-use core::num::NonZeroU32;
-use core::ops::Range;
-
-/// Like `Option<f64>` but uses NaN as its "`None`" (to save memory).
-///
-/// hopefully this will be added to rust std eventually…
-#[derive(Clone, Copy)]
-struct OptionF64(f64);
-
-impl OptionF64 {
- fn is_some(self) -> bool {
- !self.0.is_nan()
- }
- fn or(self, other: OptionF64) -> Self {
- if self.is_some() { self } else { other }
- }
-}
-
-impl Default for OptionF64 {
- fn default() -> Self {
- Self(f64::NAN)
- }
-}
-
-impl core::fmt::Debug for OptionF64 {
- fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
- write!(f, "{:?}", Option::<f64>::from(*self))
- }
-}
-
-impl From<OptionF64> for Option<f64> {
- fn from(value: OptionF64) -> Self {
- if value.0.is_nan() {
- None
- } else {
- Some(value.0)
- }
- }
-}
-impl From<Option<f64>> for OptionF64 {
- fn from(value: Option<f64>) -> Self {
- match value {
- Some(x) if x.is_nan() => {
- // just panic-- this isn't a public API
- panic!("OptionF64 can't contain NaN")
- }
- Some(x) => Self(x),
- None => Self::default(),
- }
- }
-}
-
-#[derive(Clone, Debug)]
-#[allow(dead_code)] // TODO
-enum Type {
- Any,
- None,
- Bool,
- UInt,
- Int,
- Float,
- Literal(Arc<str>),
- Optional(Arc<Type>),
- List(Arc<Type>),
- Union(Arc<[Type]>),
-}
-
-impl Type {
- fn parse(location: &Location, s: &str) -> Result<Self> {
- let s = s.trim_matches(['\t', ' ']);
- match s {
- "Int" => return Ok(Self::Int),
- "None" => return Ok(Self::None),
- "Float" => return Ok(Self::Float),
- "UInt" => return Ok(Self::UInt),
- "Any" | "String" => return Ok(Self::Any),
- "Bool" => return Ok(Self::Bool),
- "Empty" => return Ok(Self::Literal("".into())),
- _ => {}
- }
- let mut in_string = false;
- let mut brackets = 0isize;
- let mut ors = vec![];
- for (i, c) in s.char_indices() {
- if c == '[' {
- brackets += 1;
- }
- if c == ']' {
- brackets -= 1;
- }
- if c == '\'' {
- in_string = !in_string;
- }
- if c == '|' && brackets == 0 {
- ors.push(i + 1);
- }
- }
- if ors.len() > 1 {
- ors.push(s.len() + 1);
- let mut subtypes: Vec<Type> = Vec::with_capacity(ors.len() - 1);
- for w in ors.windows(2) {
- let [start, end] = w else { unreachable!() };
- subtypes.push(Self::parse(location, &s[*start..*end - 1])?);
- }
- return Ok(Self::Union(subtypes.into()));
- }
- if let Some(lit) = s.strip_prefix('\'').and_then(|s| s.strip_suffix('\'')) {
- return Ok(Self::Literal(lit.into()));
- }
- if let Some(opt) = s
- .strip_prefix("Optional[")
- .and_then(|s| s.strip_suffix(']'))
- {
- return Ok(Self::Optional(Arc::new(Self::parse(location, opt)?)));
- }
- if let Some(of) = s.strip_prefix("List[").and_then(|s| s.strip_suffix(']')) {
- return Ok(Self::Optional(Arc::new(Self::parse(location, of)?)));
- }
- Err(Error::SchemaBadType(location.clone(), s.into()))
- }
-}
-
-#[derive(Debug)]
-enum Rule {
- Type(Type),
- MaxLength(NonZeroU32),
- Min(f64),
- Max(f64),
- Default(Arc<str>),
- AllowUnknown(bool),
-}
-
-impl Rule {
- fn parse(location: &Location, key: &str, val: &str) -> Result<Rule> {
- Ok(if key.ends_with(".type") {
- let r#type = Type::parse(location, val)?;
- Rule::Type(r#type)
- } else if key.ends_with(".min") {
- let max = crate::parse_float(location, val)?;
- Rule::Min(max)
- } else if key.ends_with(".max") {
- let max = crate::parse_float(location, val)?;
- Rule::Max(max)
- } else if key.ends_with(".maxlength") {
- let max = val
- .parse::<NonZeroU32>()
- .map_err(|_| Error::SchemaBadMaxLength(location.clone(), val.into()))?;
- if max.get() > 0x7fff_ffff {
- return Err(Error::SchemaBadMaxLength(location.clone(), val.into()));
- }
- Rule::MaxLength(max)
- } else if key.ends_with(".default") {
- Rule::Default(val.into())
- } else if key.ends_with(".allow_unknown") {
- Rule::AllowUnknown(crate::parse_bool(location, val)?)
- } else {
- return Err(Error::SchemaBadKey(location.clone(), key.into()));
- })
- }
-}
-
-#[derive(Debug, Clone)]
-struct RuleSet {
- location: Location,
- r#type: Option<Type>,
- maxlength: Option<NonZeroU32>,
- min: OptionF64,
- max: OptionF64,
- default: Option<Arc<str>>,
- allow_unknown: Option<bool>,
-}
-
-impl RuleSet {
- fn new(location: Location) -> Self {
- Self {
- location,
- r#type: None,
- maxlength: None,
- min: OptionF64::default(),
- max: OptionF64::default(),
- default: None,
- allow_unknown: None,
- }
- }
-}
-
-impl RuleSet {
- fn add_rule(&mut self, rule: Rule) {
- match rule {
- Rule::Type(t) => self.r#type = Some(t),
- Rule::MaxLength(m) => self.maxlength = Some(m),
- Rule::Default(d) => self.default = Some(d),
- Rule::AllowUnknown(a) => self.allow_unknown = Some(a),
- Rule::Min(m) => self.min = OptionF64(m),
- Rule::Max(m) => self.max = OptionF64(m),
- }
- }
- // merge rule sets, giving `self` precedence
- fn fall_back_on(&mut self, other: &RuleSet) {
- let RuleSet {
- location,
- allow_unknown,
- default,
- r#type,
- maxlength,
- min,
- max,
- } = other;
- if self.location.is_dummy() {
- self.location = location.clone();
- }
- self.allow_unknown = self.allow_unknown.or(*allow_unknown);
- if self.r#type.is_none() {
- self.r#type = r#type.clone();
- }
- self.min = self.min.or(*min);
- self.max = self.max.or(*max);
- self.maxlength = self.maxlength.or(*maxlength);
- if self.default.is_none() {
- self.default = default.clone();
- }
- }
-}
-
-/// A POM schema.
-///
-/// See the [POM specification](https://www.pom.computer/spec.html) for a description
-/// of schemas.
-#[allow(dead_code)] // TODO
-#[derive(Default)]
-pub struct Schema {
- rules: Vec<(Box<str>, RuleSet)>,
-}
-
-impl Schema {
- /// Load a schema from a file path.
- #[cfg(feature = "std")]
- pub fn load_path<P: AsRef<std::path::Path>>(path: P) -> Result<Self> {
- Self::try_from(&Configuration::load_path(path)?)
- }
-
- /// Load a schema.
- ///
- /// See [`Configuration::load`] for more information.
- pub fn load<R: crate::Read>(filename: &str, reader: R) -> Result<Self> {
- Self::try_from(&Configuration::load(filename, reader)?)
- }
-
- fn binary_search_range(
- &self,
- range: Range<usize>,
- key: &str,
- ) -> core::result::Result<usize, usize> {
- self.rules[range.clone()]
- .binary_search_by(|(k, _)| k.as_ref().cmp(key))
- .map(|i| i + range.start)
- .map_err(|i| i + range.start)
- }
- fn get_rule_set_(&self, prefix: &str, key: &str, range: Range<usize>) -> RuleSet {
- // definitely overwrought. but it was fun to write.
- if range.is_empty() {
- return RuleSet::new(Location::dummy());
- }
- if let Some((first_component, rest)) = key.split_once('.') {
- let exact_key_dot = format!("{prefix}{first_component}.");
- // NB: / is the first ASCII character after .
- let exact_key_slash = format!("{prefix}{first_component}/");
- let star_key_dot = format!("{prefix}*.");
- let star_key_slash = format!("{prefix}*/");
- let exact_keys_start = self
- .binary_search_range(range.clone(), &exact_key_dot)
- .expect_err("key should not end in .");
- let exact_keys_end = self
- .binary_search_range(range.clone(), &exact_key_slash)
- .unwrap_or_else(|x| x);
- let mut exact_rule_set =
- self.get_rule_set_(&exact_key_dot, rest, exact_keys_start..exact_keys_end);
- let star_keys_start = self
- .binary_search_range(range.clone(), &star_key_dot)
- .expect_err("key should not end in .");
- let star_keys_end = self
- .binary_search_range(range.clone(), &star_key_slash)
- .unwrap_or_else(|x| x);
- let star_rule_set =
- self.get_rule_set_(&star_key_dot, rest, star_keys_start..star_keys_end);
- exact_rule_set.fall_back_on(&star_rule_set);
- exact_rule_set
- } else {
- let mut rule_set = RuleSet::new(Location::dummy());
- if let Ok(exact_key_rule_idx) =
- self.binary_search_range(range.clone(), &format!("{prefix}{key}"))
- {
- rule_set = self.rules[exact_key_rule_idx].1.clone();
- }
- if let Ok(star_key_rule_idx) =
- self.binary_search_range(range.clone(), &format!("{prefix}*"))
- {
- rule_set.fall_back_on(&self.rules[star_key_rule_idx].1);
- }
- rule_set
- }
- }
- fn get_rule_set(&self, key: &str) -> RuleSet {
- self.get_rule_set_("", key, 0..self.rules.len())
- }
- /// Check that `conf` follows this schema, and fill in default values.
- pub fn check_and_fill_defaults(&self, conf: &mut Configuration) -> Result<()> {
- let mut errors = vec![];
- for (key, val) in conf.iter() {
- let location = || conf.location(key).unwrap();
- let rule_set = self.get_rule_set(key);
- if (rule_set.min.is_some() || rule_set.max.is_some())
- && let Ok(val) = crate::parse_float(&Location::dummy(), val)
- {
- if let Some(min) = rule_set.min.into()
- && val < min
- {
- errors.push(Error::SchemaValueLessThanMin(
- location(),
- key.into(),
- val,
- min,
- ))
- }
- if let Some(max) = rule_set.max.into()
- && val > max
- {
- errors.push(Error::SchemaValueGreaterThanMax(
- location(),
- key.into(),
- val,
- max,
- ));
- }
- }
- if let Some(max_length) = rule_set
- .maxlength
- .and_then(|x| usize::try_from(x.get()).ok())
- {
- if val.len() > max_length {
- errors.push(Error::SchemaValueTooLong(
- location(),
- key.into(),
- val.len(),
- max_length,
- ));
- }
- }
- // TODO: type, allow_unknown
- }
- // TODO: replace default values, report missing values
- crate::check_error_vec(errors)
- }
-}
-
-impl TryFrom<&Configuration> for Schema {
- type Error = crate::Error;
- fn try_from(conf: &Configuration) -> Result<Self> {
- let mut errors = vec![];
- let mut rules: Vec<(&str, Location, Rule)> = vec![];
- for (key, val) in conf.iter_sorted() {
- let location = conf.location(key).unwrap();
- let rule = match Rule::parse(&location, key, val) {
- Ok(r) => r,
- Err(e) => {
- errors.push(e);
- continue;
- }
- };
- let Some((affected_key, _rule_name)) = key.rsplit_once('.') else {
- unreachable!("Rule::parse shouldn't accept this key");
- };
- rules.push((affected_key, location, rule));
- }
- let mut rule_sets: Vec<(Box<str>, RuleSet)> = vec![];
- for (key, location, rule) in rules {
- if rule_sets.last().is_none_or(|(k, _)| k.as_ref() != key) {
- // add new rule set
- rule_sets.push((key.into(), RuleSet::new(location)));
- }
- rule_sets.last_mut().unwrap().1.add_rule(rule);
- }
- for (_, rule_set) in &rule_sets {
- if let Err(e) = rule_set.check_consistency() {
- errors.push(e);
- }
- }
- if errors.len() == 1 {
- return Err(errors.pop().unwrap());
- } else if !errors.is_empty() {
- return Err(Error::Multiple(errors.into()));
- }
- Ok(Schema { rules: rule_sets })
- }
-}
diff --git a/src/tests/mod.rs b/src/tests/mod.rs
index 6c40f6f..b7f9d3a 100644
--- a/src/tests/mod.rs
+++ b/src/tests/mod.rs
@@ -70,4 +70,23 @@ bar.y = 6
let mut keys: Vec<_> = conf.keys().collect();
keys.sort();
assert_eq!(keys, ["a", "foo"]);
+
+ let conf = Configuration::load(
+ "<test configuration 2>",
+ "
+x = 5
+x.y = 6
+x.y.z = 7
+foo = 12
+bar = 16
+"
+ .as_bytes(),
+ )
+ .unwrap();
+ conf.get_int("foo").unwrap().unwrap();
+ conf.get_int("x").unwrap().unwrap();
+ conf.section("x.y").get_int("z").unwrap().unwrap();
+ let mut unread: Vec<&str> = conf.unread_keys().collect();
+ unread.sort();
+ assert_eq!(unread, ["bar", "x.y"]);
}