diff options
author | pommicket <pommicket@gmail.com> | 2025-09-10 02:14:54 -0400 |
---|---|---|
committer | pommicket <pommicket@gmail.com> | 2025-09-10 02:14:54 -0400 |
commit | ed0182736a20e0987c6dc9c5e086a30fd1b02f8b (patch) | |
tree | 4113e76e73e60fe43d72c707bcbbdb47a1ced0f7 /src | |
parent | 8439ed183206659ad581caf525e078c6bb2c6a64 (diff) |
More schemas, but probably going to get rid of it
Diffstat (limited to 'src')
-rw-r--r-- | src/lib.rs | 55 | ||||
-rw-r--r-- | src/schema.rs | 332 |
2 files changed, 290 insertions, 97 deletions
@@ -38,6 +38,19 @@ impl Location { pub fn line(&self) -> u64 { self.line } + + /// Dummy location for internal use + fn dummy() -> Self { + Self { + file: Arc::from(""), + line: 0, + } + } + + fn is_dummy(&self) -> bool { + // we never use line number of 0 ordinarily + self.line == 0 + } } impl fmt::Display for Location { @@ -127,6 +140,12 @@ pub enum Error { SchemaBadMaxLength(Location, Box<str>), /// Invalid schema key SchemaBadKey(Location, Box<str>), + /// Value is less than the schema-imposed minimum (location, key, value, minimum) + SchemaValueLessThanMin(Location, Box<str>, f64, f64), + /// Value is greater than the schema-imposed maximum (location, key, value, maximum) + SchemaValueGreaterThanMax(Location, Box<str>, f64, f64), + /// Value is longer than the schema-imposed maxlength (location, key, length, maxlength) + SchemaValueTooLong(Location, Box<str>, usize, usize), } impl fmt::Display for Error { @@ -188,6 +207,18 @@ impl fmt::Display for Error { Self::SchemaBadType(l, t) => write!(f, "{l}: invalid type: {t:?}"), Self::SchemaBadMaxLength(l, m) => write!(f, "{l}: invalid maxlength: {m:?}"), Self::SchemaBadKey(l, k) => write!(f, "{l}: invalid schema key: {k}"), + Self::SchemaValueLessThanMin(l, key, val, min) => write!( + f, + "{l}: {key}'s value of {val} is less than the minimum ({min})" + ), + Self::SchemaValueGreaterThanMax(l, key, val, max) => write!( + f, + "{l}: {key}'s value of {val} is greater than the maximum ({max})" + ), + Self::SchemaValueTooLong(l, key, len, maxlen) => write!( + f, + "{l}: {key}'s value has length {len}, which exceeds the maximum of {maxlen}" + ), } } } @@ -417,6 +448,16 @@ fn parse_hex_digit(c: char) -> Option<u32> { }) } +/// Returns `Ok(())` if `errors` is empty, otherwise a compound error +/// containing all the `errors`. 
+fn check_error_vec(mut errors: Vec<Error>) -> Result<()> { + match errors.len() { + 0 => Ok(()), + 1 => Err(errors.pop().unwrap()), + _ => Err(Error::Multiple(errors.into())), + } +} + #[derive(Default)] struct Parser { nonfatal_errors: Vec<Error>, @@ -664,11 +705,8 @@ impl Parser { )))); } } - match self.nonfatal_errors.len() { - 0 => Ok(Configuration { items }), - 1 => Err(self.nonfatal_errors.pop().unwrap()), - 2.. => Err(Error::Multiple(take(&mut self.nonfatal_errors).into())), - } + check_error_vec(take(&mut self.nonfatal_errors))?; + Ok(Configuration { items }) } } @@ -933,13 +971,6 @@ impl Configuration { pub fn to_schema(&self) -> Result<Schema> { Schema::try_from(self) } - - /// Check that `self` follows the given schema. - /// - /// Equivalent to `schema.check(self)`. - pub fn check_against(&self, schema: &Schema) -> Result<()> { - schema.check(self) - } } /// Opaque type returned by [`Configuration::iter`]. diff --git a/src/schema.rs b/src/schema.rs index 2b17c7c..ba7c440 100644 --- a/src/schema.rs +++ b/src/schema.rs @@ -1,5 +1,6 @@ -use crate::{Box, Configuration, Error, Location, Result, Vec, vec}; +use crate::{Arc, Box, Configuration, Error, Location, Result, Vec, vec}; use core::num::NonZeroU32; +use core::ops::Range; /// Like `Option<f64>` but uses NaN as its "`None`" (to save memory). 
/// @@ -8,12 +9,17 @@ use core::num::NonZeroU32; struct OptionF64(f64); impl OptionF64 { - const NONE: Self = Self(f64::NAN); + fn is_some(self) -> bool { + !self.0.is_nan() + } + fn or(self, other: OptionF64) -> Self { + if self.is_some() { self } else { other } + } } impl Default for OptionF64 { fn default() -> Self { - Self::NONE + Self(f64::NAN) } } @@ -44,76 +50,8 @@ impl From<Option<f64>> for OptionF64 { } } } -impl From<f64> for OptionF64 { - fn from(value: f64) -> Self { - // just panic-- this isn't a public API - assert!(!value.is_nan(), "OptionF64 can't contain NaN"); - Self(value) - } -} - -#[derive(Default)] -struct RuleSet { - r#type: Option<Type>, - maxlength: Option<NonZeroU32>, - min: OptionF64, - max: OptionF64, - default: Option<Box<str>>, - allow_unknown: Option<bool>, -} - -#[allow(dead_code)] // TODO -enum Rule { - Type(Type), - MaxLength(NonZeroU32), - Min(f64), - Max(f64), - Default(Box<str>), - AllowUnknown(bool), -} - -impl Rule { - fn parse(location: &Location, key: &str, val: &str) -> Result<Rule> { - Ok(if key.ends_with(".type") { - let r#type = Type::parse(location, val)?; - Rule::Type(r#type) - } else if key.ends_with(".min") { - let max = crate::parse_float(location, val)?; - Rule::Min(max) - } else if key.ends_with(".max") { - let max = crate::parse_float(location, val)?; - Rule::Max(max) - } else if key.ends_with(".maxlength") { - let max = val - .parse::<NonZeroU32>() - .map_err(|_| Error::SchemaBadMaxLength(location.clone(), val.into()))?; - if max.get() > 0x7fff_ffff { - return Err(Error::SchemaBadMaxLength(location.clone(), val.into())); - } - Rule::MaxLength(max) - } else if key.ends_with(".default") { - Rule::Default(val.into()) - } else if key.ends_with(".allow_unknown") { - Rule::AllowUnknown(crate::parse_bool(location, val)?) 
- } else { - return Err(Error::SchemaBadKey(location.clone(), key.into())); - }) - } -} - -impl RuleSet { - fn add_rule(&mut self, rule: Rule) { - match rule { - Rule::Type(t) => self.r#type = Some(t), - Rule::MaxLength(m) => self.maxlength = Some(m), - Rule::Default(d) => self.default = Some(d), - Rule::AllowUnknown(a) => self.allow_unknown = Some(a), - Rule::Min(m) => self.min = m.into(), - Rule::Max(m) => self.max = m.into(), - } - } -} +#[derive(Clone, Debug)] #[allow(dead_code)] // TODO enum Type { Any, @@ -122,10 +60,10 @@ enum Type { UInt, Int, Float, - Literal(Box<str>), - Optional(Box<Type>), - List(Box<Type>), - Union(Box<[Type]>), + Literal(Arc<str>), + Optional(Arc<Type>), + List(Arc<Type>), + Union(Arc<[Type]>), } impl Type { @@ -174,15 +112,117 @@ impl Type { .strip_prefix("Optional[") .and_then(|s| s.strip_suffix(']')) { - return Ok(Self::Optional(Box::new(Self::parse(location, opt)?))); + return Ok(Self::Optional(Arc::new(Self::parse(location, opt)?))); } if let Some(of) = s.strip_prefix("List[").and_then(|s| s.strip_suffix(']')) { - return Ok(Self::Optional(Box::new(Self::parse(location, of)?))); + return Ok(Self::List(Arc::new(Self::parse(location, of)?))); } Err(Error::SchemaBadType(location.clone(), s.into())) } } +#[derive(Debug)] +enum Rule { + Type(Type), + MaxLength(NonZeroU32), + Min(f64), + Max(f64), + Default(Arc<str>), + AllowUnknown(bool), +} + +impl Rule { + fn parse(location: &Location, key: &str, val: &str) -> Result<Rule> { + Ok(if key.ends_with(".type") { + let r#type = Type::parse(location, val)?; + Rule::Type(r#type) + } else if key.ends_with(".min") { + let min = crate::parse_float(location, val)?; + Rule::Min(min) + } else if key.ends_with(".max") { + let max = crate::parse_float(location, val)?; + Rule::Max(max) + } else if key.ends_with(".maxlength") { + let max = val + .parse::<NonZeroU32>() + .map_err(|_| Error::SchemaBadMaxLength(location.clone(), val.into()))?; + if max.get() > 0x7fff_ffff { + return 
Err(Error::SchemaBadMaxLength(location.clone(), val.into())); + } + Rule::MaxLength(max) + } else if key.ends_with(".default") { + Rule::Default(val.into()) + } else if key.ends_with(".allow_unknown") { + Rule::AllowUnknown(crate::parse_bool(location, val)?) + } else { + return Err(Error::SchemaBadKey(location.clone(), key.into())); + }) + } +} + +#[derive(Debug, Clone)] +struct RuleSet { + location: Location, + r#type: Option<Type>, + maxlength: Option<NonZeroU32>, + min: OptionF64, + max: OptionF64, + default: Option<Arc<str>>, + allow_unknown: Option<bool>, +} + +impl RuleSet { + fn new(location: Location) -> Self { + Self { + location, + r#type: None, + maxlength: None, + min: OptionF64::default(), + max: OptionF64::default(), + default: None, + allow_unknown: None, + } + } +} + +impl RuleSet { + fn add_rule(&mut self, rule: Rule) { + match rule { + Rule::Type(t) => self.r#type = Some(t), + Rule::MaxLength(m) => self.maxlength = Some(m), + Rule::Default(d) => self.default = Some(d), + Rule::AllowUnknown(a) => self.allow_unknown = Some(a), + Rule::Min(m) => self.min = OptionF64(m), + Rule::Max(m) => self.max = OptionF64(m), + } + } + // merge rule sets, giving `self` precedence + fn fall_back_on(&mut self, other: &RuleSet) { + let RuleSet { + location, + allow_unknown, + default, + r#type, + maxlength, + min, + max, + } = other; + if self.location.is_dummy() { + self.location = location.clone(); + } + self.allow_unknown = self.allow_unknown.or(*allow_unknown); + if self.r#type.is_none() { + self.r#type = r#type.clone(); + } + self.min = self.min.or(*min); + self.max = self.max.or(*max); + self.maxlength = self.maxlength.or(*maxlength); + if self.default.is_none() { + self.default = default.clone(); + } + } +} + /// A POM schema. /// /// See the [POM specification](https://www.pom.computer/spec.html) for a description @@ -194,9 +234,123 @@ pub struct Schema { } impl Schema { - /// Check that `conf` follows this schema, returning an appropriate `Err` if not. 
- pub fn check(&self, _conf: &Configuration) -> Result<()> { - todo!() + /// Load a schema from a file path. + #[cfg(feature = "std")] + pub fn load_path<P: AsRef<std::path::Path>>(path: P) -> Result<Self> { + Self::try_from(&Configuration::load_path(path)?) + } + + /// Load a schema. + /// + /// See [`Configuration::load`] for more information. + pub fn load<R: crate::Read>(filename: &str, reader: R) -> Result<Self> { + Self::try_from(&Configuration::load(filename, reader)?) + } + + fn binary_search_range( + &self, + range: Range<usize>, + key: &str, + ) -> core::result::Result<usize, usize> { + self.rules[range.clone()] + .binary_search_by(|(k, _)| k.as_ref().cmp(key)) + .map(|i| i + range.start) + .map_err(|i| i + range.start) + } + fn get_rule_set_(&self, prefix: &str, key: &str, range: Range<usize>) -> RuleSet { + // definitely overwrought. but it was fun to write. + if range.is_empty() { + return RuleSet::new(Location::dummy()); + } + if let Some((first_component, rest)) = key.split_once('.') { + let exact_key_dot = format!("{prefix}{first_component}."); + // NB: / is the first ASCII character after . 
+ let exact_key_slash = format!("{prefix}{first_component}/"); + let star_key_dot = format!("{prefix}*."); + let star_key_slash = format!("{prefix}*/"); + let exact_keys_start = self + .binary_search_range(range.clone(), &exact_key_dot) + .unwrap_or_else(|x| x); + let exact_keys_end = self + .binary_search_range(range.clone(), &exact_key_slash) + .unwrap_or_else(|x| x); + let mut exact_rule_set = + self.get_rule_set_(&exact_key_dot, rest, exact_keys_start..exact_keys_end); + let star_keys_start = self + .binary_search_range(range.clone(), &star_key_dot) + .unwrap_or_else(|x| x); + let star_keys_end = self + .binary_search_range(range.clone(), &star_key_slash) + .unwrap_or_else(|x| x); + let star_rule_set = + self.get_rule_set_(&star_key_dot, rest, star_keys_start..star_keys_end); + exact_rule_set.fall_back_on(&star_rule_set); + exact_rule_set + } else { + let mut rule_set = RuleSet::new(Location::dummy()); + if let Ok(exact_key_rule_idx) = + self.binary_search_range(range.clone(), &format!("{prefix}{key}")) + { + rule_set = self.rules[exact_key_rule_idx].1.clone(); + } + if let Ok(star_key_rule_idx) = + self.binary_search_range(range.clone(), &format!("{prefix}*")) + { + rule_set.fall_back_on(&self.rules[star_key_rule_idx].1); + } + rule_set + } + } + fn get_rule_set(&self, key: &str) -> RuleSet { + self.get_rule_set_("", key, 0..self.rules.len()) + } + /// Check that `conf` follows this schema, and fill in default values. 
+ pub fn check_and_fill_defaults(&self, conf: &mut Configuration) -> Result<()> { + let mut errors = vec![]; + for (key, val) in conf.iter() { + let location = || conf.location(key).unwrap(); + let rule_set = self.get_rule_set(key); + if (rule_set.min.is_some() || rule_set.max.is_some()) + && let Ok(val) = crate::parse_float(&Location::dummy(), val) + { + if let Some(min) = rule_set.min.into() + && val < min + { + errors.push(Error::SchemaValueLessThanMin( + location(), + key.into(), + val, + min, + )) + } + if let Some(max) = rule_set.max.into() + && val > max + { + errors.push(Error::SchemaValueGreaterThanMax( + location(), + key.into(), + val, + max, + )); + } + } + if let Some(max_length) = rule_set + .maxlength + .and_then(|x| usize::try_from(x.get()).ok()) + { + if val.len() > max_length { + errors.push(Error::SchemaValueTooLong( + location(), + key.into(), + val.len(), + max_length, + )); + } + } + // TODO: type, allow_unknown + } + // TODO: replace default values, report missing values + crate::check_error_vec(errors) } } @@ -204,7 +358,7 @@ impl TryFrom<&Configuration> for Schema { type Error = crate::Error; fn try_from(conf: &Configuration) -> Result<Self> { let mut errors = vec![]; - let mut rules: Vec<(&str, Rule)> = vec![]; + let mut rules: Vec<(&str, Location, Rule)> = vec![]; for (key, val) in conf.iter_sorted() { let location = conf.location(key).unwrap(); let rule = match Rule::parse(&location, key, val) { @@ -214,16 +368,24 @@ impl TryFrom<&Configuration> for Schema { continue; } }; - rules.push((key, rule)); + let Some((affected_key, _rule_name)) = key.rsplit_once('.') else { + unreachable!("Rule::parse shouldn't accept this key"); + }; + rules.push((affected_key, location, rule)); } let mut rule_sets: Vec<(Box<str>, RuleSet)> = vec![]; - for (key, rule) in rules { + for (key, location, rule) in rules { if rule_sets.last().is_none_or(|(k, _)| k.as_ref() != key) { // add new rule set - rule_sets.push((key.into(), RuleSet::default())); + 
rule_sets.push((key.into(), RuleSet::new(location))); } rule_sets.last_mut().unwrap().1.add_rule(rule); } + for (_, rule_set) in &rule_sets { + if let Err(e) = rule_set.check_consistency() { + errors.push(e); + } + } if errors.len() == 1 { return Err(errors.pop().unwrap()); } else if !errors.is_empty() { |