diff options
author | pommicket <pommicket@gmail.com> | 2025-09-09 23:34:05 -0400 |
---|---|---|
committer | pommicket <pommicket@gmail.com> | 2025-09-09 23:34:05 -0400 |
commit | 8439ed183206659ad581caf525e078c6bb2c6a64 (patch) | |
tree | 50e803277cb078f9bee2e270bface4054ae70c47 | |
parent | 00ba4ae712a7a372bd937b56053d601ddc6eb3e8 (diff) |
Schema parsing
-rw-r--r-- | src/lib.rs | 32 | ||||
-rw-r--r-- | src/schema.rs | 271 |
2 files changed, 298 insertions, 5 deletions
@@ -14,6 +14,8 @@ use alloc::{format, vec};
 use core::fmt;
 use core::mem::take;
 
+mod schema;
+pub use schema::Schema;
 #[cfg(test)]
 mod tests;
 
@@ -119,6 +121,12 @@ pub enum Error {
     /// None of the errors in the array will be [`Error::Multiple`]'s,
     /// and the array will contain at least two elements.
     Multiple(Box<[Error]>),
+    /// Bad type in schema
+    SchemaBadType(Location, Box<str>),
+    /// Bad maxlength in schema
+    SchemaBadMaxLength(Location, Box<str>),
+    /// Invalid schema key
+    SchemaBadKey(Location, Box<str>),
 }
 
 impl fmt::Display for Error {
@@ -177,6 +185,9 @@ impl fmt::Display for Error {
                 }
                 Ok(())
             }
+            Self::SchemaBadType(l, t) => write!(f, "{l}: invalid type: {t:?}"),
+            Self::SchemaBadMaxLength(l, m) => write!(f, "{l}: invalid maxlength: {m:?}"),
+            Self::SchemaBadKey(l, k) => write!(f, "{l}: invalid schema key: {k}"),
         }
     }
 }
@@ -752,6 +763,14 @@ impl Configuration {
         self.into_iter()
     }
 
+    /// Same as `iter()` (for now), but explicitly marks that keys should be in sorted order.
+    ///
+    /// This is used internally so that we know what we have to fix if `iter()`
+    /// is ever changed to return a non-sorted iterator.
+    fn iter_sorted(&self) -> ConfigurationIter<'_> {
+        self.iter()
+    }
+
     fn get_val(&self, key: &str) -> Option<&Value> {
         let idx = self.binary_search_for(key).ok()?;
         Some(&self.items[idx].1)
@@ -910,13 +929,16 @@ impl Configuration {
         }
     }
 
+    /// Parse `self` as a [`Schema`].
+    pub fn to_schema(&self) -> Result<Schema> {
+        Schema::try_from(self)
+    }
+
     /// Check that `self` follows the given schema.
     ///
-    /// See the [POM specification](https://www.pom.computer/spec.html) for a description
-    /// of schemas.
-    pub fn check_against_schema(&self, schema: &Configuration) -> Result<()> {
-        _ = schema;
-        todo!()
+    /// Equivalent to `schema.check(self)`.
+    pub fn check_against(&self, schema: &Schema) -> Result<()> {
+        schema.check(self)
     }
 }
 
diff --git a/src/schema.rs b/src/schema.rs
new file mode 100644
index 0000000..2b17c7c
--- /dev/null
+++ b/src/schema.rs
@@ -0,0 +1,271 @@
+use crate::{Box, Configuration, Error, Location, Result, Vec, vec};
+use core::num::NonZeroU32;
+
+/// Like `Option<f64>` but uses NaN as its "`None`" (to save memory).
+///
+/// hopefully this will be added to rust std eventually…
+#[derive(Clone, Copy)]
+struct OptionF64(f64);
+
+impl OptionF64 {
+    /// The "no value" state, represented by NaN.
+    const NONE: Self = Self(f64::NAN);
+}
+
+impl Default for OptionF64 {
+    fn default() -> Self {
+        Self::NONE
+    }
+}
+
+impl core::fmt::Debug for OptionF64 {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        write!(f, "{:?}", Option::<f64>::from(*self))
+    }
+}
+
+impl From<OptionF64> for Option<f64> {
+    fn from(value: OptionF64) -> Self {
+        if value.0.is_nan() {
+            None
+        } else {
+            Some(value.0)
+        }
+    }
+}
+impl From<Option<f64>> for OptionF64 {
+    fn from(value: Option<f64>) -> Self {
+        match value {
+            Some(x) if x.is_nan() => {
+                // just panic-- this isn't a public API
+                panic!("OptionF64 can't contain NaN")
+            }
+            Some(x) => Self(x),
+            None => Self::default(),
+        }
+    }
+}
+impl From<f64> for OptionF64 {
+    fn from(value: f64) -> Self {
+        // just panic-- this isn't a public API
+        assert!(!value.is_nan(), "OptionF64 can't contain NaN");
+        Self(value)
+    }
+}
+
+/// All the rules that apply to a single setting, merged into one record.
+///
+/// A field is left unset when the schema gives no rule of that kind.
+#[derive(Default)]
+struct RuleSet {
+    r#type: Option<Type>,
+    maxlength: Option<NonZeroU32>,
+    min: OptionF64,
+    max: OptionF64,
+    default: Option<Box<str>>,
+    allow_unknown: Option<bool>,
+}
+
+/// A single rule, parsed from one key-value pair of a schema.
+#[allow(dead_code)] // TODO
+enum Rule {
+    Type(Type),
+    MaxLength(NonZeroU32),
+    Min(f64),
+    Max(f64),
+    Default(Box<str>),
+    AllowUnknown(bool),
+}
+
+impl Rule {
+    /// Parse the rule given by the schema entry `key = val`.
+    ///
+    /// The kind of rule is selected by the suffix of `key` (`.type`, `.min`, …).
+    /// `location` is only used for error reporting.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`Error::SchemaBadKey`] if `key` has no recognized suffix,
+    /// [`Error::SchemaBadMaxLength`] if a `.maxlength` value is not an integer
+    /// from 1 to 2³¹−1, or whatever error the parser for the value reports.
+    fn parse(location: &Location, key: &str, val: &str) -> Result<Rule> {
+        Ok(if key.ends_with(".type") {
+            Rule::Type(Type::parse(location, val)?)
+        } else if key.ends_with(".min") {
+            // NOTE(review): a NaN here would make `RuleSet::add_rule` panic;
+            // presumably `crate::parse_float` rejects NaN -- TODO confirm.
+            Rule::Min(crate::parse_float(location, val)?)
+        } else if key.ends_with(".max") {
+            Rule::Max(crate::parse_float(location, val)?)
+        } else if key.ends_with(".maxlength") {
+            let max = val
+                .parse::<NonZeroU32>()
+                .map_err(|_| Error::SchemaBadMaxLength(location.clone(), val.into()))?;
+            if max.get() > 0x7fff_ffff {
+                return Err(Error::SchemaBadMaxLength(location.clone(), val.into()));
+            }
+            Rule::MaxLength(max)
+        } else if key.ends_with(".default") {
+            Rule::Default(val.into())
+        } else if key.ends_with(".allow_unknown") {
+            Rule::AllowUnknown(crate::parse_bool(location, val)?)
+        } else {
+            return Err(Error::SchemaBadKey(location.clone(), key.into()));
+        })
+    }
+}
+
+impl RuleSet {
+    /// Record `rule`, replacing any earlier rule of the same kind.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `rule` is a `Min` or `Max` holding NaN.
+    fn add_rule(&mut self, rule: Rule) {
+        match rule {
+            Rule::Type(t) => self.r#type = Some(t),
+            Rule::MaxLength(m) => self.maxlength = Some(m),
+            Rule::Default(d) => self.default = Some(d),
+            Rule::AllowUnknown(a) => self.allow_unknown = Some(a),
+            Rule::Min(m) => self.min = m.into(),
+            Rule::Max(m) => self.max = m.into(),
+        }
+    }
+}
+
+/// The type a setting's value must have, as given by a `.type` rule.
+#[allow(dead_code)] // TODO
+enum Type {
+    Any,
+    None,
+    Bool,
+    UInt,
+    Int,
+    Float,
+    Literal(Box<str>),
+    Optional(Box<Type>),
+    List(Box<Type>),
+    Union(Box<[Type]>),
+}
+
+impl Type {
+    /// Parse a type expression such as `Int`, `'abc'`, `List[Bool]` or `Int | None`.
+    ///
+    /// `location` is only used for error reporting.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`Error::SchemaBadType`] if `s` (or a part of it) is not a valid type.
+    fn parse(location: &Location, s: &str) -> Result<Self> {
+        let s = s.trim_matches(['\t', ' ']);
+        match s {
+            "Int" => return Ok(Self::Int),
+            "None" => return Ok(Self::None),
+            "Float" => return Ok(Self::Float),
+            "UInt" => return Ok(Self::UInt),
+            "Any" | "String" => return Ok(Self::Any),
+            "Bool" => return Ok(Self::Bool),
+            "Empty" => return Ok(Self::Literal("".into())),
+            _ => {}
+        }
+        // Byte offsets at which the members of a top-level union start:
+        // 0, then one past each `|` outside of brackets and string literals.
+        let mut starts = vec![0];
+        let mut in_string = false;
+        let mut depth = 0isize;
+        for (i, c) in s.char_indices() {
+            match c {
+                '\'' => in_string = !in_string,
+                // brackets and bars inside a literal are ordinary characters
+                _ if in_string => {}
+                '[' => depth += 1,
+                ']' => depth -= 1,
+                '|' if depth == 0 => starts.push(i + 1),
+                _ => {}
+            }
+        }
+        if starts.len() > 1 {
+            // sentinel so that each adjacent pair delimits `member|`
+            starts.push(s.len() + 1);
+            let members = starts
+                .windows(2)
+                .map(|w| Self::parse(location, &s[w[0]..w[1] - 1]))
+                .collect::<Result<Vec<Type>>>()?;
+            return Ok(Self::Union(members.into()));
+        }
+        if let Some(lit) = s.strip_prefix('\'').and_then(|s| s.strip_suffix('\'')) {
+            return Ok(Self::Literal(lit.into()));
+        }
+        if let Some(opt) = s
+            .strip_prefix("Optional[")
+            .and_then(|s| s.strip_suffix(']'))
+        {
+            return Ok(Self::Optional(Box::new(Self::parse(location, opt)?)));
+        }
+        if let Some(of) = s.strip_prefix("List[").and_then(|s| s.strip_suffix(']')) {
+            return Ok(Self::List(Box::new(Self::parse(location, of)?)));
+        }
+        Err(Error::SchemaBadType(location.clone(), s.into()))
+    }
+}
+
+/// A POM schema.
+///
+/// See the [POM specification](https://www.pom.computer/spec.html) for a description
+/// of schemas.
+#[allow(dead_code)] // TODO
+#[derive(Default)]
+pub struct Schema {
+    /// One rule set per setting, sorted by setting name.
+    rules: Vec<(Box<str>, RuleSet)>,
+}
+
+impl Schema {
+    /// Check that `conf` follows this schema, returning an appropriate `Err` if not.
+    pub fn check(&self, _conf: &Configuration) -> Result<()> {
+        todo!()
+    }
+}
+
+impl TryFrom<&Configuration> for Schema {
+    type Error = crate::Error;
+
+    /// Parse every entry of `conf` as a schema rule.
+    ///
+    /// All entries are examined even after a failure, so that every error is
+    /// reported: a single one as is, several as [`Error::Multiple`].
+    fn try_from(conf: &Configuration) -> Result<Self> {
+        let mut errors = vec![];
+        let mut rules: Vec<(&str, Rule)> = vec![];
+        for (key, val) in conf.iter_sorted() {
+            let location = conf.location(key).unwrap();
+            match Rule::parse(&location, key, val) {
+                Ok(rule) => {
+                    // `Rule::parse` only accepts keys of the form `<name>.<rule>`,
+                    // where `<rule>` contains no dots.
+                    let name = key.rsplit_once('.').map_or(key, |(name, _)| name);
+                    rules.push((name, rule));
+                }
+                Err(e) => errors.push(e),
+            }
+        }
+        if errors.len() == 1 {
+            return Err(errors.pop().unwrap());
+        } else if !errors.is_empty() {
+            return Err(Error::Multiple(errors.into()));
+        }
+        // Keys sorted as whole strings do not keep the rules of one setting
+        // together (`a.b.type` sorts between `a.allow_unknown` and `a.default`),
+        // so sort by setting name; the sort is stable.
+        rules.sort_by(|(a, _), (b, _)| a.cmp(b));
+        let mut rule_sets: Vec<(Box<str>, RuleSet)> = vec![];
+        for (name, rule) in rules {
+            if rule_sets.last().is_none_or(|(k, _)| k.as_ref() != name) {
+                // add new rule set
+                rule_sets.push((name.into(), RuleSet::default()));
+            }
+            rule_sets.last_mut().unwrap().1.add_rule(rule);
+        }
+        Ok(Schema { rules: rule_sets })
+    }
+}