summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorpommicket <pommicket@gmail.com>2025-09-09 23:34:05 -0400
committerpommicket <pommicket@gmail.com>2025-09-09 23:34:05 -0400
commit8439ed183206659ad581caf525e078c6bb2c6a64 (patch)
tree50e803277cb078f9bee2e270bface4054ae70c47 /src
parent00ba4ae712a7a372bd937b56053d601ddc6eb3e8 (diff)
Schema parsing
Diffstat (limited to 'src')
-rw-r--r--src/lib.rs32
-rw-r--r--src/schema.rs234
2 files changed, 261 insertions, 5 deletions
diff --git a/src/lib.rs b/src/lib.rs
index 268496f..e079fca 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -14,6 +14,8 @@ use alloc::{format, vec};
use core::fmt;
use core::mem::take;
+mod schema;
+pub use schema::Schema;
#[cfg(test)]
mod tests;
@@ -119,6 +121,12 @@ pub enum Error {
/// None of the errors in the array will be [`Error::Multiple`]'s,
/// and the array will contain at least two elements.
Multiple(Box<[Error]>),
+ /// Bad type in schema
+ SchemaBadType(Location, Box<str>),
+ /// Bad maxlength in schema
+ SchemaBadMaxLength(Location, Box<str>),
+ /// Invalid schema key
+ SchemaBadKey(Location, Box<str>),
}
impl fmt::Display for Error {
@@ -177,6 +185,9 @@ impl fmt::Display for Error {
}
Ok(())
}
+ Self::SchemaBadType(l, t) => write!(f, "{l}: invalid type: {t:?}"),
+ Self::SchemaBadMaxLength(l, m) => write!(f, "{l}: invalid maxlength: {m:?}"),
+ Self::SchemaBadKey(l, k) => write!(f, "{l}: invalid schema key: {k}"),
}
}
}
@@ -752,6 +763,14 @@ impl Configuration {
self.into_iter()
}
+ /// Same as `iter()` (for now), but explicitly marks that keys should be in sorted order.
+ ///
+ /// This is used internally so that we know what we have to fix if `iter()`
+ /// is ever changed to return a non-sorted iterator.
+ fn iter_sorted(&self) -> ConfigurationIter<'_> {
+ self.iter()
+ }
+
fn get_val(&self, key: &str) -> Option<&Value> {
let idx = self.binary_search_for(key).ok()?;
Some(&self.items[idx].1)
@@ -910,13 +929,16 @@ impl Configuration {
}
}
+ /// Parse `self` as a [`Schema`].
+ pub fn to_schema(&self) -> Result<Schema> {
+ Schema::try_from(self)
+ }
+
/// Check that `self` follows the given schema.
///
- /// See the [POM specification](https://www.pom.computer/spec.html) for a description
- /// of schemas.
- pub fn check_against_schema(&self, schema: &Configuration) -> Result<()> {
- _ = schema;
- todo!()
+ /// Equivalent to `schema.check(self)`.
+ pub fn check_against(&self, schema: &Schema) -> Result<()> {
+ schema.check(self)
}
}
diff --git a/src/schema.rs b/src/schema.rs
new file mode 100644
index 0000000..2b17c7c
--- /dev/null
+++ b/src/schema.rs
@@ -0,0 +1,234 @@
+use crate::{Box, Configuration, Error, Location, Result, Vec, vec};
+use core::num::NonZeroU32;
+
+/// Like `Option<f64>` but uses NaN as its "`None`" (to save memory).
+///
+/// Because NaN is the "`None`" marker, NaN itself can never be stored as a value:
+/// the `From<f64>` and `From<Option<f64>>` conversions panic when given NaN.
+///
+/// hopefully this will be added to rust std eventually…
+#[derive(Clone, Copy)]
+struct OptionF64(f64);
+
+impl OptionF64 {
+ /// The "`None`" value (stored as NaN).
+ const NONE: Self = Self(f64::NAN);
+}
+
+/// The default `OptionF64` is [`OptionF64::NONE`].
+impl Default for OptionF64 {
+ fn default() -> Self {
+ Self::NONE
+ }
+}
+
+/// Formats exactly like the equivalent `Option<f64>` (`None` or `Some(x)`).
+impl core::fmt::Debug for OptionF64 {
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+ write!(f, "{:?}", Option::<f64>::from(*self))
+ }
+}
+
+/// Converts back to a real `Option`: NaN becomes `None`, anything else `Some`.
+impl From<OptionF64> for Option<f64> {
+ fn from(value: OptionF64) -> Self {
+ if value.0.is_nan() {
+ None
+ } else {
+ Some(value.0)
+ }
+ }
+}
+/// Converts from a real `Option`.
+///
+/// Panics on `Some(NaN)`, since that value is indistinguishable from `None`.
+impl From<Option<f64>> for OptionF64 {
+ fn from(value: Option<f64>) -> Self {
+ match value {
+ Some(x) if x.is_nan() => {
+ // just panic-- this isn't a public API
+ panic!("OptionF64 can't contain NaN")
+ }
+ Some(x) => Self(x),
+ None => Self::default(),
+ }
+ }
+}
+/// Wraps a present value.
+///
+/// Panics if `value` is NaN, since that value is indistinguishable from `None`.
+impl From<f64> for OptionF64 {
+ fn from(value: f64) -> Self {
+ // just panic-- this isn't a public API
+ assert!(!value.is_nan(), "OptionF64 can't contain NaN");
+ Self(value)
+ }
+}
+
+/// The accumulated rules of one schema entry.
+///
+/// Every field starts out unset (`None` / [`OptionF64::NONE`]) and is filled in
+/// by [`RuleSet::add_rule`] from the matching [`Rule`] variant.
+#[derive(Default)]
+struct RuleSet {
+ /// Set from `Rule::Type` (a `.type` key).
+ r#type: Option<Type>,
+ /// Set from `Rule::MaxLength` (a `.maxlength` key).
+ maxlength: Option<NonZeroU32>,
+ /// Set from `Rule::Min` (a `.min` key).
+ min: OptionF64,
+ /// Set from `Rule::Max` (a `.max` key).
+ max: OptionF64,
+ /// Set from `Rule::Default` (a `.default` key); stored as the raw string value.
+ default: Option<Box<str>>,
+ /// Set from `Rule::AllowUnknown` (a `.allow_unknown` key).
+ allow_unknown: Option<bool>,
+}
+
+/// A single parsed schema rule, as produced by [`Rule::parse`] from one key/value pair.
+#[allow(dead_code)] // TODO
+enum Rule {
+ /// From a key ending in `.type`.
+ Type(Type),
+ /// From a key ending in `.maxlength`.
+ MaxLength(NonZeroU32),
+ /// From a key ending in `.min`.
+ Min(f64),
+ /// From a key ending in `.max`.
+ Max(f64),
+ /// From a key ending in `.default`; the value is kept verbatim.
+ Default(Box<str>),
+ /// From a key ending in `.allow_unknown`.
+ AllowUnknown(bool),
+}
+
+impl Rule {
+    /// Parse a single schema entry into a [`Rule`].
+    ///
+    /// The kind of rule is selected by the suffix of `key` (`.type`, `.min`, `.max`,
+    /// `.maxlength`, `.default` or `.allow_unknown`) and `val` is parsed accordingly.
+    ///
+    /// # Errors
+    ///
+    /// - [`Error::SchemaBadKey`] if `key` has none of the suffixes above.
+    /// - [`Error::SchemaBadMaxLength`] if a `.maxlength` value is not an integer
+    ///   in `1..=0x7fff_ffff`.
+    /// - Whatever error the type / float / bool parser reports for `val`.
+    ///
+    /// All errors are tagged with `location`.
+    fn parse(location: &Location, key: &str, val: &str) -> Result<Rule> {
+        Ok(if key.ends_with(".type") {
+            let r#type = Type::parse(location, val)?;
+            Rule::Type(r#type)
+        } else if key.ends_with(".min") {
+            let min = crate::parse_float(location, val)?;
+            Rule::Min(min)
+        } else if key.ends_with(".max") {
+            let max = crate::parse_float(location, val)?;
+            Rule::Max(max)
+        } else if key.ends_with(".maxlength") {
+            // NonZeroU32 parsing already rejects 0, negatives and non-integers.
+            let max_length = val
+                .parse::<NonZeroU32>()
+                .map_err(|_| Error::SchemaBadMaxLength(location.clone(), val.into()))?;
+            // Additionally cap the length at i32::MAX.
+            if max_length.get() > 0x7fff_ffff {
+                return Err(Error::SchemaBadMaxLength(location.clone(), val.into()));
+            }
+            Rule::MaxLength(max_length)
+        } else if key.ends_with(".default") {
+            // Defaults are stored verbatim; they are not validated here.
+            Rule::Default(val.into())
+        } else if key.ends_with(".allow_unknown") {
+            Rule::AllowUnknown(crate::parse_bool(location, val)?)
+        } else {
+            return Err(Error::SchemaBadKey(location.clone(), key.into()));
+        })
+    }
+}
+
+impl RuleSet {
+ /// Record `rule` in this set, overwriting any previously stored rule of the same kind.
+ fn add_rule(&mut self, rule: Rule) {
+ match rule {
+ Rule::Type(t) => self.r#type = Some(t),
+ Rule::MaxLength(m) => self.maxlength = Some(m),
+ Rule::Default(d) => self.default = Some(d),
+ Rule::AllowUnknown(a) => self.allow_unknown = Some(a),
+ // NOTE(review): `f64 -> OptionF64` panics on NaN; this presumably relies on
+ // `parse_float` never producing NaN -- confirm.
+ Rule::Min(m) => self.min = m.into(),
+ Rule::Max(m) => self.max = m.into(),
+ }
+ }
+}
+
+/// A type expression from a schema `.type` rule, as produced by [`Type::parse`].
+#[allow(dead_code)] // TODO
+enum Type {
+ /// Written `Any` or `String`.
+ Any,
+ /// Written `None`.
+ None,
+ /// Written `Bool`.
+ Bool,
+ /// Written `UInt`.
+ UInt,
+ /// Written `Int`.
+ Int,
+ /// Written `Float`.
+ Float,
+ /// A single-quoted literal string; `Empty` is shorthand for the empty literal.
+ Literal(Box<str>),
+ /// Written `Optional[T]`.
+ Optional(Box<Type>),
+ /// A list whose elements have the inner type.
+ List(Box<Type>),
+ /// Several alternatives separated by `|`.
+ Union(Box<[Type]>),
+}
+
+impl Type {
+    /// Parse a schema type expression.
+    ///
+    /// Accepted forms (leading/trailing spaces and tabs are ignored):
+    /// the keywords `Int`, `UInt`, `Float`, `Bool`, `None`, `Any`, `String`
+    /// (same as `Any`) and `Empty` (the empty literal); a single-quoted
+    /// literal `'text'`; `Optional[T]`; `List[T]`; and unions `A | B | ...`.
+    ///
+    /// A `|` only separates union alternatives when it is outside of any
+    /// `[...]` and outside of any quoted literal.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`Error::SchemaBadType`] (tagged with `location`) if `s`, or
+    /// any nested type inside it, is not one of the forms above.
+    fn parse(location: &Location, s: &str) -> Result<Self> {
+        let s = s.trim_matches(['\t', ' ']);
+        match s {
+            "Int" => return Ok(Self::Int),
+            "None" => return Ok(Self::None),
+            "Float" => return Ok(Self::Float),
+            "UInt" => return Ok(Self::UInt),
+            "Any" | "String" => return Ok(Self::Any),
+            "Bool" => return Ok(Self::Bool),
+            "Empty" => return Ok(Self::Literal("".into())),
+            _ => {}
+        }
+        let mut in_string = false;
+        let mut brackets = 0isize;
+        // Byte offsets where each union alternative starts. The first
+        // alternative always starts at 0; each top-level `|` at offset `i`
+        // starts another one at `i + 1`.
+        let mut ors = vec![0];
+        for (i, c) in s.char_indices() {
+            if c == '\'' {
+                in_string = !in_string;
+                continue;
+            }
+            if in_string {
+                // `[`, `]` and `|` inside a quoted literal are plain text.
+                continue;
+            }
+            match c {
+                '[' => brackets += 1,
+                ']' => brackets -= 1,
+                '|' if brackets == 0 => ors.push(i + 1),
+                _ => {}
+            }
+        }
+        if ors.len() > 1 {
+            // Sentinel: where an alternative *would* start after a trailing `|`,
+            // so every alternative is `s[start..next_start - 1]`.
+            ors.push(s.len() + 1);
+            let mut subtypes: Vec<Type> = Vec::with_capacity(ors.len() - 1);
+            for w in ors.windows(2) {
+                let [start, end] = w else { unreachable!() };
+                subtypes.push(Self::parse(location, &s[*start..*end - 1])?);
+            }
+            return Ok(Self::Union(subtypes.into()));
+        }
+        if let Some(lit) = s.strip_prefix('\'').and_then(|s| s.strip_suffix('\'')) {
+            return Ok(Self::Literal(lit.into()));
+        }
+        if let Some(opt) = s
+            .strip_prefix("Optional[")
+            .and_then(|s| s.strip_suffix(']'))
+        {
+            return Ok(Self::Optional(Box::new(Self::parse(location, opt)?)));
+        }
+        if let Some(of) = s.strip_prefix("List[").and_then(|s| s.strip_suffix(']')) {
+            return Ok(Self::List(Box::new(Self::parse(location, of)?)));
+        }
+        Err(Error::SchemaBadType(location.clone(), s.into()))
+    }
+}
+
+/// A POM schema.
+///
+/// See the [POM specification](https://www.pom.computer/spec.html) for a description
+/// of schemas.
+///
+/// Build one from a parsed [`Configuration`] with `Schema::try_from(&conf)`.
+#[allow(dead_code)] // TODO
+#[derive(Default)]
+pub struct Schema {
+ /// Rule sets, each paired with the key it was created for, in the order
+ /// they were produced while converting from a [`Configuration`].
+ rules: Vec<(Box<str>, RuleSet)>,
+}
+
+impl Schema {
+ /// Check that `conf` follows this schema, returning an appropriate `Err` if not.
+ ///
+ /// # Panics
+ ///
+ /// Not implemented yet: this currently always panics via `todo!()`.
+ pub fn check(&self, _conf: &Configuration) -> Result<()> {
+ todo!()
+ }
+}
+
+impl TryFrom<&Configuration> for Schema {
+    type Error = crate::Error;
+    /// Interpret every key/value pair of `conf` as a schema rule and collect
+    /// the rules into one [`RuleSet`] per setting.
+    ///
+    /// The setting a rule belongs to is its key with the final `.component`
+    /// (`.type`, `.min`, ...) removed, so `a.b.type` and `a.b.max` end up in
+    /// the same set, keyed by `a.b`. The resulting sets are sorted by setting.
+    ///
+    /// # Errors
+    ///
+    /// All entries are examined even after a failure. A single bad entry is
+    /// returned as-is; two or more are wrapped in [`Error::Multiple`].
+    fn try_from(conf: &Configuration) -> Result<Self> {
+        let mut errors = vec![];
+        // (setting, rule) pairs for every entry that parsed successfully.
+        let mut rules: Vec<(&str, Rule)> = vec![];
+        for (key, val) in conf.iter_sorted() {
+            // `key` was just yielded by `conf`, so it must have a location.
+            let location = conf.location(key).unwrap();
+            match Rule::parse(&location, key, val) {
+                Ok(rule) => {
+                    // A successfully parsed key always ends in `.<rule>`;
+                    // fall back to the whole key just in case.
+                    let setting = key.rsplit_once('.').map_or(key, |(setting, _)| setting);
+                    rules.push((setting, rule));
+                }
+                Err(e) => errors.push(e),
+            }
+        }
+        if errors.len() == 1 {
+            return Err(errors.pop().unwrap());
+        } else if !errors.is_empty() {
+            return Err(Error::Multiple(errors.into()));
+        }
+        // Sorting by full key does not keep one setting's rules adjacent
+        // (e.g. `a.b.type` sorts between `a.allow_unknown` and `a.default`),
+        // so re-sort by setting before grouping. The sort is stable.
+        rules.sort_by(|a, b| a.0.cmp(b.0));
+        let mut rule_sets: Vec<(Box<str>, RuleSet)> = vec![];
+        for (setting, rule) in rules {
+            if rule_sets.last().is_none_or(|(k, _)| k.as_ref() != setting) {
+                // first rule seen for this setting: start a new rule set
+                rule_sets.push((setting.into(), RuleSet::default()));
+            }
+            rule_sets.last_mut().unwrap().1.add_rule(rule);
+        }
+        Ok(Schema { rules: rule_sets })
+    }
+}