Schema parsing

author: pommicket <pommicket@gmail.com> 2025-09-09 23:34:05 -0400
committer: pommicket <pommicket@gmail.com> 2025-09-09 23:34:05 -0400
commit: 8439ed183206659ad581caf525e078c6bb2c6a64 (patch)
tree: 50e803277cb078f9bee2e270bface4054ae70c47 /src
parent: 00ba4ae712a7a372bd937b56053d601ddc6eb3e8 (diff)
2 files changed, 261 insertions, 5 deletions
diff --git a/src/lib.rs b/src/lib.rs
index 268496f..e079fca 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -14,6 +14,8 @@ use alloc::{format, vec};
 use core::fmt;
 use core::mem::take;
 
+mod schema;
+pub use schema::Schema;
 #[cfg(test)]
 mod tests;
 
@@ -119,6 +121,12 @@ pub enum Error {
 	/// None of the errors in the array will be [`Error::Multiple`]'s,
 	/// and the array will contain at least two elements.
 	Multiple(Box<[Error]>),
+	/// Bad type in schema
+	SchemaBadType(Location, Box<str>),
+	/// Bad maxlength in schema
+	SchemaBadMaxLength(Location, Box<str>),
+	/// Invalid schema key
+	SchemaBadKey(Location, Box<str>),
 }
 
 impl fmt::Display for Error {
@@ -177,6 +185,9 @@ impl fmt::Display for Error {
 				}
 				Ok(())
 			}
+			Self::SchemaBadType(l, t) => write!(f, "{l}: invalid type: {t:?}"),
+			Self::SchemaBadMaxLength(l, m) => write!(f, "{l}: invalid maxlength: {m:?}"),
+			Self::SchemaBadKey(l, k) => write!(f, "{l}: invalid schema key: {k}"),
 		}
 	}
 }
@@ -752,6 +763,14 @@ impl Configuration {
 		self.into_iter()
 	}
 
+	/// Same as `iter()` (for now), but explicitly marks that the caller needs keys in sorted order.
+	///
+	/// Internal code that depends on sortedness calls this instead of `iter()`, so those
+	/// places are easy to find if `iter()` is ever changed to return a non-sorted iterator.
+	fn iter_sorted(&self) -> ConfigurationIter<'_> {
+		self.iter()
+	}
+
 	fn get_val(&self, key: &str) -> Option<&Value> {
 		let idx = self.binary_search_for(key).ok()?;
 		Some(&self.items[idx].1)
@@ -910,13 +929,16 @@ impl Configuration {
 		}
 	}
 
+	/// Interpret `self` as a [`Schema`], returning an `Err` if it is not a valid one.
+	pub fn to_schema(&self) -> Result<Schema> {
+		self.try_into()
+	}
+
 	/// Check that `self` follows the given schema.
 	///
-	/// See the [POM specification](https://www.pom.computer/spec.html) for a description
-	/// of schemas.
-	pub fn check_against_schema(&self, schema: &Configuration) -> Result<()> {
-		_ = schema;
-		todo!()
+	/// Equivalent to `schema.check(self)`.
+	pub fn check_against(&self, schema: &Schema) -> Result<()> {
+		schema.check(self)
 	}
 }
 
diff --git a/src/schema.rs b/src/schema.rs
new file mode 100644
index 0000000..2b17c7c
--- /dev/null
+++ b/src/schema.rs
@@ -0,0 +1,234 @@
+use crate::{Box, Configuration, Error, Location, Result, Vec, vec};
+use core::num::NonZeroU32;
+
+/// A space-saving stand-in for `Option<f64>`: NaN plays the role of "`None`".
+///
+/// (Maybe std will grow a niche-optimized float like this some day…)
+#[derive(Clone, Copy)]
+struct OptionF64(f64);
+
+impl OptionF64 {
+	/// The empty value, represented by NaN.
+	const NONE: Self = Self(f64::NAN);
+}
+
+impl Default for OptionF64 {
+	/// The default is the empty value.
+	fn default() -> Self {
+		Self(f64::NAN)
+	}
+}
+
+impl core::fmt::Debug for OptionF64 {
+	/// Formats exactly like the equivalent `Option<f64>`.
+	fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+		let as_option: Option<f64> = (*self).into();
+		write!(f, "{as_option:?}")
+	}
+}
+
+impl From<OptionF64> for Option<f64> {
+	/// NaN becomes `None`; any other float becomes `Some`.
+	fn from(value: OptionF64) -> Self {
+		let OptionF64(x) = value;
+		(!x.is_nan()).then_some(x)
+	}
+}
+impl From<Option<f64>> for OptionF64 {
+	/// Panics on `Some(NaN)`, which is reserved for "none" (fine: this isn't a public API).
+	fn from(value: Option<f64>) -> Self {
+		value.map_or(Self::NONE, |x| {
+			assert!(!x.is_nan(), "OptionF64 can't contain NaN");
+			Self(x)
+		})
+	}
+}
+impl From<f64> for OptionF64 {
+	/// Panics on NaN, which is reserved for "none".
+	fn from(value: f64) -> Self {
+		// just panic-- this isn't a public API
+		assert!(!value.is_nan(), "OptionF64 can't contain NaN");
+		Self(value)
+	}
+}
+
+#[derive(Default)] // a fresh set has every rule unset
+struct RuleSet { // the rules gathered for one schema entry; each field holds at most one rule
+	r#type: Option<Type>, // set by a `Rule::Type`
+	maxlength: Option<NonZeroU32>, // set by a `Rule::MaxLength`
+	min: OptionF64, // set by a `Rule::Min`; NaN-encoded "none" until then
+	max: OptionF64, // set by a `Rule::Max`; NaN-encoded "none" until then
+	default: Option<Box<str>>, // set by a `Rule::Default`, kept as the raw string
+	allow_unknown: Option<bool>, // set by a `Rule::AllowUnknown`
+}
+
+#[allow(dead_code)] // TODO
+enum Rule { // one parsed schema entry, before it is merged into a `RuleSet`
+	Type(Type), // key ends in `.type`
+	MaxLength(NonZeroU32), // key ends in `.maxlength`; value is in 1..=0x7fff_ffff
+	Min(f64), // key ends in `.min`
+	Max(f64), // key ends in `.max`
+	Default(Box<str>), // key ends in `.default`; the value is stored unparsed
+	AllowUnknown(bool), // key ends in `.allow_unknown`
+}
+
+impl Rule {
+	/// Parses one schema entry into a [`Rule`].
+	///
+	/// The kind of rule is selected by the last `.`-separated component of `key`, and
+	/// `val` is parsed as its argument. Errors are reported at `location`; a key with
+	/// no recognized final component gives [`Error::SchemaBadKey`].
+	fn parse(location: &Location, key: &str, val: &str) -> Result<Rule> {
+		let bad_maxlength = || Error::SchemaBadMaxLength(location.clone(), val.into());
+		// `rsplit_once` isolates the text after the final `.`, if there is one
+		let rule = match key.rsplit_once('.').map(|(_, suffix)| suffix) {
+			Some("type") => Rule::Type(Type::parse(location, val)?),
+			Some("min") => Rule::Min(crate::parse_float(location, val)?),
+			Some("max") => Rule::Max(crate::parse_float(location, val)?),
+			Some("maxlength") => {
+				let limit: NonZeroU32 = val.parse().map_err(|_| bad_maxlength())?;
+				// accepted range is 1..=0x7fff_ffff (`NonZeroU32` already excludes 0)
+				if limit.get() > 0x7fff_ffff {
+					return Err(bad_maxlength());
+				}
+				Rule::MaxLength(limit)
+			}
+			Some("default") => Rule::Default(val.into()),
+			Some("allow_unknown") => Rule::AllowUnknown(crate::parse_bool(location, val)?),
+			_ => return Err(Error::SchemaBadKey(location.clone(), key.into())),
+		};
+		Ok(rule)
+	}
+}
+
+impl RuleSet {
+	fn add_rule(&mut self, rule: Rule) {
+		match rule { // each arm overwrites whatever rule of that kind was stored before
+			Rule::Type(ty) => self.r#type = Some(ty),
+			Rule::MaxLength(len) => self.maxlength = Some(len),
+			Rule::Min(lo) => self.min = OptionF64::from(lo),
+			Rule::Max(hi) => self.max = OptionF64::from(hi),
+			Rule::Default(text) => self.default = Some(text),
+			Rule::AllowUnknown(flag) => self.allow_unknown = Some(flag),
+		}
+	}
+}
+
+#[allow(dead_code)] // TODO
+enum Type { // a value type as produced by `Type::parse`
+	Any, // written `Any` or `String`
+	None, // written `None`
+	Bool, // written `Bool`
+	UInt, // written `UInt`
+	Int, // written `Int`
+	Float, // written `Float`
+	Literal(Box<str>), // written `'text'`; `Empty` is the empty literal
+	Optional(Box<Type>), // written `Optional[T]`
+	List(Box<Type>), // meant for `List[T]`
+	Union(Box<[Type]>), // meant for `|`-separated alternatives
+}
+
+impl Type {
+	/// Parses a schema type: a builtin name, `'literal'`, `Optional[T]`, `List[T]`,
+	/// or a `|`-separated union of those. Errors are reported at `location`.
+	fn parse(location: &Location, s: &str) -> Result<Self> {
+		let s = s.trim_matches(['\t', ' ']);
+		match s {
+			"Int" => return Ok(Self::Int),
+			"None" => return Ok(Self::None),
+			"Float" => return Ok(Self::Float),
+			"UInt" => return Ok(Self::UInt),
+			"Any" | "String" => return Ok(Self::Any),
+			"Bool" => return Ok(Self::Bool),
+			"Empty" => return Ok(Self::Literal("".into())),
+			_ => {}
+		}
+		let mut in_string = false;
+		let mut brackets = 0isize;
+		// Start offset of each union alternative (one past each top-level `|`); the first is 0.
+		let mut ors = vec![0];
+		for (i, c) in s.char_indices() {
+			match c {
+				'\'' => in_string = !in_string,
+				// inside a quoted literal, brackets and bars are plain text
+				_ if in_string => {}
+				'[' => brackets += 1,
+				']' => brackets -= 1,
+				'|' if brackets == 0 => ors.push(i + 1),
+				_ => {}
+			}
+		}
+		if ors.len() > 1 {
+			ors.push(s.len() + 1);
+			let mut subtypes: Vec<Type> = Vec::with_capacity(ors.len() - 1);
+			for w in ors.windows(2) {
+				let [start, end] = w else { unreachable!() };
+				subtypes.push(Self::parse(location, &s[*start..*end - 1])?);
+			}
+			return Ok(Self::Union(subtypes.into()));
+		}
+		if let Some(lit) = s.strip_prefix('\'').and_then(|s| s.strip_suffix('\'')) {
+			return Ok(Self::Literal(lit.into()));
+		}
+		if let Some(opt) = s
+			.strip_prefix("Optional[")
+			.and_then(|s| s.strip_suffix(']'))
+		{
+			return Ok(Self::Optional(Box::new(Self::parse(location, opt)?)));
+		}
+		if let Some(of) = s.strip_prefix("List[").and_then(|s| s.strip_suffix(']')) {
+			return Ok(Self::List(Box::new(Self::parse(location, of)?)));
+		}
+		Err(Error::SchemaBadType(location.clone(), s.into()))
+	}
+}
+
+/// A POM schema.
+///
+/// See the [POM specification](https://www.pom.computer/spec.html) for a description
+/// of schemas.
+#[allow(dead_code)] // TODO
+#[derive(Default)] // the default schema contains no rules
+pub struct Schema {
+	rules: Vec<(Box<str>, RuleSet)>, // (key, rules for that key) pairs built by `try_from`
+}
+
+impl Schema {
+	/// Check that `conf` follows this schema, returning an appropriate `Err` if not.
+	pub fn check(&self, _conf: &Configuration) -> Result<()> {
+		todo!() // not implemented yet: every call currently panics
+	}
+}
+
+impl TryFrom<&Configuration> for Schema {
+	type Error = crate::Error;
+	/// Parses each entry of `conf` as a rule, grouped by the key it applies to (the entry
+	/// key minus its final `.rule` part); all bad entries are reported, not just the first.
+	fn try_from(conf: &Configuration) -> Result<Self> {
+		let mut errors = vec![];
+		let mut rules: Vec<(&str, Rule)> = vec![];
+		for (key, val) in conf.iter_sorted() {
+			let location = conf.location(key).expect("key was just yielded by conf");
+			match Rule::parse(&location, key, val) {
+				// `Rule::parse` only accepts keys containing a `.`; `key` is just a fallback
+				Ok(rule) => rules.push((key.rsplit_once('.').map_or(key, |(k, _)| k), rule)),
+				Err(e) => errors.push(e),
+			}
+		}
+		match errors.len() {
+			0 => {}
+			1 => return Err(errors.pop().unwrap()),
+			_ => return Err(Error::Multiple(errors.into())),
+		}
+		// Stable sort: same-prefix keys need not be adjacent (e.g. `a.x`, `a.b.x`, `a.y`).
+		rules.sort_by(|a, b| a.0.cmp(b.0));
+		let mut rule_sets: Vec<(Box<str>, RuleSet)> = vec![];
+		for (key, rule) in rules {
+			if rule_sets.last().is_none_or(|(k, _)| k.as_ref() != key) {
+				rule_sets.push((key.into(), RuleSet::default()));
+			}
+			rule_sets.last_mut().unwrap().1.add_rule(rule);
+		}
+		Ok(Schema { rules: rule_sets })
+	}
+}
author	pommicket <pommicket@gmail.com>	2025-09-09 23:34:05 -0400
committer	pommicket <pommicket@gmail.com>	2025-09-09 23:34:05 -0400
commit	8439ed183206659ad581caf525e078c6bb2c6a64 (patch)
tree	50e803277cb078f9bee2e270bface4054ae70c47 /src
parent	00ba4ae712a7a372bd937b56053d601ddc6eb3e8 (diff)