summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorpommicket <pommicket@gmail.com>2025-09-10 02:14:54 -0400
committerpommicket <pommicket@gmail.com>2025-09-10 02:14:54 -0400
commited0182736a20e0987c6dc9c5e086a30fd1b02f8b (patch)
tree4113e76e73e60fe43d72c707bcbbdb47a1ced0f7
parent8439ed183206659ad581caf525e078c6bb2c6a64 (diff)
More schemas, but probably going to get rid of it
-rw-r--r--src/lib.rs55
-rw-r--r--src/schema.rs332
2 files changed, 290 insertions, 97 deletions
diff --git a/src/lib.rs b/src/lib.rs
index e079fca..0763fa7 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -38,6 +38,19 @@ impl Location {
pub fn line(&self) -> u64 {
self.line
}
+
+    /// Placeholder location for internal use, when there is no real position to point at.
+    ///
+    /// It has an empty file name and line number 0; `is_dummy` recognizes it by that
+    /// line number.
+    fn dummy() -> Self {
+        let file = Arc::from("");
+        Self { file, line: 0 }
+    }
+
+    /// Reports whether this location is the placeholder created by `dummy`.
+    fn is_dummy(&self) -> bool {
+        // a line number of 0 is never used ordinarily, so it marks the placeholder
+        let Self { line, .. } = self;
+        *line == 0
+    }
}
impl fmt::Display for Location {
@@ -127,6 +140,12 @@ pub enum Error {
SchemaBadMaxLength(Location, Box<str>),
/// Invalid schema key
SchemaBadKey(Location, Box<str>),
+ /// Value is less than the schema-imposed minimum
+ SchemaValueLessThanMin(Location, Box<str>, f64, f64),
+ /// Value is greater than the schema-imposed maximum
+ SchemaValueGreaterThanMax(Location, Box<str>, f64, f64),
+ /// Value is longer than the schema-imposed maxlength
+ SchemaValueTooLong(Location, Box<str>, usize, usize),
}
impl fmt::Display for Error {
@@ -188,6 +207,18 @@ impl fmt::Display for Error {
Self::SchemaBadType(l, t) => write!(f, "{l}: invalid type: {t:?}"),
Self::SchemaBadMaxLength(l, m) => write!(f, "{l}: invalid maxlength: {m:?}"),
Self::SchemaBadKey(l, k) => write!(f, "{l}: invalid schema key: {k}"),
+ Self::SchemaValueLessThanMin(l, key, val, min) => write!(
+ f,
+ "{l}: {key}'s value of {val} is less than the minimum ({min})"
+ ),
+ Self::SchemaValueGreaterThanMax(l, key, val, max) => write!(
+ f,
+ "{l}: {key}'s value of {val} is greater than the maximum ({max})"
+ ),
+ Self::SchemaValueTooLong(l, key, len, maxlen) => write!(
+ f,
+ "{l}: {key}'s value has length {len}, which exceeds the maximum of {maxlen}"
+ ),
}
}
}
@@ -417,6 +448,16 @@ fn parse_hex_digit(c: char) -> Option<u32> {
})
}
+/// Turns a list of collected errors into a single `Result`.
+///
+/// - no errors: `Ok(())`
+/// - exactly one error: that error, returned as-is
+/// - several errors: an `Error::Multiple` wrapping all of them
+fn check_error_vec(mut errors: Vec<Error>) -> Result<()> {
+    if errors.len() > 1 {
+        return Err(Error::Multiple(errors.into()));
+    }
+    // at most one error is left: `None` means success, `Some(e)` is the lone error
+    errors.pop().map_or(Ok(()), Err)
+}
+
#[derive(Default)]
struct Parser {
nonfatal_errors: Vec<Error>,
@@ -664,11 +705,8 @@ impl Parser {
))));
}
}
- match self.nonfatal_errors.len() {
- 0 => Ok(Configuration { items }),
- 1 => Err(self.nonfatal_errors.pop().unwrap()),
- 2.. => Err(Error::Multiple(take(&mut self.nonfatal_errors).into())),
- }
+ check_error_vec(take(&mut self.nonfatal_errors))?;
+ Ok(Configuration { items })
}
}
@@ -933,13 +971,6 @@ impl Configuration {
pub fn to_schema(&self) -> Result<Schema> {
Schema::try_from(self)
}
-
- /// Check that `self` follows the given schema.
- ///
- /// Equivalent to `schema.check(self)`.
- pub fn check_against(&self, schema: &Schema) -> Result<()> {
- schema.check(self)
- }
}
/// Opaque type returned by [`Configuration::iter`].
diff --git a/src/schema.rs b/src/schema.rs
index 2b17c7c..ba7c440 100644
--- a/src/schema.rs
+++ b/src/schema.rs
@@ -1,5 +1,6 @@
-use crate::{Box, Configuration, Error, Location, Result, Vec, vec};
+use crate::{Arc, Box, Configuration, Error, Location, Result, Vec, vec};
use core::num::NonZeroU32;
+use core::ops::Range;
/// Like `Option<f64>` but uses NaN as its "`None`" (to save memory).
///
@@ -8,12 +9,17 @@ use core::num::NonZeroU32;
struct OptionF64(f64);
impl OptionF64 {
- const NONE: Self = Self(f64::NAN);
+    /// Whether a value is present, i.e. the stored float is not NaN.
+    fn is_some(self) -> bool {
+        let Self(raw) = self;
+        !raw.is_nan()
+    }
+    /// Returns `self` if it holds a value (is not NaN), and `other` otherwise.
+    fn or(self, other: OptionF64) -> Self {
+        if self.0.is_nan() { other } else { self }
+    }
}
impl Default for OptionF64 {
fn default() -> Self {
- Self::NONE
+ Self(f64::NAN)
}
}
@@ -44,76 +50,8 @@ impl From<Option<f64>> for OptionF64 {
}
}
}
-impl From<f64> for OptionF64 {
- fn from(value: f64) -> Self {
- // just panic-- this isn't a public API
- assert!(!value.is_nan(), "OptionF64 can't contain NaN");
- Self(value)
- }
-}
-
-#[derive(Default)]
-struct RuleSet {
- r#type: Option<Type>,
- maxlength: Option<NonZeroU32>,
- min: OptionF64,
- max: OptionF64,
- default: Option<Box<str>>,
- allow_unknown: Option<bool>,
-}
-
-#[allow(dead_code)] // TODO
-enum Rule {
- Type(Type),
- MaxLength(NonZeroU32),
- Min(f64),
- Max(f64),
- Default(Box<str>),
- AllowUnknown(bool),
-}
-
-impl Rule {
- fn parse(location: &Location, key: &str, val: &str) -> Result<Rule> {
- Ok(if key.ends_with(".type") {
- let r#type = Type::parse(location, val)?;
- Rule::Type(r#type)
- } else if key.ends_with(".min") {
- let max = crate::parse_float(location, val)?;
- Rule::Min(max)
- } else if key.ends_with(".max") {
- let max = crate::parse_float(location, val)?;
- Rule::Max(max)
- } else if key.ends_with(".maxlength") {
- let max = val
- .parse::<NonZeroU32>()
- .map_err(|_| Error::SchemaBadMaxLength(location.clone(), val.into()))?;
- if max.get() > 0x7fff_ffff {
- return Err(Error::SchemaBadMaxLength(location.clone(), val.into()));
- }
- Rule::MaxLength(max)
- } else if key.ends_with(".default") {
- Rule::Default(val.into())
- } else if key.ends_with(".allow_unknown") {
- Rule::AllowUnknown(crate::parse_bool(location, val)?)
- } else {
- return Err(Error::SchemaBadKey(location.clone(), key.into()));
- })
- }
-}
-
-impl RuleSet {
- fn add_rule(&mut self, rule: Rule) {
- match rule {
- Rule::Type(t) => self.r#type = Some(t),
- Rule::MaxLength(m) => self.maxlength = Some(m),
- Rule::Default(d) => self.default = Some(d),
- Rule::AllowUnknown(a) => self.allow_unknown = Some(a),
- Rule::Min(m) => self.min = m.into(),
- Rule::Max(m) => self.max = m.into(),
- }
- }
-}
+#[derive(Clone, Debug)]
#[allow(dead_code)] // TODO
enum Type {
Any,
@@ -122,10 +60,10 @@ enum Type {
UInt,
Int,
Float,
- Literal(Box<str>),
- Optional(Box<Type>),
- List(Box<Type>),
- Union(Box<[Type]>),
+ Literal(Arc<str>),
+ Optional(Arc<Type>),
+ List(Arc<Type>),
+ Union(Arc<[Type]>),
}
impl Type {
@@ -174,15 +112,117 @@ impl Type {
.strip_prefix("Optional[")
.and_then(|s| s.strip_suffix(']'))
{
- return Ok(Self::Optional(Box::new(Self::parse(location, opt)?)));
+ return Ok(Self::Optional(Arc::new(Self::parse(location, opt)?)));
}
if let Some(of) = s.strip_prefix("List[").and_then(|s| s.strip_suffix(']')) {
- return Ok(Self::Optional(Box::new(Self::parse(location, of)?)));
+ // `List[T]` must become `Type::List`; returning `Type::Optional` here made a list type indistinguishable from `Optional[T]`
+ return Ok(Self::List(Arc::new(Self::parse(location, of)?)));
}
Err(Error::SchemaBadType(location.clone(), s.into()))
}
}
+#[derive(Debug)]
+enum Rule { // a single parsed schema rule; `Rule::parse` picks the variant from the key's suffix
+ Type(Type), // from a `.type` key: the parsed type
+ MaxLength(NonZeroU32), // from a `.maxlength` key: non-zero and at most 0x7fff_ffff
+ Min(f64), // from a `.min` key
+ Max(f64), // from a `.max` key
+ Default(Arc<str>), // from a `.default` key: the value text, stored verbatim
+ AllowUnknown(bool), // from an `.allow_unknown` key
+}
+
+impl Rule {
+    /// Parses one schema entry into the rule it describes.
+    ///
+    /// The kind of rule is selected by the last `.`-separated component of `key`
+    /// (`type`, `min`, `max`, `maxlength`, `default` or `allow_unknown`), and `val`
+    /// is parsed accordingly. `location` is only used to build errors.
+    ///
+    /// Returns an error if `val` is not valid for the selected rule, or
+    /// `Error::SchemaBadKey` if `key` does not end in a known rule name.
+    fn parse(location: &Location, key: &str, val: &str) -> Result<Rule> {
+        let bad_max_length = || Error::SchemaBadMaxLength(location.clone(), val.into());
+        // everything after the final '.' (if there is one) names the rule
+        let rule_name = key.rsplit_once('.').map(|(_, name)| name);
+        Ok(match rule_name {
+            Some("type") => Rule::Type(Type::parse(location, val)?),
+            Some("min") => Rule::Min(crate::parse_float(location, val)?),
+            Some("max") => Rule::Max(crate::parse_float(location, val)?),
+            Some("maxlength") => {
+                let limit = val
+                    .parse::<NonZeroU32>()
+                    .map_err(|_| bad_max_length())?;
+                // lengths above 0x7fff_ffff are rejected as well
+                if limit.get() > 0x7fff_ffff {
+                    return Err(bad_max_length());
+                }
+                Rule::MaxLength(limit)
+            }
+            Some("default") => Rule::Default(val.into()),
+            Some("allow_unknown") => Rule::AllowUnknown(crate::parse_bool(location, val)?),
+            _ => return Err(Error::SchemaBadKey(location.clone(), key.into())),
+        })
+    }
+}
+
+#[derive(Debug, Clone)]
+struct RuleSet { // all rules attached to one schema key; unset rules are `None` (NaN for `min`/`max`)
+ location: Location, // location passed to `RuleSet::new`; a dummy one is replaced by `fall_back_on`
+ r#type: Option<Type>, // set by `Rule::Type`
+ maxlength: Option<NonZeroU32>, // set by `Rule::MaxLength`
+ min: OptionF64, // set by `Rule::Min`
+ max: OptionF64, // set by `Rule::Max`
+ default: Option<Arc<str>>, // set by `Rule::Default`
+ allow_unknown: Option<bool>, // set by `Rule::AllowUnknown`
+}
+
+impl RuleSet {
+    /// Creates a rule set located at `location` with every rule left unset.
+    fn new(location: Location) -> Self {
+        let no_bound = OptionF64::default();
+        Self {
+            allow_unknown: None,
+            default: None,
+            maxlength: None,
+            max: no_bound,
+            min: no_bound,
+            r#type: None,
+            location,
+        }
+    }
+}
+
+impl RuleSet {
+    /// Stores `rule` in this set, overwriting any rule of the same kind set earlier.
+    fn add_rule(&mut self, rule: Rule) {
+        match rule {
+            Rule::Min(bound) => self.min = OptionF64(bound),
+            Rule::Max(bound) => self.max = OptionF64(bound),
+            Rule::MaxLength(limit) => self.maxlength = Some(limit),
+            Rule::Type(ty) => self.r#type = Some(ty),
+            Rule::Default(text) => self.default = Some(text),
+            Rule::AllowUnknown(allow) => self.allow_unknown = Some(allow),
+        }
+    }
+
+    /// Merges `other` into `self`, giving `self` precedence.
+    ///
+    /// Only rules that `self` leaves unset are taken from `other`. Likewise,
+    /// the location of `other` is adopted only if `self` still has the dummy location.
+    fn fall_back_on(&mut self, other: &RuleSet) {
+        // destructure every field so that a newly added field can't be forgotten here
+        let RuleSet {
+            location,
+            r#type,
+            maxlength,
+            min,
+            max,
+            default,
+            allow_unknown,
+        } = other;
+        if self.location.is_dummy() {
+            self.location = location.clone();
+        }
+        self.r#type = self.r#type.take().or_else(|| r#type.clone());
+        if self.maxlength.is_none() {
+            self.maxlength = *maxlength;
+        }
+        if !self.min.is_some() {
+            self.min = *min;
+        }
+        if !self.max.is_some() {
+            self.max = *max;
+        }
+        self.default = self.default.take().or_else(|| default.clone());
+        if self.allow_unknown.is_none() {
+            self.allow_unknown = *allow_unknown;
+        }
+    }
+}
+
/// A POM schema.
///
/// See the [POM specification](https://www.pom.computer/spec.html) for a description
@@ -194,9 +234,123 @@ pub struct Schema {
}
impl Schema {
- /// Check that `conf` follows this schema, returning an appropriate `Err` if not.
- pub fn check(&self, _conf: &Configuration) -> Result<()> {
- todo!()
+    /// Load a schema from a file path.
+    ///
+    /// The file is first loaded as a [`Configuration`], which is then converted
+    /// into a schema; an error from either step is returned.
+    #[cfg(feature = "std")]
+    pub fn load_path<P: AsRef<std::path::Path>>(path: P) -> Result<Self> {
+        let conf = Configuration::load_path(path)?;
+        Self::try_from(&conf)
+    }
+
+    /// Load a schema from `reader`.
+    ///
+    /// The input is first loaded as a configuration and then converted into a schema.
+    /// See [`Configuration::load`] for more information.
+    pub fn load<R: crate::Read>(filename: &str, reader: R) -> Result<Self> {
+        let conf = Configuration::load(filename, reader)?;
+        Self::try_from(&conf)
+    }
+
+    /// Binary-searches `self.rules[range]` for an entry whose key equals `key`.
+    ///
+    /// Like `slice::binary_search_by`, this returns `Ok(index)` on a match and
+    /// `Err(insertion_index)` otherwise, but both indices are relative to all of
+    /// `self.rules` rather than to the searched sub-slice.
+    fn binary_search_range(
+        &self,
+        range: Range<usize>,
+        key: &str,
+    ) -> core::result::Result<usize, usize> {
+        let offset = range.start;
+        match self.rules[range].binary_search_by(|(k, _)| k.as_ref().cmp(key)) {
+            Ok(found) => Ok(found + offset),
+            Err(insert_at) => Err(insert_at + offset),
+        }
+    }
+ fn get_rule_set_(&self, prefix: &str, key: &str, range: Range<usize>) -> RuleSet { // merged rules for `key` found in `self.rules[range]`; `prefix` is prepended to every key that is looked up
+ // definitely overwrought. but it was fun to write. (relies on `self.rules` being sorted by key, since every lookup is a binary search)
+ if range.is_empty() {
+ return RuleSet::new(Location::dummy()); // nothing to search: a rule set with no rules and a dummy location
+ }
+ if let Some((first_component, rest)) = key.split_once('.') { // more components follow: narrow the range and recurse on `rest`
+ let exact_key_dot = format!("{prefix}{first_component}.");
+ // NB: / is the first ASCII character after . (so "<x>/" sorts right after every key starting with "<x>.")
+ let exact_key_slash = format!("{prefix}{first_component}/");
+ let star_key_dot = format!("{prefix}*.");
+ let star_key_slash = format!("{prefix}*/");
+ let exact_keys_start = self
+ .binary_search_range(range.clone(), &exact_key_dot)
+ .expect_err("key should not end in ."); // NOTE(review): panics if a rule key equal to `exact_key_dot` exists -- confirm such keys are rejected earlier
+ let exact_keys_end = self
+ .binary_search_range(range.clone(), &exact_key_slash)
+ .unwrap_or_else(|x| x); // found or not, this index ends the run of keys starting with `exact_key_dot`
+ let mut exact_rule_set =
+ self.get_rule_set_(&exact_key_dot, rest, exact_keys_start..exact_keys_end);
+ let star_keys_start = self
+ .binary_search_range(range.clone(), &star_key_dot)
+ .expect_err("key should not end in ."); // same caveat as above, for a rule key equal to `star_key_dot`
+ let star_keys_end = self
+ .binary_search_range(range.clone(), &star_key_slash)
+ .unwrap_or_else(|x| x);
+ let star_rule_set =
+ self.get_rule_set_(&star_key_dot, rest, star_keys_start..star_keys_end);
+ exact_rule_set.fall_back_on(&star_rule_set); // rules found under the exact component win; the `*` rules only fill what is unset
+ exact_rule_set
+ } else { // last component: look up the full key, then the `*` key at this level
+ let mut rule_set = RuleSet::new(Location::dummy());
+ if let Ok(exact_key_rule_idx) =
+ self.binary_search_range(range.clone(), &format!("{prefix}{key}"))
+ {
+ rule_set = self.rules[exact_key_rule_idx].1.clone();
+ }
+ if let Ok(star_key_rule_idx) =
+ self.binary_search_range(range.clone(), &format!("{prefix}*"))
+ {
+ rule_set.fall_back_on(&self.rules[star_key_rule_idx].1); // the `*` rule only fills what the exact rule left unset
+ }
+ rule_set
+ }
+ }
+    /// Returns the merged rules that apply to the configuration key `key`,
+    /// searching all of this schema's rules.
+    fn get_rule_set(&self, key: &str) -> RuleSet {
+        let all_rules = 0..self.rules.len();
+        self.get_rule_set_("", key, all_rules)
+    }
+    /// Check that `conf` follows this schema, and fill in default values.
+    ///
+    /// For every key in `conf`, the applicable rules are looked up and the value is
+    /// checked against the `min`/`max` bounds (only when it parses as a float) and
+    /// against `maxlength` (compared with the value's `len()`).
+    ///
+    /// All violations are collected and reported together instead of stopping at
+    /// the first one.
+    pub fn check_and_fill_defaults(&self, conf: &mut Configuration) -> Result<()> {
+        let mut violations = vec![];
+        for (key, text) in conf.iter() {
+            let rules = self.get_rule_set(key);
+            let here = || conf.location(key).unwrap();
+            if rules.min.is_some() || rules.max.is_some() {
+                // the dummy location could only end up in the parse error, which is
+                // discarded: values that aren't numbers are simply not range-checked
+                if let Ok(number) = crate::parse_float(&Location::dummy(), text) {
+                    let lower: Option<f64> = rules.min.into();
+                    match lower {
+                        Some(min) if number < min => violations.push(
+                            Error::SchemaValueLessThanMin(here(), key.into(), number, min),
+                        ),
+                        _ => {}
+                    }
+                    let upper: Option<f64> = rules.max.into();
+                    match upper {
+                        Some(max) if number > max => violations.push(
+                            Error::SchemaValueGreaterThanMax(here(), key.into(), number, max),
+                        ),
+                        _ => {}
+                    }
+                }
+            }
+            // a limit that doesn't fit in `usize` is skipped
+            let limit = rules
+                .maxlength
+                .and_then(|n| usize::try_from(n.get()).ok());
+            match limit {
+                Some(limit) if text.len() > limit => violations.push(Error::SchemaValueTooLong(
+                    here(),
+                    key.into(),
+                    text.len(),
+                    limit,
+                )),
+                _ => {}
+            }
+            // TODO: type, allow_unknown
+        }
+        // TODO: replace default values, report missing values
+        crate::check_error_vec(violations)
+    }
}
@@ -204,7 +358,7 @@ impl TryFrom<&Configuration> for Schema {
type Error = crate::Error;
fn try_from(conf: &Configuration) -> Result<Self> {
let mut errors = vec![];
- let mut rules: Vec<(&str, Rule)> = vec![];
+ let mut rules: Vec<(&str, Location, Rule)> = vec![];
for (key, val) in conf.iter_sorted() {
let location = conf.location(key).unwrap();
let rule = match Rule::parse(&location, key, val) {
@@ -214,16 +368,24 @@ impl TryFrom<&Configuration> for Schema {
continue;
}
};
- rules.push((key, rule));
+ let Some((affected_key, _rule_name)) = key.rsplit_once('.') else {
+ unreachable!("Rule::parse shouldn't accept this key");
+ };
+ rules.push((affected_key, location, rule));
}
let mut rule_sets: Vec<(Box<str>, RuleSet)> = vec![];
- for (key, rule) in rules {
+ for (key, location, rule) in rules {
if rule_sets.last().is_none_or(|(k, _)| k.as_ref() != key) {
// add new rule set
- rule_sets.push((key.into(), RuleSet::default()));
+ rule_sets.push((key.into(), RuleSet::new(location)));
}
rule_sets.last_mut().unwrap().1.add_rule(rule);
}
+ for (_, rule_set) in &rule_sets {
+ if let Err(e) = rule_set.check_consistency() {
+ errors.push(e);
+ }
+ }
if errors.len() == 1 {
return Err(errors.pop().unwrap());
} else if !errors.is_empty() {