use crate::{Arc, Box, Configuration, Error, Location, Result, Vec, vec}; use core::num::NonZeroU32; use core::ops::Range; /// Like `Option` but uses NaN as its "`None`" (to save memory). /// /// hopefully this will be added to rust std eventually… #[derive(Clone, Copy)] struct OptionF64(f64); impl OptionF64 { fn is_some(self) -> bool { !self.0.is_nan() } fn or(self, other: OptionF64) -> Self { if self.is_some() { self } else { other } } } impl Default for OptionF64 { fn default() -> Self { Self(f64::NAN) } } impl core::fmt::Debug for OptionF64 { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { write!(f, "{:?}", Option::::from(*self)) } } impl From for Option { fn from(value: OptionF64) -> Self { if value.0.is_nan() { None } else { Some(value.0) } } } impl From> for OptionF64 { fn from(value: Option) -> Self { match value { Some(x) if x.is_nan() => { // just panic-- this isn't a public API panic!("OptionF64 can't contain NaN") } Some(x) => Self(x), None => Self::default(), } } } #[derive(Clone, Debug)] #[allow(dead_code)] // TODO enum Type { Any, None, Bool, UInt, Int, Float, Literal(Arc), Optional(Arc), List(Arc), Union(Arc<[Type]>), } impl Type { fn parse(location: &Location, s: &str) -> Result { let s = s.trim_matches(['\t', ' ']); match s { "Int" => return Ok(Self::Int), "None" => return Ok(Self::None), "Float" => return Ok(Self::Float), "UInt" => return Ok(Self::UInt), "Any" | "String" => return Ok(Self::Any), "Bool" => return Ok(Self::Bool), "Empty" => return Ok(Self::Literal("".into())), _ => {} } let mut in_string = false; let mut brackets = 0isize; let mut ors = vec![]; for (i, c) in s.char_indices() { if c == '[' { brackets += 1; } if c == ']' { brackets -= 1; } if c == '\'' { in_string = !in_string; } if c == '|' && brackets == 0 { ors.push(i + 1); } } if ors.len() > 1 { ors.push(s.len() + 1); let mut subtypes: Vec = Vec::with_capacity(ors.len() - 1); for w in ors.windows(2) { let [start, end] = w else { unreachable!() }; subtypes.push(Self::parse(location, &s[*start..*end - 1])?); } return Ok(Self::Union(subtypes.into())); } if let Some(lit) = s.strip_prefix('\'').and_then(|s| s.strip_suffix('\'')) { return Ok(Self::Literal(lit.into())); } if let Some(opt) = s .strip_prefix("Optional[") .and_then(|s| s.strip_suffix(']')) { return Ok(Self::Optional(Arc::new(Self::parse(location, opt)?))); } if let Some(of) = s.strip_prefix("List[").and_then(|s| s.strip_suffix(']')) { return Ok(Self::Optional(Arc::new(Self::parse(location, of)?))); } Err(Error::SchemaBadType(location.clone(), s.into())) } } #[derive(Debug)] enum Rule { Type(Type), MaxLength(NonZeroU32), Min(f64), Max(f64), Default(Arc), AllowUnknown(bool), } impl Rule { fn parse(location: &Location, key: &str, val: &str) -> Result { Ok(if key.ends_with(".type") { let r#type = Type::parse(location, val)?; Rule::Type(r#type) } else if key.ends_with(".min") { let max = crate::parse_float(location, val)?; Rule::Min(max) } else if key.ends_with(".max") { let max = crate::parse_float(location, val)?; Rule::Max(max) } else if key.ends_with(".maxlength") { let max = val .parse::() .map_err(|_| Error::SchemaBadMaxLength(location.clone(), val.into()))?; if max.get() > 0x7fff_ffff { return Err(Error::SchemaBadMaxLength(location.clone(), val.into())); } Rule::MaxLength(max) } else if key.ends_with(".default") { Rule::Default(val.into()) } else if key.ends_with(".allow_unknown") { Rule::AllowUnknown(crate::parse_bool(location, val)?) } else { return Err(Error::SchemaBadKey(location.clone(), key.into())); }) } } #[derive(Debug, Clone)] struct RuleSet { location: Location, r#type: Option, maxlength: Option, min: OptionF64, max: OptionF64, default: Option>, allow_unknown: Option, } impl RuleSet { fn new(location: Location) -> Self { Self { location, r#type: None, maxlength: None, min: OptionF64::default(), max: OptionF64::default(), default: None, allow_unknown: None, } } } impl RuleSet { fn add_rule(&mut self, rule: Rule) { match rule { Rule::Type(t) => self.r#type = Some(t), Rule::MaxLength(m) => self.maxlength = Some(m), Rule::Default(d) => self.default = Some(d), Rule::AllowUnknown(a) => self.allow_unknown = Some(a), Rule::Min(m) => self.min = OptionF64(m), Rule::Max(m) => self.max = OptionF64(m), } } // merge rule sets, giving `self` precedence fn fall_back_on(&mut self, other: &RuleSet) { let RuleSet { location, allow_unknown, default, r#type, maxlength, min, max, } = other; if self.location.is_dummy() { self.location = location.clone(); } self.allow_unknown = self.allow_unknown.or(*allow_unknown); if self.r#type.is_none() { self.r#type = r#type.clone(); } self.min = self.min.or(*min); self.max = self.max.or(*max); self.maxlength = self.maxlength.or(*maxlength); if self.default.is_none() { self.default = default.clone(); } } } /// A POM schema. /// /// See the [POM specification](https://www.pom.computer/spec.html) for a description /// of schemas. #[allow(dead_code)] // TODO #[derive(Default)] pub struct Schema { rules: Vec<(Box, RuleSet)>, } impl Schema { /// Load a schema from a file path. #[cfg(feature = "std")] pub fn load_path>(path: P) -> Result { Self::try_from(&Configuration::load_path(path)?) } /// Load a schema. /// /// See [`Configuration::load`] for more information. pub fn load(filename: &str, reader: R) -> Result { Self::try_from(&Configuration::load(filename, reader)?) } fn binary_search_range( &self, range: Range, key: &str, ) -> core::result::Result { self.rules[range.clone()] .binary_search_by(|(k, _)| k.as_ref().cmp(key)) .map(|i| i + range.start) .map_err(|i| i + range.start) } fn get_rule_set_(&self, prefix: &str, key: &str, range: Range) -> RuleSet { // definitely overwrought. but it was fun to write. if range.is_empty() { return RuleSet::new(Location::dummy()); } if let Some((first_component, rest)) = key.split_once('.') { let exact_key_dot = format!("{prefix}{first_component}."); // NB: / is the first ASCII character after . let exact_key_slash = format!("{prefix}{first_component}/"); let star_key_dot = format!("{prefix}*."); let star_key_slash = format!("{prefix}*/"); let exact_keys_start = self .binary_search_range(range.clone(), &exact_key_dot) .expect_err("key should not end in ."); let exact_keys_end = self .binary_search_range(range.clone(), &exact_key_slash) .unwrap_or_else(|x| x); let mut exact_rule_set = self.get_rule_set_(&exact_key_dot, rest, exact_keys_start..exact_keys_end); let star_keys_start = self .binary_search_range(range.clone(), &star_key_dot) .expect_err("key should not end in ."); let star_keys_end = self .binary_search_range(range.clone(), &star_key_slash) .unwrap_or_else(|x| x); let star_rule_set = self.get_rule_set_(&star_key_dot, rest, star_keys_start..star_keys_end); exact_rule_set.fall_back_on(&star_rule_set); exact_rule_set } else { let mut rule_set = RuleSet::new(Location::dummy()); if let Ok(exact_key_rule_idx) = self.binary_search_range(range.clone(), &format!("{prefix}{key}")) { rule_set = self.rules[exact_key_rule_idx].1.clone(); } if let Ok(star_key_rule_idx) = self.binary_search_range(range.clone(), &format!("{prefix}*")) { rule_set.fall_back_on(&self.rules[star_key_rule_idx].1); } rule_set } } fn get_rule_set(&self, key: &str) -> RuleSet { self.get_rule_set_("", key, 0..self.rules.len()) } /// Check that `conf` follows this schema, and fill in default values. pub fn check_and_fill_defaults(&self, conf: &mut Configuration) -> Result<()> { let mut errors = vec![]; for (key, val) in conf.iter() { let location = || conf.location(key).unwrap(); let rule_set = self.get_rule_set(key); if (rule_set.min.is_some() || rule_set.max.is_some()) && let Ok(val) = crate::parse_float(&Location::dummy(), val) { if let Some(min) = rule_set.min.into() && val < min { errors.push(Error::SchemaValueLessThanMin( location(), key.into(), val, min, )) } if let Some(max) = rule_set.max.into() && val > max { errors.push(Error::SchemaValueGreaterThanMax( location(), key.into(), val, max, )); } } if let Some(max_length) = rule_set .maxlength .and_then(|x| usize::try_from(x.get()).ok()) { if val.len() > max_length { errors.push(Error::SchemaValueTooLong( location(), key.into(), val.len(), max_length, )); } } // TODO: type, allow_unknown } // TODO: replace default values, report missing values crate::check_error_vec(errors) } } impl TryFrom<&Configuration> for Schema { type Error = crate::Error; fn try_from(conf: &Configuration) -> Result { let mut errors = vec![]; let mut rules: Vec<(&str, Location, Rule)> = vec![]; for (key, val) in conf.iter_sorted() { let location = conf.location(key).unwrap(); let rule = match Rule::parse(&location, key, val) { Ok(r) => r, Err(e) => { errors.push(e); continue; } }; let Some((affected_key, _rule_name)) = key.rsplit_once('.') else { unreachable!("Rule::parse shouldn't accept this key"); }; rules.push((affected_key, location, rule)); } let mut rule_sets: Vec<(Box, RuleSet)> = vec![]; for (key, location, rule) in rules { if rule_sets.last().is_none_or(|(k, _)| k.as_ref() != key) { // add new rule set rule_sets.push((key.into(), RuleSet::new(location))); } rule_sets.last_mut().unwrap().1.add_rule(rule); } for (_, rule_set) in &rule_sets { if let Err(e) = rule_set.check_consistency() { errors.push(e); } } if errors.len() == 1 { return Err(errors.pop().unwrap()); } else if !errors.is_empty() { return Err(Error::Multiple(errors.into())); } Ok(Schema { rules: rule_sets }) } }