diff options
Diffstat (limited to 'src/schema.rs')
-rw-r--r-- | src/schema.rs | 396 |
1 files changed, 0 insertions, 396 deletions
diff --git a/src/schema.rs b/src/schema.rs deleted file mode 100644 index ba7c440..0000000 --- a/src/schema.rs +++ /dev/null @@ -1,396 +0,0 @@ -use crate::{Arc, Box, Configuration, Error, Location, Result, Vec, vec}; -use core::num::NonZeroU32; -use core::ops::Range; - -/// Like `Option<f64>` but uses NaN as its "`None`" (to save memory). -/// -/// hopefully this will be added to rust std eventually… -#[derive(Clone, Copy)] -struct OptionF64(f64); - -impl OptionF64 { - fn is_some(self) -> bool { - !self.0.is_nan() - } - fn or(self, other: OptionF64) -> Self { - if self.is_some() { self } else { other } - } -} - -impl Default for OptionF64 { - fn default() -> Self { - Self(f64::NAN) - } -} - -impl core::fmt::Debug for OptionF64 { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - write!(f, "{:?}", Option::<f64>::from(*self)) - } -} - -impl From<OptionF64> for Option<f64> { - fn from(value: OptionF64) -> Self { - if value.0.is_nan() { - None - } else { - Some(value.0) - } - } -} -impl From<Option<f64>> for OptionF64 { - fn from(value: Option<f64>) -> Self { - match value { - Some(x) if x.is_nan() => { - // just panic-- this isn't a public API - panic!("OptionF64 can't contain NaN") - } - Some(x) => Self(x), - None => Self::default(), - } - } -} - -#[derive(Clone, Debug)] -#[allow(dead_code)] // TODO -enum Type { - Any, - None, - Bool, - UInt, - Int, - Float, - Literal(Arc<str>), - Optional(Arc<Type>), - List(Arc<Type>), - Union(Arc<[Type]>), -} - -impl Type { - fn parse(location: &Location, s: &str) -> Result<Self> { - let s = s.trim_matches(['\t', ' ']); - match s { - "Int" => return Ok(Self::Int), - "None" => return Ok(Self::None), - "Float" => return Ok(Self::Float), - "UInt" => return Ok(Self::UInt), - "Any" | "String" => return Ok(Self::Any), - "Bool" => return Ok(Self::Bool), - "Empty" => return Ok(Self::Literal("".into())), - _ => {} - } - let mut in_string = false; - let mut brackets = 0isize; - let mut ors = vec![]; - for (i, c) in s.char_indices() { - if c == '[' { - brackets += 1; - } - if c == ']' { - brackets -= 1; - } - if c == '\'' { - in_string = !in_string; - } - if c == '|' && brackets == 0 { - ors.push(i + 1); - } - } - if ors.len() > 1 { - ors.push(s.len() + 1); - let mut subtypes: Vec<Type> = Vec::with_capacity(ors.len() - 1); - for w in ors.windows(2) { - let [start, end] = w else { unreachable!() }; - subtypes.push(Self::parse(location, &s[*start..*end - 1])?); - } - return Ok(Self::Union(subtypes.into())); - } - if let Some(lit) = s.strip_prefix('\'').and_then(|s| s.strip_suffix('\'')) { - return Ok(Self::Literal(lit.into())); - } - if let Some(opt) = s - .strip_prefix("Optional[") - .and_then(|s| s.strip_suffix(']')) - { - return Ok(Self::Optional(Arc::new(Self::parse(location, opt)?))); - } - if let Some(of) = s.strip_prefix("List[").and_then(|s| s.strip_suffix(']')) { - return Ok(Self::Optional(Arc::new(Self::parse(location, of)?))); - } - Err(Error::SchemaBadType(location.clone(), s.into())) - } -} - -#[derive(Debug)] -enum Rule { - Type(Type), - MaxLength(NonZeroU32), - Min(f64), - Max(f64), - Default(Arc<str>), - AllowUnknown(bool), -} - -impl Rule { - fn parse(location: &Location, key: &str, val: &str) -> Result<Rule> { - Ok(if key.ends_with(".type") { - let r#type = Type::parse(location, val)?; - Rule::Type(r#type) - } else if key.ends_with(".min") { - let max = crate::parse_float(location, val)?; - Rule::Min(max) - } else if key.ends_with(".max") { - let max = crate::parse_float(location, val)?; - Rule::Max(max) - } else if key.ends_with(".maxlength") { - let max = val - .parse::<NonZeroU32>() - .map_err(|_| Error::SchemaBadMaxLength(location.clone(), val.into()))?; - if max.get() > 0x7fff_ffff { - return Err(Error::SchemaBadMaxLength(location.clone(), val.into())); - } - Rule::MaxLength(max) - } else if key.ends_with(".default") { - Rule::Default(val.into()) - } else if key.ends_with(".allow_unknown") { - Rule::AllowUnknown(crate::parse_bool(location, val)?) - } else { - return Err(Error::SchemaBadKey(location.clone(), key.into())); - }) - } -} - -#[derive(Debug, Clone)] -struct RuleSet { - location: Location, - r#type: Option<Type>, - maxlength: Option<NonZeroU32>, - min: OptionF64, - max: OptionF64, - default: Option<Arc<str>>, - allow_unknown: Option<bool>, -} - -impl RuleSet { - fn new(location: Location) -> Self { - Self { - location, - r#type: None, - maxlength: None, - min: OptionF64::default(), - max: OptionF64::default(), - default: None, - allow_unknown: None, - } - } -} - -impl RuleSet { - fn add_rule(&mut self, rule: Rule) { - match rule { - Rule::Type(t) => self.r#type = Some(t), - Rule::MaxLength(m) => self.maxlength = Some(m), - Rule::Default(d) => self.default = Some(d), - Rule::AllowUnknown(a) => self.allow_unknown = Some(a), - Rule::Min(m) => self.min = OptionF64(m), - Rule::Max(m) => self.max = OptionF64(m), - } - } - // merge rule sets, giving `self` precedence - fn fall_back_on(&mut self, other: &RuleSet) { - let RuleSet { - location, - allow_unknown, - default, - r#type, - maxlength, - min, - max, - } = other; - if self.location.is_dummy() { - self.location = location.clone(); - } - self.allow_unknown = self.allow_unknown.or(*allow_unknown); - if self.r#type.is_none() { - self.r#type = r#type.clone(); - } - self.min = self.min.or(*min); - self.max = self.max.or(*max); - self.maxlength = self.maxlength.or(*maxlength); - if self.default.is_none() { - self.default = default.clone(); - } - } -} - -/// A POM schema. -/// -/// See the [POM specification](https://www.pom.computer/spec.html) for a description -/// of schemas. -#[allow(dead_code)] // TODO -#[derive(Default)] -pub struct Schema { - rules: Vec<(Box<str>, RuleSet)>, -} - -impl Schema { - /// Load a schema from a file path. - #[cfg(feature = "std")] - pub fn load_path<P: AsRef<std::path::Path>>(path: P) -> Result<Self> { - Self::try_from(&Configuration::load_path(path)?) - } - - /// Load a schema. - /// - /// See [`Configuration::load`] for more information. - pub fn load<R: crate::Read>(filename: &str, reader: R) -> Result<Self> { - Self::try_from(&Configuration::load(filename, reader)?) - } - - fn binary_search_range( - &self, - range: Range<usize>, - key: &str, - ) -> core::result::Result<usize, usize> { - self.rules[range.clone()] - .binary_search_by(|(k, _)| k.as_ref().cmp(key)) - .map(|i| i + range.start) - .map_err(|i| i + range.start) - } - fn get_rule_set_(&self, prefix: &str, key: &str, range: Range<usize>) -> RuleSet { - // definitely overwrought. but it was fun to write. - if range.is_empty() { - return RuleSet::new(Location::dummy()); - } - if let Some((first_component, rest)) = key.split_once('.') { - let exact_key_dot = format!("{prefix}{first_component}."); - // NB: / is the first ASCII character after . - let exact_key_slash = format!("{prefix}{first_component}/"); - let star_key_dot = format!("{prefix}*."); - let star_key_slash = format!("{prefix}*/"); - let exact_keys_start = self - .binary_search_range(range.clone(), &exact_key_dot) - .expect_err("key should not end in ."); - let exact_keys_end = self - .binary_search_range(range.clone(), &exact_key_slash) - .unwrap_or_else(|x| x); - let mut exact_rule_set = - self.get_rule_set_(&exact_key_dot, rest, exact_keys_start..exact_keys_end); - let star_keys_start = self - .binary_search_range(range.clone(), &star_key_dot) - .expect_err("key should not end in ."); - let star_keys_end = self - .binary_search_range(range.clone(), &star_key_slash) - .unwrap_or_else(|x| x); - let star_rule_set = - self.get_rule_set_(&star_key_dot, rest, star_keys_start..star_keys_end); - exact_rule_set.fall_back_on(&star_rule_set); - exact_rule_set - } else { - let mut rule_set = RuleSet::new(Location::dummy()); - if let Ok(exact_key_rule_idx) = - self.binary_search_range(range.clone(), &format!("{prefix}{key}")) - { - rule_set = self.rules[exact_key_rule_idx].1.clone(); - } - if let Ok(star_key_rule_idx) = - self.binary_search_range(range.clone(), &format!("{prefix}*")) - { - rule_set.fall_back_on(&self.rules[star_key_rule_idx].1); - } - rule_set - } - } - fn get_rule_set(&self, key: &str) -> RuleSet { - self.get_rule_set_("", key, 0..self.rules.len()) - } - /// Check that `conf` follows this schema, and fill in default values. - pub fn check_and_fill_defaults(&self, conf: &mut Configuration) -> Result<()> { - let mut errors = vec![]; - for (key, val) in conf.iter() { - let location = || conf.location(key).unwrap(); - let rule_set = self.get_rule_set(key); - if (rule_set.min.is_some() || rule_set.max.is_some()) - && let Ok(val) = crate::parse_float(&Location::dummy(), val) - { - if let Some(min) = rule_set.min.into() - && val < min - { - errors.push(Error::SchemaValueLessThanMin( - location(), - key.into(), - val, - min, - )) - } - if let Some(max) = rule_set.max.into() - && val > max - { - errors.push(Error::SchemaValueGreaterThanMax( - location(), - key.into(), - val, - max, - )); - } - } - if let Some(max_length) = rule_set - .maxlength - .and_then(|x| usize::try_from(x.get()).ok()) - { - if val.len() > max_length { - errors.push(Error::SchemaValueTooLong( - location(), - key.into(), - val.len(), - max_length, - )); - } - } - // TODO: type, allow_unknown - } - // TODO: replace default values, report missing values - crate::check_error_vec(errors) - } -} - -impl TryFrom<&Configuration> for Schema { - type Error = crate::Error; - fn try_from(conf: &Configuration) -> Result<Self> { - let mut errors = vec![]; - let mut rules: Vec<(&str, Location, Rule)> = vec![]; - for (key, val) in conf.iter_sorted() { - let location = conf.location(key).unwrap(); - let rule = match Rule::parse(&location, key, val) { - Ok(r) => r, - Err(e) => { - errors.push(e); - continue; - } - }; - let Some((affected_key, _rule_name)) = key.rsplit_once('.') else { - unreachable!("Rule::parse shouldn't accept this key"); - }; - rules.push((affected_key, location, rule)); - } - let mut rule_sets: Vec<(Box<str>, RuleSet)> = vec![]; - for (key, location, rule) in rules { - if rule_sets.last().is_none_or(|(k, _)| k.as_ref() != key) { - // add new rule set - rule_sets.push((key.into(), RuleSet::new(location))); - } - rule_sets.last_mut().unwrap().1.add_rule(rule); - } - for (_, rule_set) in &rule_sets { - if let Err(e) = rule_set.check_consistency() { - errors.push(e); - } - } - if errors.len() == 1 { - return Err(errors.pop().unwrap()); - } else if !errors.is_empty() { - return Err(Error::Multiple(errors.into())); - } - Ok(Schema { rules: rule_sets }) - } -} |