summary | refs | log | tree | commit | diff
path: root/src/animalia.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/animalia.rs')
-rw-r--r-- src/animalia.rs 55
1 files changed, 53 insertions, 2 deletions
diff --git a/src/animalia.rs b/src/animalia.rs
index f3b3261..e1ec8eb 100644
--- a/src/animalia.rs
+++ b/src/animalia.rs
@@ -1,9 +1,60 @@
use std::error::Error;
-
+use std::io::{self, prelude::*};
+use std::collections::HashSet;
/// Runs the `animalia` command: collects every word whose definition mentions
/// a known taxon and writes the sorted, de-duplicated word list to a file.
///
/// All three files are addressed by relative path, so they are resolved
/// against the current working directory:
///
/// * `taxa.txt` (input) — one taxon name per line; see [`load_taxa`].
/// * `en-definitions.txt` (input) — one entry per line, made of a word, a
///   second field that is ignored, and the definition text, separated by
///   single spaces.
/// * `animalia.txt` (output) — one matching word per line, sorted.
///
/// # Errors
///
/// Returns an error if `args` is not empty, if an input file cannot be opened
/// or read, if a definitions line does not have the three expected fields, or
/// if the output file cannot be written.
pub fn main(args: Vec<String>) -> Result<(), Box<dyn Error>> {
    if !args.is_empty() {
        return Err("No arguments expected to 'animalia' command".into());
    }
    let defs_path = "en-definitions.txt";
    let taxa_path = "taxa.txt";
    let output_path = "animalia.txt";

    let (species, taxa) = load_taxa(taxa_path)?;

    let definitions = std::fs::File::open(defs_path)
        .map_err(|e| format!("couldn't open {defs_path}: {e}"))?;
    let mut animalia: Vec<String> = Vec::new();
    for (index, line) in io::BufReader::new(definitions).lines().enumerate() {
        let line = line.map_err(|e| format!("error reading {defs_path}: {e}"))?;
        let line = line.trim_end_matches(['\r', '\n']);
        // A malformed line is bad input, not a program bug: report it as an
        // error (with its 1-based line number) rather than panicking.
        let (word, definition) = split_definition_line(line).ok_or_else(|| {
            format!(
                "bad format for definitions file {defs_path} at line {}",
                index + 1
            )
        })?;
        if mentions_taxon(definition, &species, &taxa) {
            animalia.push(word.to_owned());
        }
    }
    // The same word can appear on several lines; emit each one only once.
    animalia.sort_unstable();
    animalia.dedup();

    let mut output = String::new();
    for animal in &animalia {
        output.push_str(animal);
        output.push('\n');
    }
    std::fs::write(output_path, output)
        .map_err(|e| format!("couldn't write {output_path}: {e}"))?;
    Ok(())
}

/// Reads the taxa file at `path` and returns `(species, taxa)`.
///
/// Names containing a space go into the first set (treated as two-word
/// species names); all other names go into the second set. Blank lines are
/// skipped so that the empty string never becomes a "taxon", which would
/// otherwise match the empty fragments produced when a definition is split
/// on punctuation.
fn load_taxa(path: &str) -> Result<(HashSet<String>, HashSet<String>), Box<dyn Error>> {
    let file = std::fs::File::open(path).map_err(|e| format!("couldn't open {path}: {e}"))?;
    let mut species = HashSet::new();
    let mut taxa = HashSet::new();
    for line in io::BufReader::new(file).lines() {
        let line = line.map_err(|e| format!("couldn't read {path}: {e}"))?;
        let name = line.trim_end_matches(['\r', '\n']);
        if name.is_empty() {
            continue;
        }
        if name.contains(' ') {
            species.insert(name.to_owned());
        } else {
            taxa.insert(name.to_owned());
        }
    }
    Ok((species, taxa))
}

/// Splits a definitions line into `(word, definition)`, discarding the field
/// between them. Returns `None` when the line has fewer than three
/// space-separated fields.
fn split_definition_line(line: &str) -> Option<(&str, &str)> {
    let (word, rest) = line.split_once(' ')?;
    let (_class, definition) = rest.split_once(' ')?;
    Some((word, definition))
}

/// Reports whether `definition` contains a known species name or a known
/// taxon immediately followed by a taxonomic rank word.
///
/// The definition is split on every non-alphabetic character and examined as
/// consecutive pairs of fragments, so markup such as
/// `{{taxfmt|Felidae|family}}` yields the pair (`Felidae`, `family`).
fn mentions_taxon(definition: &str, species: &HashSet<String>, taxa: &HashSet<String>) -> bool {
    let parts: Vec<&str> = definition.split(|c: char| !c.is_alphabetic()).collect();
    // One buffer reused for every candidate "Genus species" key, instead of
    // allocating a new string per pair.
    let mut binomial = String::new();
    parts.windows(2).any(|pair| {
        binomial.clear();
        binomial.push_str(pair[0]);
        binomial.push(' ');
        binomial.push_str(pair[1]);
        species.contains(binomial.as_str())
            // handles {{taxfmt|Felidae|family}} &c.
            || (matches!(
                pair[1],
                "kingdom" | "phylum" | "class" | "order" | "family" | "genus"
            ) && taxa.contains(pair[0]))
    })
}