1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
|
use std::error::Error;
use std::io::{self, prelude::*};
use std::collections::HashSet;
/// Runs the `animalia` command.
///
/// Reads `taxa.txt` and `en-definitions.txt` from the current directory and writes
/// `animalia.txt`, containing one word per line (sorted, without duplicates) for every
/// definition entry whose definition text mentions either
///
/// * a known species name (two adjacent words matching a `taxa.txt` line that contains
///   a space), or
/// * a known taxon name immediately followed by a rank word such as `family`
///   (this is what matches markup like `{{taxfmt|Felidae|family}}`).
///
/// Each line of `en-definitions.txt` must have the form `word class definition`, with
/// the first two fields separated by single spaces; the `class` field is ignored.
///
/// # Errors
///
/// Returns an error if `args` is not empty, if either input file cannot be opened or
/// read, if a definitions line does not contain the two separating spaces, or if the
/// output file cannot be written.
pub fn main(args: Vec<String>) -> Result<(), Box<dyn Error>> {
    if !args.is_empty() {
        return Err("No arguments expected to 'animalia' command".into());
    }
    let defs_path = "en-definitions.txt";
    let taxa_path = "taxa.txt";
    let output_path = "animalia.txt";

    let (species, taxa) = load_taxa(taxa_path)?;

    let definitions = std::fs::File::open(defs_path)
        .map_err(|e| format!("couldn't open {defs_path}: {e}"))?;
    let mut animalia = Vec::new();
    for (index, line) in io::BufReader::new(definitions).lines().enumerate() {
        let line = line.map_err(|e| format!("error reading {defs_path}: {e}"))?;
        let line = line.trim_end_matches(['\r', '\n']);
        // A malformed line is a problem with the input data, not a programming error,
        // so report it (with its 1-based line number) instead of panicking.
        let bad_format = || {
            format!(
                "bad format for definitions file {defs_path} at line {}",
                index + 1
            )
        };
        let (word, rest) = line.split_once(' ').ok_or_else(bad_format)?;
        let (_class, definition) = rest.split_once(' ').ok_or_else(bad_format)?;
        if definition_mentions_taxon(definition, &species, &taxa) {
            animalia.push(word.to_owned());
        }
    }

    // The same word can appear on several definition lines; keep it once.
    animalia.sort_unstable();
    animalia.dedup();

    let mut output = String::with_capacity(animalia.iter().map(|word| word.len() + 1).sum());
    for animal in &animalia {
        output.push_str(animal);
        output.push('\n');
    }
    std::fs::write(output_path, output)
        .map_err(|e| format!("couldn't write {output_path}: {e}"))?;
    Ok(())
}

/// Loads the taxa list at `path`, returning `(species, taxa)`.
///
/// Lines containing a space go into the species set; all other lines go into the taxa
/// set. Blank (or whitespace-only) lines are skipped: an empty name would otherwise
/// match the empty fragments produced when a definition is split on punctuation.
fn load_taxa(path: &str) -> Result<(HashSet<String>, HashSet<String>), Box<dyn Error>> {
    let file = std::fs::File::open(path).map_err(|e| format!("couldn't open {path}: {e}"))?;
    let mut species = HashSet::new();
    let mut taxa = HashSet::new();
    for line in io::BufReader::new(file).lines() {
        let line = line.map_err(|e| format!("couldn't read {path}: {e}"))?;
        let name = line.trim_end_matches(['\r', '\n']);
        if name.trim().is_empty() {
            continue;
        }
        if name.contains(' ') {
            species.insert(name.to_owned());
        } else {
            taxa.insert(name.to_owned());
        }
    }
    Ok((species, taxa))
}

/// Returns `true` if `definition` contains two adjacent alphabetic fragments that either
/// form a known species name (`"first second"`) or are a known taxon followed by a rank
/// word.
fn definition_mentions_taxon(
    definition: &str,
    species: &HashSet<String>,
    taxa: &HashSet<String>,
) -> bool {
    const RANKS: [&str; 6] = ["kingdom", "phylum", "class", "order", "family", "genus"];

    // Splitting on every non-alphabetic character yields empty fragments between
    // consecutive separators; they are kept so that only directly adjacent words pair up.
    let parts: Vec<&str> = definition.split(|c: char| !c.is_alphabetic()).collect();
    // Reused buffer for the "first second" lookup key, to avoid one allocation per pair.
    let mut binomial = String::new();
    parts.windows(2).any(|pair| {
        binomial.clear();
        binomial.push_str(pair[0]);
        binomial.push(' ');
        binomial.push_str(pair[1]);
        species.contains(binomial.as_str())
            // handles {{taxfmt|Felidae|family}} &c.
            || (RANKS.contains(&pair[1]) && taxa.contains(pair[0]))
    })
}
|