summaryrefslogtreecommitdiff
path: root/src/animalia.rs
blob: f7651727221c2402b8aa1e69c5e8f460853a7af2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
use std::collections::HashSet;
use std::error::Error;
use std::io::{self, prelude::*};
/// Returns `true` if `definition` mentions a known species or higher taxon.
///
/// The definition is split on every non-alphabetic character and each pair of
/// adjacent tokens is examined. A pair matches when either
///
/// * the two tokens joined by a single space form an entry of `species`, or
/// * the second token is a taxonomic rank name (`kingdom` … `genus`) and the
///   first token is an entry of `taxa` — this is what catches
///   `{{taxfmt|Felidae|family}}` and similar.
///
/// Consecutive separators produce empty tokens, so two words only count as
/// adjacent when exactly one non-alphabetic character sits between them.
fn mentions_taxon(definition: &str, species: &HashSet<String>, taxa: &HashSet<String>) -> bool {
	const LEVELS: [&str; 6] = ["kingdom", "phylum", "class", "order", "family", "genus"];
	let tokens: Vec<&str> = definition.split(|c: char| !c.is_alphabetic()).collect();
	// Reused for every candidate binomial so we don't allocate once per token pair.
	let mut binomial = String::new();
	tokens.windows(2).any(|pair| {
		binomial.clear();
		binomial.push_str(pair[0]);
		binomial.push(' ');
		binomial.push_str(pair[1]);
		species.contains(binomial.as_str())
			|| (LEVELS.contains(&pair[1]) && taxa.contains(pair[0]))
	})
}

/// Entry point of the `animalia` command.
///
/// Reads `taxa.txt` (one taxon per line; lines containing a space are treated
/// as species names, all others as single-word taxa) and `en-definitions.txt`
/// (one `word␠␠class␠definition` entry per line) from the current directory,
/// and writes to `animalia.txt` the sorted, de-duplicated list of words whose
/// definition mentions one of the taxa, one word per line.
///
/// # Errors
///
/// Returns an error if
/// * `args` is not empty,
/// * either input file cannot be opened or read,
/// * a line of the definitions file does not have the expected format, or
/// * the output file cannot be written.
pub fn main(args: Vec<String>) -> Result<(), Box<dyn Error>> {
	if !args.is_empty() {
		return Err("No arguments expected to 'animalia' command".into());
	}
	let defs_path = "en-definitions.txt";
	let taxa_path = "taxa.txt";
	let output_path = "animalia.txt";

	let taxa_file =
		std::fs::File::open(taxa_path).map_err(|e| format!("couldn't open {taxa_path}: {e}"))?;
	let mut species: HashSet<String> = HashSet::new();
	let mut taxa: HashSet<String> = HashSet::new();
	for line in io::BufReader::new(taxa_file).lines() {
		let line = line.map_err(|e| format!("couldn't read {taxa_path}: {e}"))?;
		let line = line.trim_end_matches(['\r', '\n']);
		if line.is_empty() {
			// A blank line would register "" as a taxon, and "" then matches the
			// empty tokens produced by punctuation in front of a rank word
			// (e.g. ", family"), flagging unrelated definitions.
			continue;
		}
		if line.contains(' ') {
			species.insert(line.into());
		} else {
			taxa.insert(line.into());
		}
	}

	let definitions =
		std::fs::File::open(defs_path).map_err(|e| format!("couldn't open {defs_path}: {e}"))?;
	let mut animalia: Vec<String> = vec![];
	for (index, line) in io::BufReader::new(definitions).lines().enumerate() {
		let line = line.map_err(|e| format!("error reading {defs_path}: {e}"))?;
		let line = line.trim_end_matches(['\r', '\n']);
		// Malformed input is reported to the caller instead of panicking.
		let bad_format =
			|| format!("bad format for definitions file {defs_path} (line {})", index + 1);
		// The headword is separated from the rest by two spaces ...
		let (word, rest) = line.split_once("  ").ok_or_else(bad_format)?;
		// ... and the word class from the definition text by one.
		let (_class, definition) = rest.split_once(' ').ok_or_else(bad_format)?;
		if mentions_taxon(definition, &species, &taxa) {
			animalia.push(word.to_owned());
		}
	}
	animalia.sort_unstable();
	animalia.dedup();

	let mut s = String::with_capacity(animalia.iter().map(|w| w.len() + 1).sum());
	for animal in &animalia {
		s.push_str(animal);
		s.push('\n');
	}
	std::fs::write(output_path, s).map_err(|e| format!("couldn't write {output_path}: {e}"))?;
	Ok(())
}