7 files changed, 69 insertions, 11 deletions
diff --git a/.gitignore b/.gitignore
index 811af54..b2d5af0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,7 @@
 /target
 enwiktionary-*.xml-p*
 *definitions.txt*
+animalia.txt*
 .*.tmp
 *~
 .vscode
diff --git a/Cargo.lock b/Cargo.lock
index 213875d..c8519f5 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3,7 +3,7 @@
 version = 4
 
 [[package]]
-name = "wicopy"
+name = "wiktionary"
 version = "0.1.0"
 dependencies = [
  "xml",
diff --git a/Cargo.toml b/Cargo.toml
index 2a9267e..8361bae 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,5 +1,5 @@
 [package]
-name = "wicopy"
+name = "wiktionary"
 version = "0.1.0"
 edition = "2024"
 
diff --git a/README.md b/README.md
index ecd34b4..92ef4dc 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# wicopy
+# wiktionary
 
 Various miscellaneous scripts for parsing [wiktionary data dumps](https://dumps.wikimedia.org/enwiktionary/).
 
diff --git a/index.html b/index.html
index 306087f..8dff7cd 100644
--- a/index.html
+++ b/index.html
@@ -22,11 +22,6 @@
 	</p>
 	<ul>
 		<li>
-			The Big List: <a href="/tmt/word-list.txt.xz">word-list.txt.xz (27MB compressed, 120MB uncompressed, 9,878,558 entries)</a>.¹<br>
-			Every English Wikipedia article title &amp; entry in English Wiktionary; containing only ASCII a-z/A-Z/space, max 2 words.<br>
-			Words labelled <i>offensive</i> on Wiktionary were filtered out (overly aggressively—some totally inoffensive words were removed in the process).
-		</li>
-		<li>
 			English definitions:
 			<a href="/wiktionary/en-definitions.txt.xz">en-definitions.txt.xz (23MB compressed, 127MB uncompressed, 1,629,482 entries)</a>
 			and<br>Translingual definitions:
@@ -68,6 +63,16 @@
 			<code>DEFINITION</code> is in the wikitext format.<br>
 			It’s possible that there are parsing errors, but I haven’t spotted any yet.
 		</li>
+		<li>
+			All English animal terms: <a href="/wiktionary/animalia.txt.xz">animalia.txt.xz (62KB compressed, 192KB uncompressed)</a>.¹<br>
+			This includes both nouns referring to animals (e.g. <i>dog</i>) and animal-related adjectives (e.g. <i>canine</i>).
+			There could definitely be errors due to bad parsing (but I have checked a number of entries at random and they seem good).
+		</li>
+		<li>
+			The Big List: <a href="/tmt/word-list.txt.xz">word-list.txt.xz (27MB compressed, 120MB uncompressed, 9,878,558 entries)</a>.¹<br>
+			Every English Wikipedia article title &amp; entry in English Wiktionary; containing only ASCII a-z/A-Z/space, max 2 words.<br>
+			Words labelled <i>offensive</i> on Wiktionary were filtered out (overly aggressively—some totally inoffensive words were removed in the process).
+		</li>
 	</ul>
 	<p>¹ Derived from <a href="https://dumps.wikimedia.org/enwiktionary/20250701/" target="_blank">enwiktionary-20250701</a> dump.</p>
 </body>
diff --git a/src/animalia.rs b/src/animalia.rs
index f3b3261..e1ec8eb 100644
--- a/src/animalia.rs
+++ b/src/animalia.rs
@@ -1,9 +1,60 @@
 use std::error::Error;
-
+use std::io::{self, prelude::*};
+use std::collections::HashSet;
 pub fn main(args: Vec<String>) -> Result<(), Box<dyn Error>> {
 	if !args.is_empty() {
 		Err("No arguments expected to 'animalia' command")?;
 	}
-	
+	let defs_path = "en-definitions.txt";
+	let taxa_path = "taxa.txt";
+	let taxa_file = std::fs::File::open(taxa_path)
+		.map_err(|e| format!("couldn't open {taxa_path}: {e}"))?;
+	let mut species: HashSet<String> = HashSet::new();
+	let mut taxa: HashSet<String> = HashSet::new();
+	for line in io::BufReader::new(taxa_file).lines() {
+		let line = line.map_err(|e| format!("couldn't read {taxa_path}: {e}"))?;
+		let line = line.trim_end_matches(['\r', '\n']);
+		if line.contains(' ') {
+			species.insert(line.into());
+		} else {
+			taxa.insert(line.into());
+		}
+	}
+	let definitions = std::fs::File::open(defs_path)
+		.map_err(|e| format!("couldn't open {defs_path}: {e}"))?;
+	let levels: HashSet<&str> = [
+		"kingdom",
+		"phylum",
+		"class",
+		"order",
+		"family",
+		"genus",
+	].into_iter().collect();
+	let mut animalia = vec![];
+	for line in io::BufReader::new(definitions).lines() {
+		let line = line.map_err(|e| format!("error reading {defs_path}: {e}"))?;
+		let line = line.trim_end_matches(['\r', '\n']);
+		let (word, rest) = line.split_once("  ").expect("bad format for definitions file");
+		let (_class, definition) = rest.split_once(' ').expect("bad format for definitions file");
+//		println!("{word} {definition}");
+		let parts: Vec<&str> = definition.split(|c: char| !c.is_alphabetic()).collect();
+		for ws in parts.windows(2) {
+			if species.contains(&format!("{} {}",ws[0],ws[1])) ||
+			// handles {{taxfmt|Felidae|family}} &c.
+				levels.contains(ws[1]) && taxa.contains(ws[0]) {
+				animalia.push(word.to_owned());
+			}
+		}
+	}
+	animalia.sort_unstable();
+	animalia.dedup();
+	let output_path = "animalia.txt";
+	let mut s = String::new();
+	for animal in &animalia {
+		s.push_str(animal);
+		s.push('\n');
+	}
+	std::fs::write(output_path, s)
+		.map_err(|e| format!("couldn't write {output_path}: {e}"))?;
 	Ok(())
 }
diff --git a/src/main.rs b/src/main.rs
index 0fcc3d6..8ca45fb 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -25,7 +25,8 @@ fn try_main() -> Result<(), Box<dyn Error>> {
 	let mut args = std::env::args_os().skip(1);
 	let command = args.next();
 	let no_command = "No command specified. Commands available:
-- definitions";
+- definitions
+- animalia";
 	let Some(command) = command else {
 		return Err(no_command.into());
 	};