From 8286a9dc4803dd0165846acfb105f28c662dceb0 Mon Sep 17 00:00:00 2001
From: pommicket <pommicket@gmail.com>
Date: Fri, 6 Sep 2024 14:07:56 -0400
Subject: row-limit search

---
 Cargo.lock  |  35 +++++++++++
 Cargo.toml  |   1 +
 README.md   |  16 ++++-
 src/main.rs | 191 ++++++++++++++++++++++++++++++++++++++++++++++--------------
 4 files changed, 197 insertions(+), 46 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index a5a6de5..3f25e85 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2,15 +2,50 @@
 # It is not intended for manual editing.
 version = 3
 
+[[package]]
+name = "autocfg"
+version = "1.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0"
+
 [[package]]
 name = "bnum"
 version = "0.11.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3e31ea183f6ee62ac8b8a8cf7feddd766317adfb13ff469de57ce033efd6a790"
 
+[[package]]
+name = "num-bigint"
+version = "0.4.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9"
+dependencies = [
+ "num-integer",
+ "num-traits",
+]
+
+[[package]]
+name = "num-integer"
+version = "0.1.46"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f"
+dependencies = [
+ "num-traits",
+]
+
+[[package]]
+name = "num-traits"
+version = "0.2.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
+dependencies = [
+ "autocfg",
+]
+
 [[package]]
 name = "pascal"
 version = "0.1.0"
 dependencies = [
  "bnum",
+ "num-bigint",
 ]
diff --git a/Cargo.toml b/Cargo.toml
index 492faf6..da15ea0 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -5,3 +5,4 @@ edition = "2021"
 
 [dependencies]
 bnum = "0.11.0"
+num-bigint = "0.4.6"
diff --git a/README.md b/README.md
index 6d504d4..050040d 100644
--- a/README.md
+++ b/README.md
@@ -30,7 +30,7 @@ $$
 
 De Weger did a computer search up to ${n\choose k} < 10^{30}$ in 1995. Now that we have
 faster computers we can go higher — and we can say for certain that the only solutions
-up to ${n\choose k}<10^{41}$ are the ones found by de Weger: namely, the ones in the infinite
+up to ${n\choose k}<10^{42}$ are the ones found by de Weger: namely, the ones in the infinite
 family above,
 $$ {15 \choose 5} = {14 \choose 6} = 3003 = {78 \choose 2}$$
 $$ {104 \choose 39} = {103 \choose 40} = 61218182743304701891431482520$$
@@ -38,10 +38,20 @@ And the “sporadic” solutions:
 $$ {153 \choose 2} ={19\choose 5} = 11628$$
 $$ {221\choose 2}={17 \choose 8}=24310$$
 
-This program searches up to $10^X$, when given the argument $X$.
+This program searches up to ${n\choose k} <10^X$, when given the arguments `entry-limit X`.
 The search works by putting all ${n\choose k}<10^X$ with $5\leq k\leq \frac n 2$ into
 an array and sorting it. Then we check for adjacent elements which are equal,
 and use a binary search to check whether elements in the array are ${n\choose 2},{n\choose 3},{n\choose 4}$.
 This finds all solutions with $l>4$ (since the solutions with $l\leq 4$ are known).
 
-Unfortunately searching to $10^{41}$ with this method already requires 13 GB of memory.
+We can make a slight optimization by just storing the entries mod $2^{64}$ in the array,
+then only doing the full comparison on entries who agree mod $2^{64}$.
+But still searching to $10^{42}$ with this method already requires 11 GB of memory.
+
+Using this modular trick we can also search up to the 60,000th row (use arguments `row-limit 60000`),
+and (sadly) confirm that the only repeated entries are the ones listed above and the new entries in the infinite family,
+$$ {713\choose 273} = {714\choose 272} \approx 3.5 \times 10^{204}$$
+$$ {4894\choose 1870} = {4895\choose 1869} \approx 4.6 \times 10^{1141}$$
+$$ {33551 \choose 12816} = {33552 \choose 12815} \approx 6.0 \times 10^{9687}$$
+Again we run into a memory bottleneck — searching this far required 14 GB of memory.
+
diff --git a/src/main.rs b/src/main.rs
index 8be934d..2d54fc7 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,3 +1,4 @@
+use num_bigint::BigUint;
 use std::ffi::OsString;
 use std::process::ExitCode;
 
@@ -54,27 +55,67 @@ fn superscript(number: &str) -> String {
 		.collect()
 }
 
-fn main() -> ExitCode {
-	let args: Vec<OsString> = std::env::args_os().collect();
-	if args.len() > 2 {
-		eprintln!("Please provide at most 1 argument (power of 10 to search up to).");
-		return ExitCode::FAILURE;
+#[derive(Clone, Copy)]
+struct PascalEntry {
+	// row choose col mod 2^64
+	value: u64,
+	row_lo: u16,
+	row_hi: u32,
+	col: u16,
+}
+
+impl PascalEntry {
+	fn new(value: u64, row: u64, col: u16) -> Self {
+		assert!(row < (1 << 48));
+		Self {
+			value,
+			row_lo: row as _,
+			row_hi: (row >> 16) as _,
+			col,
+		}
 	}
-	let power_of_10: Option<usize> = match args.get(1) {
-		Some(s) => s.clone().into_string().ok().and_then(|x| x.parse().ok()),
-		None => Some(35),
-	};
-	let Some(power_of_10) = power_of_10 else {
-		eprintln!("Argument must be a nonnegative integer");
-		return ExitCode::FAILURE;
-	};
-	if power_of_10 > usize::MAX / 4
-		|| (power_of_10 as f64 * f64::log2(10.0)) as usize + 10 > size_of::<UInt>() * 8
-	{
-		eprintln!("Power of 10 is too large for integer type. You will have to increase the size of UInt in the source code.");
-		return ExitCode::FAILURE;
+	fn row(self) -> u64 {
+		u64::from(self.row_lo) | u64::from(self.row_hi) << 16
 	}
+	fn full_value(self) -> BigUint {
+		let row: u64 = self.row();
+		let col: u64 = self.col.into();
+		let mut value = BigUint::from(1u8);
+		for r in row - col + 1..=row {
+			value *= BigUint::from(r);
+		}
+		for r in 1..=col {
+			value /= BigUint::from(r);
+		}
+		value
+	}
+}
+
+fn find_duplicates_in(entries: &mut [PascalEntry]) {
+	entries.sort_by_key(|x| x.value);
+	for e in entries.chunk_by(|a, b| a.value == b.value) {
+		if e.len() == 1 {
+			continue;
+		}
+		for i in 0..e.len() {
+			for j in i + 1..e.len() {
+				if e[i].full_value() != e[j].full_value() {
+					continue;
+				}
+				println!(
+					"({} choose {}) = {} = ({} choose {})",
+					e[i].row(),
+					e[i].col,
+					e[i].full_value(),
+					e[j].row(),
+					e[j].col
+				);
+			}
+		}
+	}
+}
 
+fn search_entry_limit(power_of_10: usize) {
 	let mut pascal_row = [UInt::from(0u8); 500];
 	let mut range = pascal_row.len();
 	println!(
@@ -82,44 +123,108 @@ fn main() -> ExitCode {
 		superscript(&format!("{power_of_10}"))
 	);
 	let limit = UInt::from(10u8).pow(power_of_10 as u32);
-	let mut numbers: Vec<UInt> = Vec::new();
+	let mut entries: Vec<PascalEntry> = vec![];
 	pascal_row[0] = UInt::from(1u8);
-	for row in 1u128.. {
-		for i in (1..range).rev() {
-			pascal_row[i] += pascal_row[i - 1];
-			if i > 4 && i as u128 <= row / 2 {
-				if is_choose_r(pascal_row[i], 2)
-					|| is_choose_r(pascal_row[i], 3)
-					|| is_choose_r(pascal_row[i], 4) {
-					println!("{}",pascal_row[i]);
+	for row in 1u64.. {
+		for col in (1..range).rev() {
+			pascal_row[col] += pascal_row[col - 1];
+			if col > 4 && col as u64 <= row / 2 {
+				if is_choose_r(pascal_row[col], 2)
+					|| is_choose_r(pascal_row[col], 3)
+					|| is_choose_r(pascal_row[col], 4)
+				{
+					println!("FOUND DUPLICATE {}", pascal_row[col]);
 				}
-				numbers.push(pascal_row[i]);
+				entries.push(PascalEntry::new(
+					pascal_row[col].digits()[0],
+					row,
+					col.try_into().expect("needs redesign: col > 65535"),
+				));
 			}
-			if pascal_row[i] > limit {
-				range = i;
+			if pascal_row[col] > limit {
+				range = col;
+				if col < 10 {
+					println!("n choose {col} exceeds limit at row {row}");
+				}
 			}
 		}
-		if range <= 4 {
+		if range <= 5 {
 			break;
 		}
 	}
 	println!(
 		"memory needed = {}MiB",
-		(numbers.len() * size_of::<UInt>()) >> 20
+		(entries.len() * size_of::<PascalEntry>()) >> 20
+	);
+	find_duplicates_in(&mut entries);
+}
+
+fn search_row_limit(row_limit: u32) {
+	let row_limit = u64::from(row_limit);
+	let mut pascal_row = vec![0u64; row_limit as usize / 2 + 1];
+	pascal_row[0] = 1;
+	let mut entries: Vec<PascalEntry> = vec![];
+	for row in 0..row_limit {
+		if row > 0 && row % 2 == 0 {
+			pascal_row[row as usize / 2] = pascal_row[row as usize / 2 - 1].wrapping_mul(2);
+		}
+		for col in (2..=(std::cmp::max(3, row) - 1) / 2).rev() {
+			pascal_row[col as usize] =
+				pascal_row[col as usize].wrapping_add(pascal_row[col as usize - 1]);
+			entries.push(PascalEntry::new(pascal_row[col as usize], row, col as u16));
+		}
+		pascal_row[1] = row;
+	}
+	println!(
+		"memory needed = {}MiB",
+		(entries.len() * size_of::<PascalEntry>()) >> 20
 	);
-	numbers.sort();
-	let mut prev = UInt::from(0u8);
-	let mut occurrences = 0;
-	for n in numbers {
-		if n == prev {
-			occurrences += 1;
-		} else if occurrences > 0 {
-			if occurrences > 1 {
-				println!("{prev}: {occurrences}");
+	find_duplicates_in(&mut entries);
+}
+
+fn main() -> ExitCode {
+	let args: Vec<OsString> = std::env::args_os().collect();
+	if args.len() < 2 {
+		eprintln!(" Usage: pascal entry-limit <p>  — search all entries up to 10^p");
+		eprintln!("        pascal row-limit <n>    — search all entries up to the nth row");
+		return ExitCode::FAILURE;
+	}
+	match args[1].as_os_str().to_str() {
+		Some("entry-limit") => {
+			let power_of_10: Option<usize> = match args.get(2) {
+				Some(s) => s.clone().into_string().ok().and_then(|x| x.parse().ok()),
+				None => Some(35),
+			};
+			let Some(power_of_10) = power_of_10 else {
+				eprintln!("Argument must be a nonnegative integer");
+				return ExitCode::FAILURE;
+			};
+			if power_of_10 > usize::MAX / 4
+				|| (power_of_10 as f64 * f64::log2(10.0)) as usize + 10 > size_of::<UInt>() * 8
+			{
+				eprintln!("Power of 10 is too large for integer type. You will have to increase the size of UInt in the source code.");
+				return ExitCode::FAILURE;
+			}
+			search_entry_limit(power_of_10);
+		}
+		Some("row-limit") => {
+			let row_limit: Option<u32> = match args.get(2) {
+				Some(s) => s.clone().into_string().ok().and_then(|x| x.parse().ok()),
+				None => Some(1000),
+			};
+			let Some(row_limit) = row_limit else {
+				eprintln!("Argument must be a nonnegative integer");
+				return ExitCode::FAILURE;
+			};
+			if row_limit > 0xffff * 2 {
+				eprintln!("row limit too large (need to change PascalEntry type)");
+				return ExitCode::FAILURE;
 			}
-			occurrences = 1;
+			search_row_limit(row_limit);
+		}
+		_ => {
+			eprintln!("Bad command: {:?}", args[1]);
 		}
-		prev = n;
 	}
 	ExitCode::SUCCESS
 }
-- 
cgit v1.2.3