From 8286a9dc4803dd0165846acfb105f28c662dceb0 Mon Sep 17 00:00:00 2001 From: pommicket Date: Fri, 6 Sep 2024 14:07:56 -0400 Subject: row-limit search --- Cargo.lock | 35 +++++++++++ Cargo.toml | 1 + README.md | 16 ++++- src/main.rs | 191 ++++++++++++++++++++++++++++++++++++++++++++++-------------- 4 files changed, 197 insertions(+), 46 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a5a6de5..3f25e85 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,15 +2,50 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "autocfg" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" + [[package]] name = "bnum" version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3e31ea183f6ee62ac8b8a8cf7feddd766317adfb13ff469de57ce033efd6a790" +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + [[package]] name = "pascal" version = "0.1.0" dependencies = [ "bnum", + "num-bigint", ] diff --git a/Cargo.toml b/Cargo.toml index 492faf6..da15ea0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,3 +5,4 @@ edition = "2021" [dependencies] bnum = "0.11.0" +num-bigint = "0.4.6" diff --git a/README.md b/README.md index 6d504d4..050040d 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ $$ De Weger did a computer search up to ${n\choose k} < 10^{30}$ in 1995. Now that we have faster computers we can go higher — and we can say for certain that the only solutions -up to ${n\choose k}<10^{41}$ are the ones found by de Weger: namely, the ones in the infinite +up to ${n\choose k}<10^{42}$ are the ones found by de Weger: namely, the ones in the infinite family above, $$ {15 \choose 5} = {14 \choose 6} = 3003 = {78 \choose 2}$$ $$ {104 \choose 39} = {103 \choose 40} = 61218182743304701891431482520$$ @@ -38,10 +38,20 @@ And the “sporadic” solutions: $$ {153 \choose 2} ={19\choose 5} = 11628$$ $$ {221\choose 2}={17 \choose 8}=24310$$ -This program searches up to $10^X$, when given the argument $X$. +This program searches up to ${n\choose k} <10^X$, when given the arguments `entry-limit X`. The search works by putting all ${n\choose k}<10^X$ with $5\leq k\leq \frac n 2$ into an array and sorting it. Then we check for adjacent elements which are equal, and use a binary search to check whether elements in the array are ${n\choose 2},{n\choose 3},{n\choose 4}$. This finds all solutions with $l>4$ (since the solutions with $l\leq 4$ are known). -Unfortunately searching to $10^{41}$ with this method already requires 13 GB of memory. +We can make a slight optimization by just storing the entries mod $2^{64}$ in the array, +then only doing the full comparison on entries who agree mod $2^{64}$. +But still searching to $10^{42}$ with this method already requires 11 GB of memory. + +Using this modular trick we can also search up to the 60,000th row (use arguments `row-limit 60000`), +and (sadly) confirm that the only repeated entries are the ones listed above and the new entries in the infinite family, +$$ {713\choose 273} = {714\choose 272} \approx 3.5 \times 10^{204}$$ +$$ {4894\choose 1870} = {4895\choose 1869} \approx 4.6 \times 10^{1141}$$ +$$ {33551 \choose 12816} = {33552 \choose 12815} \approx 6.0 \times 10^{9687}$$ +Again we run into a memory bottleneck — searching this far required 14 GB of memory. + diff --git a/src/main.rs b/src/main.rs index 8be934d..2d54fc7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,3 +1,4 @@ +use num_bigint::BigUint; use std::ffi::OsString; use std::process::ExitCode; @@ -54,27 +55,67 @@ fn superscript(number: &str) -> String { .collect() } -fn main() -> ExitCode { - let args: Vec = std::env::args_os().collect(); - if args.len() > 2 { - eprintln!("Please provide at most 1 argument (power of 10 to search up to)."); - return ExitCode::FAILURE; +#[derive(Clone, Copy)] +struct PascalEntry { + // row choose col mod 2^64 + value: u64, + row_lo: u16, + row_hi: u32, + col: u16, +} + +impl PascalEntry { + fn new(value: u64, row: u64, col: u16) -> Self { + assert!(row < (1 << 48)); + Self { + value, + row_lo: row as _, + row_hi: (row >> 16) as _, + col, + } } - let power_of_10: Option = match args.get(1) { - Some(s) => s.clone().into_string().ok().and_then(|x| x.parse().ok()), - None => Some(35), - }; - let Some(power_of_10) = power_of_10 else { - eprintln!("Argument must be a nonnegative integer"); - return ExitCode::FAILURE; - }; - if power_of_10 > usize::MAX / 4 - || (power_of_10 as f64 * f64::log2(10.0)) as usize + 10 > size_of::() * 8 - { - eprintln!("Power of 10 is too large for integer type. You will have to increase the size of UInt in the source code."); - return ExitCode::FAILURE; + fn row(self) -> u64 { + u64::from(self.row_lo) | u64::from(self.row_hi) << 16 } + fn full_value(self) -> BigUint { + let row: u64 = self.row(); + let col: u64 = self.col.into(); + let mut value = BigUint::from(1u8); + for r in row - col + 1..=row { + value *= BigUint::from(r); + } + for r in 1..=col { + value /= BigUint::from(r); + } + value + } +} + +fn find_duplicates_in(entries: &mut [PascalEntry]) { + entries.sort_by_key(|x| x.value); + for e in entries.chunk_by(|a, b| a.value == b.value) { + if e.len() == 1 { + continue; + } + for i in 0..e.len() { + for j in i + 1..e.len() { + if e[i].full_value() != e[j].full_value() { + continue; + } + println!( + "({} choose {}) = {} = ({} choose {})", + e[i].row(), + e[i].col, + e[i].full_value(), + e[j].row(), + e[j].col + ); + } + } + } +} +fn search_entry_limit(power_of_10: usize) { let mut pascal_row = [UInt::from(0u8); 500]; let mut range = pascal_row.len(); println!( @@ -82,44 +123,108 @@ fn main() -> ExitCode { superscript(&format!("{power_of_10}")) ); let limit = UInt::from(10u8).pow(power_of_10 as u32); - let mut numbers: Vec = Vec::new(); + let mut entries: Vec = vec![]; pascal_row[0] = UInt::from(1u8); - for row in 1u128.. { - for i in (1..range).rev() { - pascal_row[i] += pascal_row[i - 1]; - if i > 4 && i as u128 <= row / 2 { - if is_choose_r(pascal_row[i], 2) - || is_choose_r(pascal_row[i], 3) - || is_choose_r(pascal_row[i], 4) { - println!("{}",pascal_row[i]); + for row in 1u64.. { + for col in (1..range).rev() { + pascal_row[col] += pascal_row[col - 1]; + if col > 4 && col as u64 <= row / 2 { + if is_choose_r(pascal_row[col], 2) + || is_choose_r(pascal_row[col], 3) + || is_choose_r(pascal_row[col], 4) + { + println!("FOUND DUPLICATE {}", pascal_row[col]); } - numbers.push(pascal_row[i]); + entries.push(PascalEntry::new( + pascal_row[col].digits()[0], + row, + col.try_into().expect("needs redesign: col > 65535"), + )); } - if pascal_row[i] > limit { - range = i; + if pascal_row[col] > limit { + range = col; + if col < 10 { + println!("n choose {col} exceeds limit at row {row}"); + } } } - if range <= 4 { + if range <= 5 { break; } } println!( "memory needed = {}MiB", - (numbers.len() * size_of::()) >> 20 + (entries.len() * size_of::()) >> 20 + ); + find_duplicates_in(&mut entries); +} + +fn search_row_limit(row_limit: u32) { + let row_limit = u64::from(row_limit); + let mut pascal_row = vec![0u64; row_limit as usize / 2 + 1]; + pascal_row[0] = 1; + let mut entries: Vec = vec![]; + for row in 0..row_limit { + if row > 0 && row % 2 == 0 { + pascal_row[row as usize / 2] = pascal_row[row as usize / 2 - 1].wrapping_mul(2); + } + for col in (2..=(std::cmp::max(3, row) - 1) / 2).rev() { + pascal_row[col as usize] = + pascal_row[col as usize].wrapping_add(pascal_row[col as usize - 1]); + entries.push(PascalEntry::new(pascal_row[col as usize], row, col as u16)); + } + pascal_row[1] = row; + } + println!( + "memory needed = {}MiB", + (entries.len() * size_of::()) >> 20 ); - numbers.sort(); - let mut prev = UInt::from(0u8); - let mut occurrences = 0; - for n in numbers { - if n == prev { - occurrences += 1; - } else if occurrences > 0 { - if occurrences > 1 { - println!("{prev}: {occurrences}"); + find_duplicates_in(&mut entries); +} + +fn main() -> ExitCode { + let args: Vec = std::env::args_os().collect(); + if args.len() < 2 { + eprintln!(" Usage: pascal entry-limit

— search all entries up to 10^p"); + eprintln!(" pascal row-limit — search all entries up to the nth row"); + return ExitCode::FAILURE; + } + match args[1].as_os_str().to_str() { + Some("entry-limit") => { + let power_of_10: Option = match args.get(2) { + Some(s) => s.clone().into_string().ok().and_then(|x| x.parse().ok()), + None => Some(35), + }; + let Some(power_of_10) = power_of_10 else { + eprintln!("Argument must be a nonnegative integer"); + return ExitCode::FAILURE; + }; + if power_of_10 > usize::MAX / 4 + || (power_of_10 as f64 * f64::log2(10.0)) as usize + 10 > size_of::() * 8 + { + eprintln!("Power of 10 is too large for integer type. You will have to increase the size of UInt in the source code."); + return ExitCode::FAILURE; + } + search_entry_limit(power_of_10); + } + Some("row-limit") => { + let row_limit: Option = match args.get(2) { + Some(s) => s.clone().into_string().ok().and_then(|x| x.parse().ok()), + None => Some(1000), + }; + let Some(row_limit) = row_limit else { + eprintln!("Argument must be a nonnegative integer"); + return ExitCode::FAILURE; + }; + if row_limit > 0xffff * 2 { + eprintln!("row limit too large (need to change PascalEntry type)"); + return ExitCode::FAILURE; } - occurrences = 1; + search_row_limit(row_limit); + } + _ => { + eprintln!("Bad command: {:?}", args[1]); } - prev = n; } ExitCode::SUCCESS } -- cgit v1.2.3