From 6613f3fd45b4ecf6821ee7bb07c95f86f43b0db2 Mon Sep 17 00:00:00 2001
From: John Fultz
Date: Thu, 20 Aug 2015 04:59:36 -0500
Subject: Move DAWG generation into quackleio.

Same thing I just did for the gaddag code I'm now doing for the dawg code.
While I was at it, I made some improvements to the dawg code...
* Instead of adding multiple cross-checks for various kinds of node
  metadata, there's now only one cross-check...a hash applied to each node.
* Some useless variables/members have been excised.
* Add ability to do a word count (cryptohash coming soon).
* Make it possible to call generate() and writeIndex() multiple times
  without corrupting the dictionary.
---
 makeminidawg/makeminidawgmain.cpp | 124 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 119 insertions(+), 5 deletions(-)

(limited to 'makeminidawg/makeminidawgmain.cpp')

diff --git a/makeminidawg/makeminidawgmain.cpp b/makeminidawg/makeminidawgmain.cpp
index 82871be..89afb68 100644
--- a/makeminidawg/makeminidawgmain.cpp
+++ b/makeminidawg/makeminidawgmain.cpp
@@ -16,16 +16,130 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/ -#include -#include +#include +#include -#include "minidawgmaker.h" +#include "quackleio/dawgfactory.h" +#include "quackleio/froggetopt.h" +#include "quackleio/util.h" + +std::map< QString, bool> smallerMap; +std::map< QString, int> playabilityMap; int main(int argc, char **argv) { QCoreApplication a(argc, argv); - MiniDawgMaker maker; - return maker.executeFromArguments(); + GetOpt opts; + QString alphabet; + opts.addOption('a', "alphabet", &alphabet); + if (!opts.parse()) + return 1; + + if (alphabet.isNull()) + alphabet = "english"; + + QString alphabetFile = QString("../data/alphabets/%1.quackle_alphabet").arg(alphabet); + UVcout << "Using alphabet file: " << QuackleIO::Util::qstringToString(alphabetFile) << endl; + + DawgFactory factory(alphabetFile); + + + QString smallerDictFilename = "smaller.raw"; + QFile smallerDict(smallerDictFilename); + if (!smallerDict.exists()) + { + UVcout << "smaller dictionary does not exist: " << QuackleIO::Util::qstringToString(smallerDictFilename) << endl; + return false; + } + + if (!smallerDict.open(QIODevice::ReadOnly | QIODevice::Text)) + { + UVcout << "Could not open " << QuackleIO::Util::qstringToString(smallerDictFilename) << endl; + return false; + } + + QTextStream smallerStream(&smallerDict); + smallerStream.setCodec(QTextCodec::codecForName("UTF-8")); + + while (!smallerStream.atEnd()) + { + QString originalQString; + smallerStream >> originalQString; + //UVcout << "this word is in the smaller dictionary: " << QuackleIO::Util::qstringToString(originalQString) << endl; + smallerMap[originalQString] = true; + } + + QString playabilityFilename = "playabilities.raw"; + QFile playability(playabilityFilename); + if (!playability.exists()) + { + UVcout << "playability does not exist: " << QuackleIO::Util::qstringToString(playabilityFilename) << endl; + return false; + } + + if (!playability.open(QIODevice::ReadOnly | QIODevice::Text)) + { + UVcout << "Could not open " << 
QuackleIO::Util::qstringToString(playabilityFilename) << endl; + return false; + } + + QTextStream playabilityStream(&playability); + playabilityStream.setCodec(QTextCodec::codecForName("UTF-8")); + + while (!playabilityStream.atEnd()) + { + int pb; + playabilityStream >> pb; + QString originalQString; + playabilityStream >> originalQString; + //UVcout << "playability: " << QuackleIO::Util::qstringToString(originalQString) << " " << pb << endl; + playabilityMap[originalQString] = pb; + } + + QString dawgFilename = "dawginput.raw"; + QFile file(dawgFilename); + if (!file.exists()) + { + UVcout << "dawg does not exist: " << QuackleIO::Util::qstringToString(dawgFilename) << endl; + return false; + } + + if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) + { + UVcout << "Could not open " << QuackleIO::Util::qstringToString(dawgFilename) << endl; + return false; + } + + QTextStream stream(&file); + stream.setCodec(QTextCodec::codecForName("UTF-8")); + + while (!stream.atEnd()) + { + QString word; + stream >> word; + + bool inSmaller = smallerMap[word]; + int pb = playabilityMap[word]; + + if (stream.atEnd()) + break; + + if (!factory.pushWord(word, inSmaller, pb)) + UVcout << "not encodable without leftover: " << QuackleIO::Util::qstringToString(word) << endl; + } + + file.close(); + + UVcout << "encodable words: " << factory.encodableWords() << ", unencodable words: " << factory.unencodableWords() << endl; + + UVcout << "nodelist.size(): " << factory.nodeCount() << endl; + + factory.generate(); + UVcout << "Compressed nodelist.size(): " << factory.nodeCount() << endl; + + factory.writeIndex("output.dawg"); + + return 0; } -- cgit v1.2.3