diff options
author | John Fultz <jfultz@wolfram.com> | 2015-08-20 04:59:36 -0500 |
---|---|---|
committer | John Fultz <jfultz@wolfram.com> | 2015-08-20 04:59:36 -0500 |
commit | 6613f3fd45b4ecf6821ee7bb07c95f86f43b0db2 (patch) | |
tree | 98eb783e05100250cceba7fc4c56ddfdaaf0aba1 /makeminidawg/makeminidawgmain.cpp | |
parent | 4ef5b33708a4ff0435d5c8254b860cd03a264c66 (diff) |
Move DAWG generation into quackleio.
Same thing I just did for the gaddag code I'm now doing
for the dawg code. While I was at it, I made some
improvements to the dawg code...
* Instead of adding multiple cross-checks for various
kinds of node metadata, there's now only one
cross-check: a hash applied to each node.
* Some useless variables/members have been excised.
* Add ability to do a word count (cryptohash coming soon).
* Make it possible to call generate() and writeIndex()
multiple times without corrupting the dictionary.
Diffstat (limited to 'makeminidawg/makeminidawgmain.cpp')
-rw-r--r-- | makeminidawg/makeminidawgmain.cpp | 124 |
1 files changed, 119 insertions, 5 deletions
diff --git a/makeminidawg/makeminidawgmain.cpp b/makeminidawg/makeminidawgmain.cpp index 82871be..89afb68 100644 --- a/makeminidawg/makeminidawgmain.cpp +++ b/makeminidawg/makeminidawgmain.cpp @@ -16,16 +16,130 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#include <QCoreApplication> -#include <QStringList> +#include <map> +#include <QtCore> -#include "minidawgmaker.h" +#include "quackleio/dawgfactory.h" +#include "quackleio/froggetopt.h" +#include "quackleio/util.h" + +std::map< QString, bool> smallerMap; +std::map< QString, int> playabilityMap; int main(int argc, char **argv) { QCoreApplication a(argc, argv); - MiniDawgMaker maker; - return maker.executeFromArguments(); + GetOpt opts; + QString alphabet; + opts.addOption('a', "alphabet", &alphabet); + if (!opts.parse()) + return 1; + + if (alphabet.isNull()) + alphabet = "english"; + + QString alphabetFile = QString("../data/alphabets/%1.quackle_alphabet").arg(alphabet); + UVcout << "Using alphabet file: " << QuackleIO::Util::qstringToString(alphabetFile) << endl; + + DawgFactory factory(alphabetFile); + + + QString smallerDictFilename = "smaller.raw"; + QFile smallerDict(smallerDictFilename); + if (!smallerDict.exists()) + { + UVcout << "smaller dictionary does not exist: " << QuackleIO::Util::qstringToString(smallerDictFilename) << endl; + return false; + } + + if (!smallerDict.open(QIODevice::ReadOnly | QIODevice::Text)) + { + UVcout << "Could not open " << QuackleIO::Util::qstringToString(smallerDictFilename) << endl; + return false; + } + + QTextStream smallerStream(&smallerDict); + smallerStream.setCodec(QTextCodec::codecForName("UTF-8")); + + while (!smallerStream.atEnd()) + { + QString originalQString; + smallerStream >> originalQString; + //UVcout << "this word is in the smaller dictionary: " << QuackleIO::Util::qstringToString(originalQString) << endl; + smallerMap[originalQString] = true; + } + + QString playabilityFilename = "playabilities.raw"; + QFile 
playability(playabilityFilename); + if (!playability.exists()) + { + UVcout << "playability does not exist: " << QuackleIO::Util::qstringToString(playabilityFilename) << endl; + return false; + } + + if (!playability.open(QIODevice::ReadOnly | QIODevice::Text)) + { + UVcout << "Could not open " << QuackleIO::Util::qstringToString(playabilityFilename) << endl; + return false; + } + + QTextStream playabilityStream(&playability); + playabilityStream.setCodec(QTextCodec::codecForName("UTF-8")); + + while (!playabilityStream.atEnd()) + { + int pb; + playabilityStream >> pb; + QString originalQString; + playabilityStream >> originalQString; + //UVcout << "playability: " << QuackleIO::Util::qstringToString(originalQString) << " " << pb << endl; + playabilityMap[originalQString] = pb; + } + + QString dawgFilename = "dawginput.raw"; + QFile file(dawgFilename); + if (!file.exists()) + { + UVcout << "dawg does not exist: " << QuackleIO::Util::qstringToString(dawgFilename) << endl; + return false; + } + + if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) + { + UVcout << "Could not open " << QuackleIO::Util::qstringToString(dawgFilename) << endl; + return false; + } + + QTextStream stream(&file); + stream.setCodec(QTextCodec::codecForName("UTF-8")); + + while (!stream.atEnd()) + { + QString word; + stream >> word; + + bool inSmaller = smallerMap[word]; + int pb = playabilityMap[word]; + + if (stream.atEnd()) + break; + + if (!factory.pushWord(word, inSmaller, pb)) + UVcout << "not encodable without leftover: " << QuackleIO::Util::qstringToString(word) << endl; + } + + file.close(); + + UVcout << "encodable words: " << factory.encodableWords() << ", unencodable words: " << factory.unencodableWords() << endl; + + UVcout << "nodelist.size(): " << factory.nodeCount() << endl; + + factory.generate(); + UVcout << "Compressed nodelist.size(): " << factory.nodeCount() << endl; + + factory.writeIndex("output.dawg"); + + return 0; } |