diff options
author | John Fultz <jfultz@wolfram.com> | 2015-09-07 14:19:46 -0500 |
---|---|---|
committer | John Fultz <jfultz@wolfram.com> | 2015-09-07 15:45:41 -0500 |
commit | 5350a57f1be22b28914fca14225c73dac5b30b24 (patch) | |
tree | 399a309a1302d30ec83cc5d7281ac7286882523a | |
parent | 9ea9637922ca68d24d7517cf61870d8cee31f6c5 (diff) |
Auto-generate gaddags
A user interface for this still needs to be added, but gaddags are now
auto-generated if they can't be found.
Some specific improvements here:
* FixedLengthString gained a pop_back member.
* Add code to allow v1 gaddags and v0 dawgs to work
together.
* Change memory allocation of dawgs and gaddags to
be dynamic (the old fixed limit didn't accommodate the
gaddag for the ridiculously large Polish dictionary).
* The Settings class now knows a bit about generating
gaddags. This will be important for giving UI feedback.
* Fixed several places where filenames should have been
using string, not UVString.
* Dawg/GaddagFactory should have been using
UVString, not QString. My misunderstanding.
-rw-r--r-- | fixedstring.h | 8 | ||||
-rw-r--r-- | lexiconparameters.cpp | 81 | ||||
-rw-r--r-- | lexiconparameters.h | 6 | ||||
-rw-r--r-- | quacker/settings.cpp | 86 | ||||
-rw-r--r-- | quacker/settings.h | 9 | ||||
-rw-r--r-- | quackleio/dawgfactory.cpp | 14 | ||||
-rw-r--r-- | quackleio/dawgfactory.h | 6 | ||||
-rw-r--r-- | quackleio/flexiblealphabet.h | 2 | ||||
-rw-r--r-- | quackleio/gaddagfactory.cpp | 71 | ||||
-rw-r--r-- | quackleio/gaddagfactory.h | 7 |
10 files changed, 197 insertions, 93 deletions
diff --git a/fixedstring.h b/fixedstring.h index e8db0bf..a31ecd6 100644 --- a/fixedstring.h +++ b/fixedstring.h @@ -54,6 +54,7 @@ class FixedLengthString size_type size() const { return length(); } void clear() { m_end = m_data; } void push_back(char c); + void pop_back(); const char* constData() const { return m_data; } int compare(const FixedLengthString& s) const; @@ -221,6 +222,13 @@ FixedLengthString::push_back(char c) *this += c; } +inline void +FixedLengthString::pop_back() +{ + assert(size() > 0); + m_end--; +} + inline int FixedLengthString::compare(const FixedLengthString& s) const { diff --git a/lexiconparameters.cpp b/lexiconparameters.cpp index f6e646b..6761fc1 100644 --- a/lexiconparameters.cpp +++ b/lexiconparameters.cpp @@ -32,7 +32,6 @@ class Quackle::V0LexiconInterpreter : public LexiconInterpreter virtual void loadDawg(ifstream &file, LexiconParameters &lexparams) { int i = 0; - file.unget(); // version 0 doesn't have a version byte...it's just the node byte which is always set to 0 while (!file.eof()) { file.read((char*)(lexparams.m_dawg) + i, 7); @@ -43,7 +42,6 @@ class Quackle::V0LexiconInterpreter : public LexiconInterpreter virtual void loadGaddag(ifstream &file, LexiconParameters &lexparams) { int i = 0; - file.unget(); while (!file.eof()) { file.read((char*)(lexparams.m_gaddag) + i, 4); @@ -74,6 +72,7 @@ class Quackle::V1LexiconInterpreter : public LexiconInterpreter { int i = 0; unsigned char bytes[3]; + file.get(); // skip past version byte file.read(lexparams.m_hash, sizeof(lexparams.m_hash)); file.read((char*)bytes, 3); lexparams.m_wordcount = (bytes[0] << 16) | (bytes[1] << 8) | bytes[2]; @@ -87,14 +86,22 @@ class Quackle::V1LexiconInterpreter : public LexiconInterpreter virtual void loadGaddag(ifstream &file, LexiconParameters &lexparams) { char hash[16]; + file.get(); // skip past version byte file.read(hash, sizeof(hash)); if (memcmp(hash, lexparams.m_hash, sizeof(hash))) { - lexparams.unloadGaddag(); // don't use a mismatched 
gaddag - return; + // If we're using a v0 DAWG, then ignore the hash + for (size_t i = 0; i < sizeof(lexparams.m_hash); i++) + { + if (lexparams.m_hash[0] != 0) + { + lexparams.unloadGaddag(); // don't use a mismatched gaddag + return; + } + } } - int i = 0; + size_t i = 0; while (!file.eof()) { file.read((char*)(lexparams.m_gaddag) + i, 4); @@ -160,20 +167,16 @@ void LexiconParameters::loadDawg(const string &filename) } char versionByte = file.get(); - switch(versionByte) + m_interpreter = createInterpreter(versionByte); + if (m_interpreter == NULL) { - case 0: - m_interpreter = new V0LexiconInterpreter(); - break; - case 1: - m_interpreter = new V1LexiconInterpreter(); - break; - default: - UVcout << "couldn't open dawg " << filename.c_str() << endl; - return; + UVcout << "couldn't open file " << filename.c_str() << endl; + return; } - m_dawg = new unsigned char[7000000]; + file.seekg(0, ios_base::end); + m_dawg = new unsigned char[file.tellg()]; + file.seekg(0, ios_base::beg); m_interpreter->loadDawg(file, *this); } @@ -191,19 +194,53 @@ void LexiconParameters::loadGaddag(const string &filename) } char versionByte = file.get(); - if (versionByte != m_interpreter->versionNumber()) + if (versionByte < m_interpreter->versionNumber()) return; - m_gaddag = new unsigned char[40000000]; + file.seekg(0, ios_base::end); + m_gaddag = new unsigned char[file.tellg()]; + file.seekg(0, ios_base::beg); - m_interpreter->loadGaddag(file, *this); + // must create a local interpreter because dawg/gaddag versions might not match + LexiconInterpreter* interpreter = createInterpreter(versionByte); + if (interpreter != NULL) + { + interpreter->loadGaddag(file, *this); + delete interpreter; + } + else + unloadGaddag(); } string LexiconParameters::findDictionaryFile(const string &lexicon) { - return DataManager::self()->findDataFile("lexica", lexicon); + return QUACKLE_DATAMANAGER->findDataFile("lexica", lexicon); +} + +UVString LexiconParameters::hashString(bool shortened) const +{ + 
const char hex[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; + string hashStr; + for (size_t i = 0; i < sizeof(m_hash); i++) + { + hashStr.push_back(hex[(m_hash[i] & 0xF0) >> 4]); + hashStr.push_back(hex[m_hash[i] & 0x0F]); + if (shortened && i == 5) + break; + if (i % 2 == 1) + hashStr.push_back('-'); + } + return hashStr; } -QString hashString() const +LexiconInterpreter* LexiconParameters::createInterpreter(char version) const { - return QString(QByteArray(m_hash, sizeof(m_hash)).toHex()); + switch(version) + { + case 0: + return new V0LexiconInterpreter(); + case 1: + return new V1LexiconInterpreter(); + default: + return NULL; + } } diff --git a/lexiconparameters.h b/lexiconparameters.h index b5bc564..3890d8d 100644 --- a/lexiconparameters.h +++ b/lexiconparameters.h @@ -19,8 +19,6 @@ #ifndef QUACKLE_LEXICONPARAMETERS_H #define QUACKLE_LEXICONPARAMETERS_H -#include <QString> -#include "alphabetparameters.h" #include "gaddag.h" namespace Quackle @@ -77,7 +75,7 @@ public: } const GaddagNode *gaddagRoot() const { return (GaddagNode *) &m_gaddag[0]; }; - QString hashString() const; + UVString hashString(bool shortened) const; protected: unsigned char *m_dawg; @@ -86,6 +84,8 @@ protected: LexiconInterpreter *m_interpreter; char m_hash[16]; int m_wordcount; + + LexiconInterpreter* createInterpreter(char version) const; }; } diff --git a/quacker/settings.cpp b/quacker/settings.cpp index 362e916..1febdb5 100644 --- a/quacker/settings.cpp +++ b/quacker/settings.cpp @@ -93,6 +93,8 @@ Settings::Settings(QWidget *parent) m_appDataDir = directory.absolutePath(); } m_userDataDir = QDesktopServices::storageLocation(QDesktopServices::DataLocation); + QDir qdir(m_userDataDir); + qdir.mkpath("lexica"); } void Settings::createGUI() @@ -195,49 +197,97 @@ void Settings::initialize() if (lexiconName == "cswfeb07") lexiconName = "cswapr07"; - setQuackleToUseLexiconName(QuackleIO::Util::qstringToStdString(lexiconName)); - 
setQuackleToUseAlphabetName(QuackleIO::Util::qstringToStdString(settings.value("quackle/settings/alphabet-name", QString("english")).toString())); + setQuackleToUseLexiconName(lexiconName); + setQuackleToUseAlphabetName(settings.value("quackle/settings/alphabet-name", QString("english")).toString()); setQuackleToUseThemeName(settings.value("quackle/settings/theme-name", QString("traditional")).toString()); setQuackleToUseBoardName(settings.value("quackle/settings/board-name", QString("")).toString()); } -void Settings::setQuackleToUseLexiconName(const string &lexiconName) +void Settings::buildGaddag(const string &filename) { - if (QUACKLE_LEXICON_PARAMETERS->lexiconName() != lexiconName) + GaddagFactory factory((UVString())); + Quackle::LetterString word; + + pushIndex(factory, word, 1); + factory.generate(); + factory.writeIndex(filename); +} + +void Settings::pushIndex(GaddagFactory &factory, Quackle::LetterString &word, int index) +{ + unsigned int p; + Quackle::Letter letter; + bool t; + bool lastchild; + bool british; + int playability; + + do + { + QUACKLE_LEXICON_PARAMETERS->dawgAt(index, p, letter, t, lastchild, british, playability); + word.push_back(letter); + if (t) + factory.pushWord(word); + if (p) + pushIndex(factory, word, p); + index++; + word.pop_back(); + } while (!lastchild); +} + + +void Settings::setQuackleToUseLexiconName(const QString &lexiconName) +{ + string lexiconNameStr = lexiconName.toStdString(); + if (QUACKLE_LEXICON_PARAMETERS->lexiconName() != lexiconNameStr) { - QUACKLE_LEXICON_PARAMETERS->setLexiconName(lexiconName); + QUACKLE_LEXICON_PARAMETERS->setLexiconName(lexiconNameStr); - string dawgFile = Quackle::LexiconParameters::findDictionaryFile(lexiconName + ".dawg"); + string dawgFile = Quackle::LexiconParameters::findDictionaryFile(lexiconNameStr + ".dawg"); if (dawgFile.empty()) { - UVcout << "Dawg for lexicon '" << lexiconName << "' does not exist." 
<< endl; + UVcout << "Dawg for lexicon '" << lexiconNameStr << "' does not exist." << endl; QUACKLE_LEXICON_PARAMETERS->unloadDawg(); } else QUACKLE_LEXICON_PARAMETERS->loadDawg(dawgFile); - string gaddagFile = Quackle::LexiconParameters::findDictionaryFile(lexiconName + ".gaddag"); + if (!QUACKLE_LEXICON_PARAMETERS->hasDawg()) + { + QUACKLE_LEXICON_PARAMETERS->unloadGaddag(); + return; + } + + string gaddagFile = Quackle::LexiconParameters::findDictionaryFile(lexiconNameStr + ".gaddag"); if (gaddagFile.empty()) { - UVcout << "Gaddag for lexicon '" << lexiconName << "' does not exist." << endl; + UVcout << "Gaddag for lexicon '" << lexiconNameStr << "' does not exist." << endl; QUACKLE_LEXICON_PARAMETERS->unloadGaddag(); } else QUACKLE_LEXICON_PARAMETERS->loadGaddag(gaddagFile); - QUACKLE_STRATEGY_PARAMETERS->initialize(lexiconName); + if (!QUACKLE_LEXICON_PARAMETERS->hasGaddag()) + { + gaddagFile = QUACKLE_DATAMANAGER->makeDataFilename("lexica", lexiconNameStr + ".gaddag", true); + buildGaddag(gaddagFile); + QUACKLE_LEXICON_PARAMETERS->loadGaddag(gaddagFile); + } + + QUACKLE_STRATEGY_PARAMETERS->initialize(lexiconNameStr); } } -void Settings::setQuackleToUseAlphabetName(const string &alphabetName) +void Settings::setQuackleToUseAlphabetName(const QString &alphabetName) { - if (QUACKLE_ALPHABET_PARAMETERS->alphabetName() != alphabetName) + string alphabetNameStr = alphabetName.toStdString(); + if (QUACKLE_ALPHABET_PARAMETERS->alphabetName() != alphabetNameStr) { - QString alphabetFile = QuackleIO::Util::stdStringToQString(Quackle::AlphabetParameters::findAlphabetFile(alphabetName + ".quackle_alphabet")); + QString alphabetFileStr = QuackleIO::Util::stdStringToQString(Quackle::AlphabetParameters::findAlphabetFile(alphabetNameStr + ".quackle_alphabet")); QuackleIO::FlexibleAlphabetParameters *flexure = new QuackleIO::FlexibleAlphabetParameters; - flexure->setAlphabetName(alphabetName); - if (flexure->load(alphabetFile)) + flexure->setAlphabetName(alphabetNameStr); + 
if (flexure->load(alphabetFileStr)) { if (flexure->length() != QUACKLE_ALPHABET_PARAMETERS->length() && QUACKLE_ALPHABET_PARAMETERS->alphabetName() != "default") { @@ -295,8 +345,7 @@ void Settings::lexiconChanged(const QString &lexiconName) editLexicon(); return; } - string lexiconNameString = QuackleIO::Util::qstringToStdString(lexiconName); - setQuackleToUseLexiconName(lexiconNameString); + setQuackleToUseLexiconName(lexiconName); CustomQSettings settings; settings.setValue("quackle/settings/lexicon-name", lexiconName); @@ -311,8 +360,7 @@ void Settings::alphabetChanged(const QString &alphabetName) editAlphabet(); return; } - string alphabetNameString = QuackleIO::Util::qstringToStdString(alphabetName); - setQuackleToUseAlphabetName(alphabetNameString); + setQuackleToUseAlphabetName(alphabetName); CustomQSettings settings; settings.setValue("quackle/settings/alphabet-name", alphabetName); diff --git a/quacker/settings.h b/quacker/settings.h index cee0562..fab2f3f 100644 --- a/quacker/settings.h +++ b/quacker/settings.h @@ -24,6 +24,8 @@ #include <QWidget> #include <QSettings> +#include "quackleio/gaddagfactory.h" + class QComboBox; class QCheckBox; class QPushButton; @@ -72,8 +74,8 @@ protected slots: void editAlphabet(); void editTheme(); - void setQuackleToUseLexiconName(const string &lexiconName); - void setQuackleToUseAlphabetName(const string &alphabetName); + void setQuackleToUseLexiconName(const QString &lexiconName); + void setQuackleToUseAlphabetName(const QString &alphabetName); void setQuackleToUseThemeName(const QString &themeName); void setQuackleToUseBoardName(const QString &lexiconName); @@ -94,6 +96,9 @@ private: // populate the popup based on what's in QSettings void loadBoardNameCombo(); + void buildGaddag(const string &filename); + void pushIndex(GaddagFactory &factory, Quackle::LetterString &word, int index); + static Settings *m_self; }; diff --git a/quackleio/dawgfactory.cpp b/quackleio/dawgfactory.cpp index 74b4346..3a971a3 100644 --- 
a/quackleio/dawgfactory.cpp +++ b/quackleio/dawgfactory.cpp @@ -25,10 +25,10 @@ #include "util.h" -DawgFactory::DawgFactory(const QString& alphabetFile) +DawgFactory::DawgFactory(const UVString& alphabetFile) { QuackleIO::FlexibleAlphabetParameters *flexure = new QuackleIO::FlexibleAlphabetParameters; - flexure->load(alphabetFile); + flexure->load(QuackleIO::Util::uvStringToQString(alphabetFile)); m_alphas = flexure; m_root.insmallerdict = false; @@ -45,12 +45,10 @@ DawgFactory::~DawgFactory() delete m_alphas; } -bool DawgFactory::pushWord(const QString& word, bool inSmaller, int playability) +bool DawgFactory::pushWord(const UVString& word, bool inSmaller, int playability) { - UVString originalString = QuackleIO::Util::qstringToString(word); - UVString leftover; - Quackle::LetterString encodedWord = m_alphas->encode(originalString, &leftover); + Quackle::LetterString encodedWord = m_alphas->encode(word, &leftover); if (leftover.empty()) { if (m_root.pushWord(encodedWord, inSmaller, playability)) @@ -129,9 +127,9 @@ void DawgFactory::generate() m_root.print(m_nodelist); } -void DawgFactory::writeIndex(const QString& filename) +void DawgFactory::writeIndex(const UVString& filename) { - ofstream out(QuackleIO::Util::qstringToStdString(filename).c_str(), ios::out | ios::binary); + ofstream out(filename.c_str(), ios::out | ios::binary); unsigned char bytes[7]; bytes[0] = (m_encodableWords & 0x00FF0000) >> 16; diff --git a/quackleio/dawgfactory.h b/quackleio/dawgfactory.h index 23bb4f5..051e632 100644 --- a/quackleio/dawgfactory.h +++ b/quackleio/dawgfactory.h @@ -26,7 +26,7 @@ class DawgFactory { public: - DawgFactory(const QString& alphabetFile); + DawgFactory(const UVString& alphabetFile); ~DawgFactory(); int wordCount() const { return m_root.wordCount(); }; @@ -35,10 +35,10 @@ public: int unencodableWords() const { return m_unencodableWords; }; int duplicateWords() const { return m_duplicateWords; }; - bool pushWord(const QString& word, bool inSmaller, int 
playability); + bool pushWord(const UVString& word, bool inSmaller, int playability); void hashWord(const Quackle::LetterString &word); void generate(); - void writeIndex(const QString& fname); + void writeIndex(const UVString& filename); const char* hashBytes() { return m_hash.charptr; }; diff --git a/quackleio/flexiblealphabet.h b/quackleio/flexiblealphabet.h index 89bd1f4..d5db68a 100644 --- a/quackleio/flexiblealphabet.h +++ b/quackleio/flexiblealphabet.h @@ -21,8 +21,6 @@ #include "alphabetparameters.h" -class QString; - namespace QuackleIO { diff --git a/quackleio/gaddagfactory.cpp b/quackleio/gaddagfactory.cpp index 7f666cb..53ccf04 100644 --- a/quackleio/gaddagfactory.cpp +++ b/quackleio/gaddagfactory.cpp @@ -24,11 +24,15 @@ #include "gaddagfactory.h" #include "util.h" -GaddagFactory::GaddagFactory(const QString& alphabetFile) +GaddagFactory::GaddagFactory(const UVString &alphabetFile) + : m_encodableWords(0), m_unencodableWords(0), m_alphas(NULL) { - QuackleIO::FlexibleAlphabetParameters *flexure = new QuackleIO::FlexibleAlphabetParameters; - flexure->load(alphabetFile); - m_alphas = flexure; + if (!alphabetFile.empty()) + { + QuackleIO::FlexibleAlphabetParameters *flexure = new QuackleIO::FlexibleAlphabetParameters; + flexure->load(QuackleIO::Util::uvStringToQString(alphabetFile)); + m_alphas = flexure; + } // So the separator is sorted to last. m_root.t = false; @@ -44,35 +48,13 @@ GaddagFactory::~GaddagFactory() delete m_alphas; } -bool GaddagFactory::pushWord(const QString& word) +bool GaddagFactory::pushWord(const UVString &word) { - UVString originalString = QuackleIO::Util::qstringToString(word); - UVString leftover; - Quackle::LetterString encodedWord = m_alphas->encode(originalString, &leftover); + Quackle::LetterString encodedWord = m_alphas->encode(word, &leftover); if (leftover.empty()) { - ++m_encodableWords; - hashWord(encodedWord); - // FIXME: This hash will fail if duplicate words are passed in. 
- // But testing for duplicate words isn't so easy without keeping - // an entirely separate list. - - for (unsigned i = 1; i <= encodedWord.length(); i++) - { - Quackle::LetterString newword; - - for (int j = i - 1; j >= 0; j--) - newword.push_back(encodedWord[j]); - - if (i < encodedWord.length()) - { - newword.push_back(internalSeparatorRepresentation); // "^" - for (unsigned j = i; j < encodedWord.length(); j++) - newword.push_back(encodedWord[j]); - } - m_gaddagizedWords.push_back(newword); - } + pushWord(encodedWord); return true; } @@ -80,6 +62,32 @@ bool GaddagFactory::pushWord(const QString& word) return false; } +bool GaddagFactory::pushWord(const Quackle::LetterString &word) +{ + ++m_encodableWords; + hashWord(word); + // FIXME: This hash will fail if duplicate words are passed in. + // But testing for duplicate words isn't so easy without keeping + // an entirely separate list. + + for (unsigned i = 1; i <= word.length(); i++) + { + Quackle::LetterString newword; + + for (int j = i - 1; j >= 0; j--) + newword.push_back(word[j]); + + if (i < word.length()) + { + newword.push_back(internalSeparatorRepresentation); // "^" + for (unsigned j = i; j < word.length(); j++) + newword.push_back(word[j]); + } + m_gaddagizedWords.push_back(newword); + } + return true; +} + void GaddagFactory::hashWord(const Quackle::LetterString &word) { QCryptographicHash wordhash(QCryptographicHash::Md5); @@ -93,6 +101,7 @@ void GaddagFactory::hashWord(const Quackle::LetterString &word) void GaddagFactory::generate() { + sort(m_gaddagizedWords.begin(), m_gaddagizedWords.end()); Quackle::WordList::const_iterator wordsEnd = m_gaddagizedWords.end(); for (Quackle::WordList::const_iterator wordsIt = m_gaddagizedWords.begin(); wordsIt != wordsEnd; ++wordsIt) m_root.pushWord(*wordsIt); @@ -100,13 +109,13 @@ void GaddagFactory::generate() // m_root.pushWord(words); } -void GaddagFactory::writeIndex(const QString &fname) +void GaddagFactory::writeIndex(const string &fname) { 
m_nodelist.push_back(&m_root); m_root.print(m_nodelist); - ofstream out(QuackleIO::Util::qstringToStdString(fname).c_str(), ios::out | ios::binary); + ofstream out(fname.c_str(), ios::out | ios::binary); out.put(1); // GADDAG format version 1 out.write(m_hash.charptr, sizeof(m_hash.charptr)); diff --git a/quackleio/gaddagfactory.h b/quackleio/gaddagfactory.h index 03cb546..415baff 100644 --- a/quackleio/gaddagfactory.h +++ b/quackleio/gaddagfactory.h @@ -27,7 +27,7 @@ public: static const Quackle::Letter internalSeparatorRepresentation = QUACKLE_FIRST_LETTER + QUACKLE_MAXIMUM_ALPHABET_SIZE; - GaddagFactory(const QString& alphabetFile); + GaddagFactory(const UVString &alphabetFile); ~GaddagFactory(); int wordCount() const { return m_gaddagizedWords.size(); }; @@ -35,11 +35,12 @@ public: int encodableWords() const { return m_encodableWords; }; int unencodableWords() const { return m_unencodableWords; }; - bool pushWord(const QString& word); + bool pushWord(const UVString &word); + bool pushWord(const Quackle::LetterString &word); void hashWord(const Quackle::LetterString &word); void sortWords() { sort(m_gaddagizedWords.begin(), m_gaddagizedWords.end()); }; void generate(); - void writeIndex(const QString& fname); + void writeIndex(const string &fname); const char* hashBytes() { return m_hash.charptr; }; |