From 5350a57f1be22b28914fca14225c73dac5b30b24 Mon Sep 17 00:00:00 2001 From: John Fultz Date: Mon, 7 Sep 2015 14:19:46 -0500 Subject: Auto-generate gaddags Need to add a user interface, but gaddags are now auto-generated if they can't be found. Some specific improvements here: * FixedLengthString gained a pop_back member. * Add code to allow v1 gaddags and v0 dawgs to work together. * Change memory allocation of dawgs and gaddags to be dynamic (the old limit didn't accommodate the ridiculously large Polish dictionary in the gaddag) * The Settings class now knows a bit about generating gaddags. This will be important for giving UI feedback. * Fixed several places using filenames which should be using string, not UVString. * Dawg/GaddagFactory should have been using UVString, not QString. My misunderstanding. --- quackleio/dawgfactory.cpp | 14 ++++----- quackleio/dawgfactory.h | 6 ++-- quackleio/flexiblealphabet.h | 2 -- quackleio/gaddagfactory.cpp | 71 +++++++++++++++++++++++++------------------- quackleio/gaddagfactory.h | 7 +++-- 5 files changed, 53 insertions(+), 47 deletions(-) (limited to 'quackleio') diff --git a/quackleio/dawgfactory.cpp b/quackleio/dawgfactory.cpp index 74b4346..3a971a3 100644 --- a/quackleio/dawgfactory.cpp +++ b/quackleio/dawgfactory.cpp @@ -25,10 +25,10 @@ #include "util.h" -DawgFactory::DawgFactory(const QString& alphabetFile) +DawgFactory::DawgFactory(const UVString& alphabetFile) { QuackleIO::FlexibleAlphabetParameters *flexure = new QuackleIO::FlexibleAlphabetParameters; - flexure->load(alphabetFile); + flexure->load(QuackleIO::Util::uvStringToQString(alphabetFile)); m_alphas = flexure; m_root.insmallerdict = false; @@ -45,12 +45,10 @@ DawgFactory::~DawgFactory() delete m_alphas; } -bool DawgFactory::pushWord(const QString& word, bool inSmaller, int playability) +bool DawgFactory::pushWord(const UVString& word, bool inSmaller, int playability) { - UVString originalString = QuackleIO::Util::qstringToString(word); - UVString leftover; - Quackle::LetterString encodedWord = m_alphas->encode(originalString, &leftover); + Quackle::LetterString encodedWord = m_alphas->encode(word, &leftover); if (leftover.empty()) { if (m_root.pushWord(encodedWord, inSmaller, playability)) @@ -129,9 +127,9 @@ void DawgFactory::generate() m_root.print(m_nodelist); } -void DawgFactory::writeIndex(const QString& filename) +void DawgFactory::writeIndex(const UVString& filename) { - ofstream out(QuackleIO::Util::qstringToStdString(filename).c_str(), ios::out | ios::binary); + ofstream out(filename.c_str(), ios::out | ios::binary); unsigned char bytes[7]; bytes[0] = (m_encodableWords & 0x00FF0000) >> 16; diff --git a/quackleio/dawgfactory.h b/quackleio/dawgfactory.h index 23bb4f5..051e632 100644 --- a/quackleio/dawgfactory.h +++ b/quackleio/dawgfactory.h @@ -26,7 +26,7 @@ class DawgFactory { public: - DawgFactory(const QString& alphabetFile); + DawgFactory(const UVString& alphabetFile); ~DawgFactory(); int wordCount() const { return m_root.wordCount(); }; @@ -35,10 +35,10 @@ public: int unencodableWords() const { return m_unencodableWords; }; int duplicateWords() const { return m_duplicateWords; }; - bool pushWord(const QString& word, bool inSmaller, int playability); + bool pushWord(const UVString& word, bool inSmaller, int playability); void hashWord(const Quackle::LetterString &word); void generate(); - void writeIndex(const QString& fname); + void writeIndex(const UVString& filename); const char* hashBytes() { return m_hash.charptr; }; diff --git a/quackleio/flexiblealphabet.h b/quackleio/flexiblealphabet.h index 89bd1f4..d5db68a 100644 --- a/quackleio/flexiblealphabet.h +++ b/quackleio/flexiblealphabet.h @@ -21,8 +21,6 @@ #include "alphabetparameters.h" -class QString; - namespace QuackleIO { diff --git a/quackleio/gaddagfactory.cpp b/quackleio/gaddagfactory.cpp index 7f666cb..53ccf04 100644 --- a/quackleio/gaddagfactory.cpp +++ b/quackleio/gaddagfactory.cpp @@ -24,11 +24,15 @@ #include "gaddagfactory.h" #include "util.h" -GaddagFactory::GaddagFactory(const QString& alphabetFile) +GaddagFactory::GaddagFactory(const UVString &alphabetFile) + : m_encodableWords(0), m_unencodableWords(0), m_alphas(NULL) { - QuackleIO::FlexibleAlphabetParameters *flexure = new QuackleIO::FlexibleAlphabetParameters; - flexure->load(alphabetFile); - m_alphas = flexure; + if (!alphabetFile.empty()) + { + QuackleIO::FlexibleAlphabetParameters *flexure = new QuackleIO::FlexibleAlphabetParameters; + flexure->load(QuackleIO::Util::uvStringToQString(alphabetFile)); + m_alphas = flexure; + } // So the separator is sorted to last. m_root.t = false; @@ -44,35 +48,13 @@ GaddagFactory::~GaddagFactory() delete m_alphas; } -bool GaddagFactory::pushWord(const QString& word) +bool GaddagFactory::pushWord(const UVString &word) { - UVString originalString = QuackleIO::Util::qstringToString(word); - UVString leftover; - Quackle::LetterString encodedWord = m_alphas->encode(originalString, &leftover); + Quackle::LetterString encodedWord = m_alphas->encode(word, &leftover); if (leftover.empty()) { - ++m_encodableWords; - hashWord(encodedWord); - // FIXME: This hash will fail if duplicate words are passed in. - // But testing for duplicate words isn't so easy without keeping - // an entirely separate list. - - for (unsigned i = 1; i <= encodedWord.length(); i++) - { - Quackle::LetterString newword; - - for (int j = i - 1; j >= 0; j--) - newword.push_back(encodedWord[j]); - - if (i < encodedWord.length()) - { - newword.push_back(internalSeparatorRepresentation); // "^" - for (unsigned j = i; j < encodedWord.length(); j++) - newword.push_back(encodedWord[j]); - } - m_gaddagizedWords.push_back(newword); - } + pushWord(encodedWord); return true; } @@ -80,6 +62,32 @@ bool GaddagFactory::pushWord(const QString& word) return false; } +bool GaddagFactory::pushWord(const Quackle::LetterString &word) +{ + ++m_encodableWords; + hashWord(word); + // FIXME: This hash will fail if duplicate words are passed in. + // But testing for duplicate words isn't so easy without keeping + // an entirely separate list. + + for (unsigned i = 1; i <= word.length(); i++) + { + Quackle::LetterString newword; + + for (int j = i - 1; j >= 0; j--) + newword.push_back(word[j]); + + if (i < word.length()) + { + newword.push_back(internalSeparatorRepresentation); // "^" + for (unsigned j = i; j < word.length(); j++) + newword.push_back(word[j]); + } + m_gaddagizedWords.push_back(newword); + } + return true; +} + void GaddagFactory::hashWord(const Quackle::LetterString &word) { QCryptographicHash wordhash(QCryptographicHash::Md5); @@ -93,6 +101,7 @@ void GaddagFactory::hashWord(const Quackle::LetterString &word) void GaddagFactory::generate() { + sort(m_gaddagizedWords.begin(), m_gaddagizedWords.end()); Quackle::WordList::const_iterator wordsEnd = m_gaddagizedWords.end(); for (Quackle::WordList::const_iterator wordsIt = m_gaddagizedWords.begin(); wordsIt != wordsEnd; ++wordsIt) m_root.pushWord(*wordsIt); @@ -100,13 +109,13 @@ void GaddagFactory::generate() // m_root.pushWord(words); } -void GaddagFactory::writeIndex(const QString &fname) +void GaddagFactory::writeIndex(const string &fname) { m_nodelist.push_back(&m_root); m_root.print(m_nodelist); - ofstream out(QuackleIO::Util::qstringToStdString(fname).c_str(), ios::out | ios::binary); + ofstream out(fname.c_str(), ios::out | ios::binary); out.put(1); // GADDAG format version 1 out.write(m_hash.charptr, sizeof(m_hash.charptr)); diff --git a/quackleio/gaddagfactory.h b/quackleio/gaddagfactory.h index 03cb546..415baff 100644 --- a/quackleio/gaddagfactory.h +++ b/quackleio/gaddagfactory.h @@ -27,7 +27,7 @@ public: static const Quackle::Letter internalSeparatorRepresentation = QUACKLE_FIRST_LETTER + QUACKLE_MAXIMUM_ALPHABET_SIZE; - GaddagFactory(const QString& alphabetFile); + GaddagFactory(const UVString &alphabetFile); ~GaddagFactory(); int wordCount() const { return m_gaddagizedWords.size(); }; @@ -35,11 +35,12 @@ public: int encodableWords() const { return m_encodableWords; }; int unencodableWords() const { return m_unencodableWords; }; - bool pushWord(const QString& word); + bool pushWord(const UVString &word); + bool pushWord(const Quackle::LetterString &word); void hashWord(const Quackle::LetterString &word); void sortWords() { sort(m_gaddagizedWords.begin(), m_gaddagizedWords.end()); }; void generate(); - void writeIndex(const QString& fname); + void writeIndex(const string &fname); const char* hashBytes() { return m_hash.charptr; }; -- cgit v1.2.3