summary | refs | log | tree | commit | diff
path: root/quackleio/gaddagfactory.cpp
diff options
context:
space:
mode:
author: John Fultz <jfultz@wolfram.com>, 2015-09-07 14:19:46 -0500
committer: John Fultz <jfultz@wolfram.com>, 2015-09-07 15:45:41 -0500
commit: 5350a57f1be22b28914fca14225c73dac5b30b24 (patch)
tree: 399a309a1302d30ec83cc5d7281ac7286882523a /quackleio/gaddagfactory.cpp
parent: 9ea9637922ca68d24d7517cf61870d8cee31f6c5 (diff)
Auto-generate gaddags
Need to add a user interface, but gaddags are now auto-generated if they can't be found. Some specific improvements here:
* FixedLengthString gained a pop_back member.
* Add code to allow v1 gaddags and v0 dawgs to work together.
* Change memory allocation of dawgs and gaddags to be dynamic (the old limit didn't accommodate the ridiculously large Polish dictionary in the gaddag).
* The Settings class now knows a bit about generating gaddags. This will be important for giving UI feedback.
* Fixed several places using filenames which should be using string, not UVString.
* Dawg/GaddagFactory should have been using UVString, not QString. My misunderstanding.
Diffstat (limited to 'quackleio/gaddagfactory.cpp')
-rw-r--r-- quackleio/gaddagfactory.cpp 71
1 files changed, 40 insertions, 31 deletions
diff --git a/quackleio/gaddagfactory.cpp b/quackleio/gaddagfactory.cpp
index 7f666cb..53ccf04 100644
--- a/quackleio/gaddagfactory.cpp
+++ b/quackleio/gaddagfactory.cpp
@@ -24,11 +24,15 @@
#include "gaddagfactory.h"
#include "util.h"
-GaddagFactory::GaddagFactory(const QString& alphabetFile)
+GaddagFactory::GaddagFactory(const UVString &alphabetFile)
+ : m_encodableWords(0), m_unencodableWords(0), m_alphas(NULL)
{
- QuackleIO::FlexibleAlphabetParameters *flexure = new QuackleIO::FlexibleAlphabetParameters;
- flexure->load(alphabetFile);
- m_alphas = flexure;
+ if (!alphabetFile.empty())
+ {
+ QuackleIO::FlexibleAlphabetParameters *flexure = new QuackleIO::FlexibleAlphabetParameters;
+ flexure->load(QuackleIO::Util::uvStringToQString(alphabetFile));
+ m_alphas = flexure;
+ }
// So the separator is sorted to last.
m_root.t = false;
@@ -44,35 +48,13 @@ GaddagFactory::~GaddagFactory()
delete m_alphas;
}
-bool GaddagFactory::pushWord(const QString& word)
+bool GaddagFactory::pushWord(const UVString &word)
{
- UVString originalString = QuackleIO::Util::qstringToString(word);
-
UVString leftover;
- Quackle::LetterString encodedWord = m_alphas->encode(originalString, &leftover);
+ Quackle::LetterString encodedWord = m_alphas->encode(word, &leftover);
if (leftover.empty())
{
- ++m_encodableWords;
- hashWord(encodedWord);
- // FIXME: This hash will fail if duplicate words are passed in.
- // But testing for duplicate words isn't so easy without keeping
- // an entirely separate list.
-
- for (unsigned i = 1; i <= encodedWord.length(); i++)
- {
- Quackle::LetterString newword;
-
- for (int j = i - 1; j >= 0; j--)
- newword.push_back(encodedWord[j]);
-
- if (i < encodedWord.length())
- {
- newword.push_back(internalSeparatorRepresentation); // "^"
- for (unsigned j = i; j < encodedWord.length(); j++)
- newword.push_back(encodedWord[j]);
- }
- m_gaddagizedWords.push_back(newword);
- }
+ pushWord(encodedWord);
return true;
}
@@ -80,6 +62,32 @@ bool GaddagFactory::pushWord(const QString& word)
return false;
}
+bool GaddagFactory::pushWord(const Quackle::LetterString &word)
+{
+ ++m_encodableWords;
+ hashWord(word);
+ // FIXME: This hash will fail if duplicate words are passed in.
+ // But testing for duplicate words isn't so easy without keeping
+ // an entirely separate list.
+
+ for (unsigned i = 1; i <= word.length(); i++)
+ {
+ Quackle::LetterString newword;
+
+ for (int j = i - 1; j >= 0; j--)
+ newword.push_back(word[j]);
+
+ if (i < word.length())
+ {
+ newword.push_back(internalSeparatorRepresentation); // "^"
+ for (unsigned j = i; j < word.length(); j++)
+ newword.push_back(word[j]);
+ }
+ m_gaddagizedWords.push_back(newword);
+ }
+ return true;
+}
+
void GaddagFactory::hashWord(const Quackle::LetterString &word)
{
QCryptographicHash wordhash(QCryptographicHash::Md5);
@@ -93,6 +101,7 @@ void GaddagFactory::hashWord(const Quackle::LetterString &word)
void GaddagFactory::generate()
{
+ sort(m_gaddagizedWords.begin(), m_gaddagizedWords.end());
Quackle::WordList::const_iterator wordsEnd = m_gaddagizedWords.end();
for (Quackle::WordList::const_iterator wordsIt = m_gaddagizedWords.begin(); wordsIt != wordsEnd; ++wordsIt)
m_root.pushWord(*wordsIt);
@@ -100,13 +109,13 @@ void GaddagFactory::generate()
// m_root.pushWord(words);
}
-void GaddagFactory::writeIndex(const QString &fname)
+void GaddagFactory::writeIndex(const string &fname)
{
m_nodelist.push_back(&m_root);
m_root.print(m_nodelist);
- ofstream out(QuackleIO::Util::qstringToStdString(fname).c_str(), ios::out | ios::binary);
+ ofstream out(fname.c_str(), ios::out | ios::binary);
out.put(1); // GADDAG format version 1
out.write(m_hash.charptr, sizeof(m_hash.charptr));