From 909c37b77534b88eeafac7a03286692c31cbb1ef Mon Sep 17 00:00:00 2001 From: John Fultz Date: Tue, 18 Aug 2015 10:37:11 -0500 Subject: Migrate gaddag maker into quackleio. Prepping to build the gaddag maker into the quacker ui. Built a new class called GaddagFactory to do this and cleaned up the code a bit. makegaddag still builds exactly as it did before. --- quackleio/gaddagfactory.cpp | 166 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 166 insertions(+) create mode 100644 quackleio/gaddagfactory.cpp (limited to 'quackleio/gaddagfactory.cpp') diff --git a/quackleio/gaddagfactory.cpp b/quackleio/gaddagfactory.cpp new file mode 100644 index 0000000..3a608f0 --- /dev/null +++ b/quackleio/gaddagfactory.cpp @@ -0,0 +1,166 @@ +/* + * Quackle -- Crossword game artificial intelligence and analysis tool + * Copyright (C) 2005-2014 Jason Katz-Brown and John O'Laughlin. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + + +#include +#include + +#include "gaddagfactory.h" +#include "util.h" + +GaddagFactory::GaddagFactory(const QString& alphabetFile) +{ + QuackleIO::FlexibleAlphabetParameters *flexure = new QuackleIO::FlexibleAlphabetParameters; + flexure->load(alphabetFile); + alphas = flexure; + + // So the separator is sorted to last. 
+ root.t = false; + root.c = QUACKLE_NULL_MARK; // "_" + root.pointer = 0; + root.lastchild = true; +} + +void GaddagFactory::pushWord(const QString& word) +{ + UVString originalString = QuackleIO::Util::qstringToString(word); + + UVString leftover; + Quackle::LetterString encodedWord = alphas->encode(originalString, &leftover); + if (leftover.empty()) + { + ++m_encodableWords; + + for (unsigned i = 1; i <= encodedWord.length(); i++) + { + Quackle::LetterString newword; + + for (int j = i - 1; j >= 0; j--) + newword.push_back(encodedWord[j]); + + if (i < encodedWord.length()) + { + newword.push_back(internalSeparatorRepresentation); // "^" + for (unsigned j = i; j < encodedWord.length(); j++) + newword.push_back(encodedWord[j]); + } + gaddagizedWords.push_back(newword); + } + } + else + { + UVcout << "not encodable without leftover: " << originalString << endl; + ++m_unencodableWords; + } +} + +void GaddagFactory::generate() +{ + Quackle::WordList::const_iterator wordsEnd = gaddagizedWords.end(); + for (Quackle::WordList::const_iterator wordsIt = gaddagizedWords.begin(); wordsIt != wordsEnd; ++wordsIt) + root.pushWord(*wordsIt); + // for (const auto& words : gaddaggizedWords) + // root.pushWord(words); +} + +void GaddagFactory::writeIndex(const QString& fname) +{ + nodelist.push_back(&root); + + root.print(nodelist, ""); + + ofstream out(QuackleIO::Util::qstringToStdString(fname).c_str(), ios::out | ios::binary); + + for (size_t i = 0; i < nodelist.size(); i++) + { + unsigned int p = (unsigned int)(nodelist[i]->pointer); + if (p != 0) + p -= i; // offset indexing + + char bytes[4]; + unsigned char n1 = (p & 0x00FF0000) >> 16; + unsigned char n2 = (p & 0x0000FF00) >> 8; + unsigned char n3 = (p & 0x000000FF) >> 0; + unsigned char n4; + + n4 = nodelist[i]->c; + if (n4 == internalSeparatorRepresentation) + n4 = QUACKLE_NULL_MARK; + + if (nodelist[i]->t) + n4 |= 64; + + if (nodelist[i]->lastchild) + n4 |= 128; + + bytes[0] = n1; bytes[1] = n2; bytes[2] = n3; bytes[3] = 
n4; + out.write(bytes, 4); + } +} + + +void GaddagFactory::Node::print(vector< Node* > nodelist, Quackle::LetterString prefix) +{ + if (children.size() > 0) + { + pointer = nodelist.size(); + children[children.size() - 1].lastchild = true; + } + + for (size_t i = 0; i < children.size(); i++) + nodelist.push_back(&children[i]); + + for (size_t i = 0; i < children.size(); i++) + children[i].print(nodelist, prefix + children[i].c); +} + + +void GaddagFactory::Node::pushWord(Quackle::LetterString word) +{ + if (word.length() == 0) + { + t = true; + return; + } + + Quackle::Letter first = Quackle::String::front(word); + Quackle::LetterString rest = Quackle::String::allButFront(word); + int index = -1; + + for (size_t i = 0; i < children.size(); i++) + { + if (children[i].c == first) + { + index = i; + i = children.size(); + } + } + + if (index == -1) + { + Node n; + n.c = first; + n.t = false; + n.pointer = 0; + n.lastchild = false; + children.push_back(n); + index = children.size() - 1; + } + + children[index].pushWord(rest); +} -- cgit v1.2.3 From 4ef5b33708a4ff0435d5c8254b860cd03a264c66 Mon Sep 17 00:00:00 2001 From: John Fultz Date: Thu, 20 Aug 2015 04:49:46 -0500 Subject: Bug fixes to GaddagFactory A few things from my last commit needed to be fixed or improved. 
--- gaddagize/.gitignore | 1 + makegaddag/makegaddag.cpp | 3 ++- quackle.sublime-project | 3 ++- quackleio/gaddagfactory.cpp | 24 ++++++++++++++---------- quackleio/gaddagfactory.h | 13 ++++++++++--- 5 files changed, 29 insertions(+), 15 deletions(-) (limited to 'quackleio/gaddagfactory.cpp') diff --git a/gaddagize/.gitignore b/gaddagize/.gitignore index f878785..3c70113 100644 --- a/gaddagize/.gitignore +++ b/gaddagize/.gitignore @@ -4,3 +4,4 @@ Makefile.Debug Makefile.Release debug release +gaddagize diff --git a/makegaddag/makegaddag.cpp b/makegaddag/makegaddag.cpp index b8fe276..dc38a5f 100644 --- a/makegaddag/makegaddag.cpp +++ b/makegaddag/makegaddag.cpp @@ -88,7 +88,8 @@ int main(int argc, char **argv) if (stream.atEnd()) break; - factory.pushWord(originalQString); + if (!factory.pushWord(originalQString)) + UVcout << "not encodable without leftover: " << QuackleIO::Util::qstringToString(originalQString) << endl; } UVcout << "Sorting " << factory.wordCount() << " words..." << endl; diff --git a/quackle.sublime-project b/quackle.sublime-project index a219eb1..2ca8db5 100644 --- a/quackle.sublime-project +++ b/quackle.sublime-project @@ -5,7 +5,8 @@ "path": ".", "file_exclude_patterns" : ["*.tgz", "*.sublime-workspace", ".tags*", "dawginput.raw", "playabilities.raw", "smaller.raw", ".gitattributes", - "*.Debug", "*.Release", "*.pfx", "*.cer"], + "*.Debug", "*.Release", "*.pfx", "*.cer", + "makegaddag", "makeminidawg", "gaddagize", "Makefile"], "folder_exclude_patterns" : ["obj", "moc", "build", "*.xcodeproj", "lib", "lexica", "strategy", "debug", "release", "makeswelexicon", "lisp", "DerivedData"] } diff --git a/quackleio/gaddagfactory.cpp b/quackleio/gaddagfactory.cpp index 3a608f0..e2c726d 100644 --- a/quackleio/gaddagfactory.cpp +++ b/quackleio/gaddagfactory.cpp @@ -36,7 +36,12 @@ GaddagFactory::GaddagFactory(const QString& alphabetFile) root.lastchild = true; } -void GaddagFactory::pushWord(const QString& word) +GaddagFactory::~GaddagFactory() +{ + delete 
alphas; +} + +bool GaddagFactory::pushWord(const QString& word) { UVString originalString = QuackleIO::Util::qstringToString(word); @@ -61,12 +66,11 @@ void GaddagFactory::pushWord(const QString& word) } gaddagizedWords.push_back(newword); } + return true; } - else - { - UVcout << "not encodable without leftover: " << originalString << endl; - ++m_unencodableWords; - } + + ++m_unencodableWords; + return false; } void GaddagFactory::generate() @@ -82,7 +86,7 @@ void GaddagFactory::writeIndex(const QString& fname) { nodelist.push_back(&root); - root.print(nodelist, ""); + root.print(nodelist); ofstream out(QuackleIO::Util::qstringToStdString(fname).c_str(), ios::out | ios::binary); @@ -114,7 +118,7 @@ void GaddagFactory::writeIndex(const QString& fname) } -void GaddagFactory::Node::print(vector< Node* > nodelist, Quackle::LetterString prefix) +void GaddagFactory::Node::print(vector< Node* >& nodelist) { if (children.size() > 0) { @@ -126,11 +130,11 @@ void GaddagFactory::Node::print(vector< Node* > nodelist, Quackle::LetterString nodelist.push_back(&children[i]); for (size_t i = 0; i < children.size(); i++) - children[i].print(nodelist, prefix + children[i].c); + children[i].print(nodelist); } -void GaddagFactory::Node::pushWord(Quackle::LetterString word) +void GaddagFactory::Node::pushWord(const Quackle::LetterString& word) { if (word.length() == 0) { diff --git a/quackleio/gaddagfactory.h b/quackleio/gaddagfactory.h index ca3bc40..9eb8d72 100644 --- a/quackleio/gaddagfactory.h +++ b/quackleio/gaddagfactory.h @@ -16,6 +16,9 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ +#ifndef QUACKLE_GADDAGFACTORY_H +#define QUACKLE_GADDAGFACTORY_H + #include "flexiblealphabet.h" @@ -25,13 +28,14 @@ public: static const Quackle::Letter internalSeparatorRepresentation = QUACKLE_FIRST_LETTER + QUACKLE_MAXIMUM_ALPHABET_SIZE; GaddagFactory(const QString& alphabetFile); + ~GaddagFactory(); int wordCount() const { return gaddagizedWords.size(); }; int nodeCount() const { return nodelist.size(); }; int encodableWords() const { return m_encodableWords; }; int unencodableWords() const { return m_unencodableWords; }; - void pushWord(const QString& word); + bool pushWord(const QString& word); void sortWords() { sort(gaddagizedWords.begin(), gaddagizedWords.end()); }; void generate(); void writeIndex(const QString& fname); @@ -44,8 +48,8 @@ private: vector children; int pointer; bool lastchild; - void pushWord(Quackle::LetterString word); - void print(vector< Node* > nodelist, Quackle::LetterString prefix); + void pushWord(const Quackle::LetterString& word); + void print(vector< Node* >& nodelist); }; int m_encodableWords; @@ -57,3 +61,6 @@ private: }; + +#endif + -- cgit v1.2.3 From 1f7b8ef6f96e1d5a2c50565a0f52cc633215e485 Mon Sep 17 00:00:00 2001 From: John Fultz Date: Mon, 24 Aug 2015 04:45:27 -0500 Subject: Version the GADDAGs. Basically the same thing I just did to the DAWG files, now done to GADDAGs. Also, add hashing, and make sure GADDAGs only load if their hash matches that of the DAWG files. 
--- lexiconparameters.cpp | 52 +++++++++++++++++++++++++++++--------- lexiconparameters.h | 15 +++++------ quacker/settings.cpp | 19 +++++++------- quackleio/dawgfactory.cpp | 1 + quackleio/gaddagfactory.cpp | 61 ++++++++++++++++++++++++++++++--------------- quackleio/gaddagfactory.h | 23 +++++++++-------- quackletest.cpp | 4 +-- test/testharness.cpp | 6 ++--- 8 files changed, 117 insertions(+), 64 deletions(-) (limited to 'quackleio/gaddagfactory.cpp') diff --git a/lexiconparameters.cpp b/lexiconparameters.cpp index ca09fa5..e014048 100644 --- a/lexiconparameters.cpp +++ b/lexiconparameters.cpp @@ -19,13 +19,14 @@ #include #include + #include "datamanager.h" #include "lexiconparameters.h" #include "uv.h" using namespace Quackle; -class Quackle::V0DawgInterpreter : public DawgInterpreter +class Quackle::V0LexiconInterpreter : public LexiconInterpreter { virtual void loadDawg(ifstream &file, LexiconParameters &lexparams) @@ -39,6 +40,17 @@ class Quackle::V0DawgInterpreter : public DawgInterpreter } } + virtual void loadGaddag(ifstream &file, LexiconParameters &lexparams) + { + int i = 0; + file.unget(); + while (!file.eof()) + { + file.read((char*)(lexparams.m_gaddag) + i, 4); + i += 4; + } + } + virtual void dawgAt(const unsigned char *dawg, int index, unsigned int &p, Letter &letter, bool &t, bool &lastchild, bool &british, int &playability) const { index *= 7; @@ -55,7 +67,7 @@ class Quackle::V0DawgInterpreter : public DawgInterpreter virtual int versionNumber() const { return 0; } }; -class Quackle::V1DawgInterpreter : public DawgInterpreter +class Quackle::V1LexiconInterpreter : public LexiconInterpreter { virtual void loadDawg(ifstream &file, LexiconParameters &lexparams) @@ -72,6 +84,24 @@ class Quackle::V1DawgInterpreter : public DawgInterpreter } } + virtual void loadGaddag(ifstream &file, LexiconParameters &lexparams) + { + char hash[16]; + file.read(hash, sizeof(hash)); + if (memcmp(hash, lexparams.m_hash, sizeof(hash))) + { + lexparams.unloadGaddag(); // 
don't use a mismatched gaddag + return; + } + + int i = 0; + while (!file.eof()) + { + file.read((char*)(lexparams.m_gaddag) + i, 4); + i += 4; + } + } + virtual void dawgAt(const unsigned char *dawg, int index, unsigned int &p, Letter &letter, bool &t, bool &lastchild, bool &british, int &playability) const { index *= 7; @@ -108,14 +138,14 @@ void LexiconParameters::unloadAll() void LexiconParameters::unloadDawg() { delete[] m_dawg; - m_dawg = 0; + m_dawg = NULL; delete m_interpreter; } void LexiconParameters::unloadGaddag() { delete[] m_gaddag; - m_gaddag = 0; + m_gaddag = NULL; } void LexiconParameters::loadDawg(const string &filename) @@ -133,10 +163,10 @@ void LexiconParameters::loadDawg(const string &filename) switch(versionByte) { case 0: - m_interpreter = new V0DawgInterpreter(); + m_interpreter = new V0LexiconInterpreter(); break; case 1: - m_interpreter = new V1DawgInterpreter(); + m_interpreter = new V1LexiconInterpreter(); break; default: UVcout << "couldn't open dawg " << filename.c_str() << endl; @@ -160,14 +190,12 @@ void LexiconParameters::loadGaddag(const string &filename) return; } + char versionByte = file.get(); + if (versionByte != m_interpreter->versionNumber()) + return; m_gaddag = new unsigned char[40000000]; - int i = 0; - while (!file.eof()) - { - file.read((char*)(m_gaddag) + i, 4); - i += 4; - } + m_interpreter->loadGaddag(file, *this); } string LexiconParameters::findDictionaryFile(const string &lexicon) diff --git a/lexiconparameters.h b/lexiconparameters.h index 4b6369d..04ad4e7 100644 --- a/lexiconparameters.h +++ b/lexiconparameters.h @@ -25,22 +25,23 @@ namespace Quackle { -class DawgInterpreter +class LexiconInterpreter { public: virtual void loadDawg(ifstream &file, LexiconParameters &lexparams) = 0; + virtual void loadGaddag(ifstream &file, LexiconParameters &lexparams) = 0; virtual void dawgAt(const unsigned char *dawg, int index, unsigned int &p, Letter &letter, bool &t, bool &lastchild, bool &british, int &playability) const 
= 0; virtual int versionNumber() const = 0; - virtual ~DawgInterpreter() {}; + virtual ~LexiconInterpreter() {}; }; -class V0DawgInterpreter; -class V1DawgInterpreter; +class V0LexiconInterpreter; +class V1LexiconInterpreter; class LexiconParameters { - friend class Quackle::V0DawgInterpreter; - friend class Quackle::V1DawgInterpreter; + friend class Quackle::V0LexiconInterpreter; + friend class Quackle::V1LexiconInterpreter; public: LexiconParameters(); @@ -79,7 +80,7 @@ protected: unsigned char *m_dawg; unsigned char *m_gaddag; string m_lexiconName; - DawgInterpreter *m_interpreter; + LexiconInterpreter *m_interpreter; char m_hash[16]; int m_wordcount; }; diff --git a/quacker/settings.cpp b/quacker/settings.cpp index 3c42a39..362e916 100644 --- a/quacker/settings.cpp +++ b/quacker/settings.cpp @@ -207,16 +207,6 @@ void Settings::setQuackleToUseLexiconName(const string &lexiconName) { QUACKLE_LEXICON_PARAMETERS->setLexiconName(lexiconName); - string gaddagFile = Quackle::LexiconParameters::findDictionaryFile(lexiconName + ".gaddag"); - - if (gaddagFile.empty()) - { - UVcout << "Gaddag for lexicon '" << lexiconName << "' does not exist." << endl; - QUACKLE_LEXICON_PARAMETERS->unloadGaddag(); - } - else - QUACKLE_LEXICON_PARAMETERS->loadGaddag(gaddagFile); - string dawgFile = Quackle::LexiconParameters::findDictionaryFile(lexiconName + ".dawg"); if (dawgFile.empty()) { @@ -226,6 +216,15 @@ void Settings::setQuackleToUseLexiconName(const string &lexiconName) else QUACKLE_LEXICON_PARAMETERS->loadDawg(dawgFile); + string gaddagFile = Quackle::LexiconParameters::findDictionaryFile(lexiconName + ".gaddag"); + if (gaddagFile.empty()) + { + UVcout << "Gaddag for lexicon '" << lexiconName << "' does not exist." 
<< endl; + QUACKLE_LEXICON_PARAMETERS->unloadGaddag(); + } + else + QUACKLE_LEXICON_PARAMETERS->loadGaddag(gaddagFile); + QUACKLE_STRATEGY_PARAMETERS->initialize(lexiconName); } } diff --git a/quackleio/dawgfactory.cpp b/quackleio/dawgfactory.cpp index 6fb5be0..74b4346 100644 --- a/quackleio/dawgfactory.cpp +++ b/quackleio/dawgfactory.cpp @@ -138,6 +138,7 @@ void DawgFactory::writeIndex(const QString& filename) bytes[1] = (m_encodableWords & 0x0000FF00) >> 8; bytes[2] = (m_encodableWords & 0x000000FF); + out.put(1); // DAWG format version 1 out.write(m_hash.charptr, sizeof(m_hash.charptr)); out.write((char*)bytes, 3); diff --git a/quackleio/gaddagfactory.cpp b/quackleio/gaddagfactory.cpp index e2c726d..7f666cb 100644 --- a/quackleio/gaddagfactory.cpp +++ b/quackleio/gaddagfactory.cpp @@ -19,6 +19,7 @@ #include #include +#include #include "gaddagfactory.h" #include "util.h" @@ -27,18 +28,20 @@ GaddagFactory::GaddagFactory(const QString& alphabetFile) { QuackleIO::FlexibleAlphabetParameters *flexure = new QuackleIO::FlexibleAlphabetParameters; flexure->load(alphabetFile); - alphas = flexure; + m_alphas = flexure; // So the separator is sorted to last. 
- root.t = false; - root.c = QUACKLE_NULL_MARK; // "_" - root.pointer = 0; - root.lastchild = true; + m_root.t = false; + m_root.c = QUACKLE_NULL_MARK; // "_" + m_root.pointer = 0; + m_root.lastchild = true; + + m_hash.int32ptr[0] = m_hash.int32ptr[1] = m_hash.int32ptr[2] = m_hash.int32ptr[3] = 0; } GaddagFactory::~GaddagFactory() { - delete alphas; + delete m_alphas; } bool GaddagFactory::pushWord(const QString& word) @@ -46,10 +49,14 @@ bool GaddagFactory::pushWord(const QString& word) UVString originalString = QuackleIO::Util::qstringToString(word); UVString leftover; - Quackle::LetterString encodedWord = alphas->encode(originalString, &leftover); + Quackle::LetterString encodedWord = m_alphas->encode(originalString, &leftover); if (leftover.empty()) { ++m_encodableWords; + hashWord(encodedWord); + // FIXME: This hash will fail if duplicate words are passed in. + // But testing for duplicate words isn't so easy without keeping + // an entirely separate list. for (unsigned i = 1; i <= encodedWord.length(); i++) { @@ -64,7 +71,7 @@ bool GaddagFactory::pushWord(const QString& word) for (unsigned j = i; j < encodedWord.length(); j++) newword.push_back(encodedWord[j]); } - gaddagizedWords.push_back(newword); + m_gaddagizedWords.push_back(newword); } return true; } @@ -73,26 +80,40 @@ bool GaddagFactory::pushWord(const QString& word) return false; } +void GaddagFactory::hashWord(const Quackle::LetterString &word) +{ + QCryptographicHash wordhash(QCryptographicHash::Md5); + wordhash.addData(word.constData(), word.length()); + QByteArray wordhashbytes = wordhash.result(); + m_hash.int32ptr[0] ^= ((const int32_t*)wordhashbytes.constData())[0]; + m_hash.int32ptr[1] ^= ((const int32_t*)wordhashbytes.constData())[1]; + m_hash.int32ptr[2] ^= ((const int32_t*)wordhashbytes.constData())[2]; + m_hash.int32ptr[3] ^= ((const int32_t*)wordhashbytes.constData())[3]; +} + void GaddagFactory::generate() { - Quackle::WordList::const_iterator wordsEnd = gaddagizedWords.end(); - for 
(Quackle::WordList::const_iterator wordsIt = gaddagizedWords.begin(); wordsIt != wordsEnd; ++wordsIt) - root.pushWord(*wordsIt); + Quackle::WordList::const_iterator wordsEnd = m_gaddagizedWords.end(); + for (Quackle::WordList::const_iterator wordsIt = m_gaddagizedWords.begin(); wordsIt != wordsEnd; ++wordsIt) + m_root.pushWord(*wordsIt); // for (const auto& words : gaddaggizedWords) - // root.pushWord(words); + // m_root.pushWord(words); } -void GaddagFactory::writeIndex(const QString& fname) +void GaddagFactory::writeIndex(const QString &fname) { - nodelist.push_back(&root); + m_nodelist.push_back(&m_root); - root.print(nodelist); + m_root.print(m_nodelist); ofstream out(QuackleIO::Util::qstringToStdString(fname).c_str(), ios::out | ios::binary); - for (size_t i = 0; i < nodelist.size(); i++) + out.put(1); // GADDAG format version 1 + out.write(m_hash.charptr, sizeof(m_hash.charptr)); + + for (size_t i = 0; i < m_nodelist.size(); i++) { - unsigned int p = (unsigned int)(nodelist[i]->pointer); + unsigned int p = (unsigned int)(m_nodelist[i]->pointer); if (p != 0) p -= i; // offset indexing @@ -102,14 +123,14 @@ void GaddagFactory::writeIndex(const QString& fname) unsigned char n3 = (p & 0x000000FF) >> 0; unsigned char n4; - n4 = nodelist[i]->c; + n4 = m_nodelist[i]->c; if (n4 == internalSeparatorRepresentation) n4 = QUACKLE_NULL_MARK; - if (nodelist[i]->t) + if (m_nodelist[i]->t) n4 |= 64; - if (nodelist[i]->lastchild) + if (m_nodelist[i]->lastchild) n4 |= 128; bytes[0] = n1; bytes[1] = n2; bytes[2] = n3; bytes[3] = n4; diff --git a/quackleio/gaddagfactory.h b/quackleio/gaddagfactory.h index 9eb8d72..2d21192 100644 --- a/quackleio/gaddagfactory.h +++ b/quackleio/gaddagfactory.h @@ -30,13 +30,14 @@ public: GaddagFactory(const QString& alphabetFile); ~GaddagFactory(); - int wordCount() const { return gaddagizedWords.size(); }; - int nodeCount() const { return nodelist.size(); }; + int wordCount() const { return m_gaddagizedWords.size(); }; + int nodeCount() const { 
return m_nodelist.size(); }; int encodableWords() const { return m_encodableWords; }; int unencodableWords() const { return m_unencodableWords; }; bool pushWord(const QString& word); - void sortWords() { sort(gaddagizedWords.begin(), gaddagizedWords.end()); }; + void hashWord(const Quackle::LetterString &word); + void sortWords() { sort(m_gaddagizedWords.begin(), m_gaddagizedWords.end()); }; void generate(); void writeIndex(const QString& fname); @@ -49,17 +50,19 @@ private: int pointer; bool lastchild; void pushWord(const Quackle::LetterString& word); - void print(vector< Node* >& nodelist); + void print(vector< Node* >& m_nodelist); }; int m_encodableWords; int m_unencodableWords; - Quackle::WordList gaddagizedWords; - vector< Node* > nodelist; - Quackle::AlphabetParameters *alphas; - Node root; - - + Quackle::WordList m_gaddagizedWords; + vector< Node* > m_nodelist; + Quackle::AlphabetParameters *m_alphas; + Node m_root; + union { + char charptr[16]; + int32_t int32ptr[4]; + } m_hash; }; #endif diff --git a/quackletest.cpp b/quackletest.cpp index e69c2cb..7ea5d10 100644 --- a/quackletest.cpp +++ b/quackletest.cpp @@ -47,7 +47,7 @@ int main() dataManager.setAppDataDirectory("data"); dataManager.lexiconParameters()->loadDawg(Quackle::LexiconParameters::findDictionaryFile("twl06.dawg")); - dataManager.lexiconParameters()->loadGaddag(Quackle::LexiconParameters::findDictionaryFile("twl06.gaddag")); + dataManager.lexiconParameters()->loadGaddag(Quackle::LexiconParameters::findDictionaryFile("twl06.gaddag")); dataManager.strategyParameters()->initialize("twl06"); dataManager.setBoardParameters(new Quackle::EnglishBoard()); @@ -58,7 +58,7 @@ int main() const int gameCnt = 1000; //const int gameCnt = 1; for (int game = 0; game < gameCnt; ++game) { - testGame(); + testGame(); } return 0; diff --git a/test/testharness.cpp b/test/testharness.cpp index 683443f..3f390c1 100644 --- a/test/testharness.cpp +++ b/test/testharness.cpp @@ -207,13 +207,13 @@ void 
TestHarness::startUp() m_dataManager.setBoardParameters(new ScrabbleBoard()); - m_dataManager.lexiconParameters()->loadGaddag(Quackle::LexiconParameters::findDictionaryFile(QuackleIO::Util::qstringToStdString(m_lexicon + ".gaddag"))); + m_dataManager.lexiconParameters()->loadDawg(Quackle::LexiconParameters::findDictionaryFile(QuackleIO::Util::qstringToStdString(m_lexicon + ".dawg"))); UVcout << "."; - m_dataManager.lexiconParameters()->loadDawg(Quackle::LexiconParameters::findDictionaryFile(QuackleIO::Util::qstringToStdString(m_lexicon + ".dawg"))); + m_dataManager.lexiconParameters()->loadGaddag(Quackle::LexiconParameters::findDictionaryFile(QuackleIO::Util::qstringToStdString(m_lexicon + ".gaddag"))); + UVcout << "."; m_dataManager.strategyParameters()->initialize(QuackleIO::Util::qstringToStdString(m_lexicon)); - UVcout << "."; UVcout << endl; -- cgit v1.2.3 From 5350a57f1be22b28914fca14225c73dac5b30b24 Mon Sep 17 00:00:00 2001 From: John Fultz Date: Mon, 7 Sep 2015 14:19:46 -0500 Subject: Auto-generate gaddags Need to add a user interface, but gaddags are now auto-generated if they can't be found. Some specific improvements here: * FixedLengthString gained a pop_back member. * Add code to allow v1 gaddags and v0 dawgs to work together. * Change memory allocation of dawgs and gaddags to be dynamic (the old limit didn't accommodate the ridiculously large Polish dictionary in the gaddag) * The Settings class now knows a bit about generating gaddags. This will be important for giving UI feedback. * Fixed several places using filenames which should be using string, not UVString. * Dawg/GaddagFactory should have been using UVString, not QString. My misunderstanding. 
--- fixedstring.h | 8 +++++ lexiconparameters.cpp | 81 +++++++++++++++++++++++++++++------------ lexiconparameters.h | 6 ++-- quacker/settings.cpp | 86 ++++++++++++++++++++++++++++++++++---------- quacker/settings.h | 9 +++-- quackleio/dawgfactory.cpp | 14 ++++---- quackleio/dawgfactory.h | 6 ++-- quackleio/flexiblealphabet.h | 2 -- quackleio/gaddagfactory.cpp | 71 ++++++++++++++++++++---------------- quackleio/gaddagfactory.h | 7 ++-- 10 files changed, 197 insertions(+), 93 deletions(-) (limited to 'quackleio/gaddagfactory.cpp') diff --git a/fixedstring.h b/fixedstring.h index e8db0bf..a31ecd6 100644 --- a/fixedstring.h +++ b/fixedstring.h @@ -54,6 +54,7 @@ class FixedLengthString size_type size() const { return length(); } void clear() { m_end = m_data; } void push_back(char c); + void pop_back(); const char* constData() const { return m_data; } int compare(const FixedLengthString& s) const; @@ -221,6 +222,13 @@ FixedLengthString::push_back(char c) *this += c; } +inline void +FixedLengthString::pop_back() +{ + assert(size() > 0); + m_end--; +} + inline int FixedLengthString::compare(const FixedLengthString& s) const { diff --git a/lexiconparameters.cpp b/lexiconparameters.cpp index f6e646b..6761fc1 100644 --- a/lexiconparameters.cpp +++ b/lexiconparameters.cpp @@ -32,7 +32,6 @@ class Quackle::V0LexiconInterpreter : public LexiconInterpreter virtual void loadDawg(ifstream &file, LexiconParameters &lexparams) { int i = 0; - file.unget(); // version 0 doesn't have a version byte...it's just the node byte which is always set to 0 while (!file.eof()) { file.read((char*)(lexparams.m_dawg) + i, 7); @@ -43,7 +42,6 @@ class Quackle::V0LexiconInterpreter : public LexiconInterpreter virtual void loadGaddag(ifstream &file, LexiconParameters &lexparams) { int i = 0; - file.unget(); while (!file.eof()) { file.read((char*)(lexparams.m_gaddag) + i, 4); @@ -74,6 +72,7 @@ class Quackle::V1LexiconInterpreter : public LexiconInterpreter { int i = 0; unsigned char bytes[3]; + 
file.get(); // skip past version byte file.read(lexparams.m_hash, sizeof(lexparams.m_hash)); file.read((char*)bytes, 3); lexparams.m_wordcount = (bytes[0] << 16) | (bytes[1] << 8) | bytes[2]; @@ -87,14 +86,22 @@ class Quackle::V1LexiconInterpreter : public LexiconInterpreter virtual void loadGaddag(ifstream &file, LexiconParameters &lexparams) { char hash[16]; + file.get(); // skip past version byte file.read(hash, sizeof(hash)); if (memcmp(hash, lexparams.m_hash, sizeof(hash))) { - lexparams.unloadGaddag(); // don't use a mismatched gaddag - return; + // If we're using a v0 DAWG, then ignore the hash + for (size_t i = 0; i < sizeof(lexparams.m_hash); i++) + { + if (lexparams.m_hash[0] != 0) + { + lexparams.unloadGaddag(); // don't use a mismatched gaddag + return; + } + } } - int i = 0; + size_t i = 0; while (!file.eof()) { file.read((char*)(lexparams.m_gaddag) + i, 4); @@ -160,20 +167,16 @@ void LexiconParameters::loadDawg(const string &filename) } char versionByte = file.get(); - switch(versionByte) + m_interpreter = createInterpreter(versionByte); + if (m_interpreter == NULL) { - case 0: - m_interpreter = new V0LexiconInterpreter(); - break; - case 1: - m_interpreter = new V1LexiconInterpreter(); - break; - default: - UVcout << "couldn't open dawg " << filename.c_str() << endl; - return; + UVcout << "couldn't open file " << filename.c_str() << endl; + return; } - m_dawg = new unsigned char[7000000]; + file.seekg(0, ios_base::end); + m_dawg = new unsigned char[file.tellg()]; + file.seekg(0, ios_base::beg); m_interpreter->loadDawg(file, *this); } @@ -191,19 +194,53 @@ void LexiconParameters::loadGaddag(const string &filename) } char versionByte = file.get(); - if (versionByte != m_interpreter->versionNumber()) + if (versionByte < m_interpreter->versionNumber()) return; - m_gaddag = new unsigned char[40000000]; + file.seekg(0, ios_base::end); + m_gaddag = new unsigned char[file.tellg()]; + file.seekg(0, ios_base::beg); - m_interpreter->loadGaddag(file, *this); + 
// must create a local interpreter because dawg/gaddag versions might not match + LexiconInterpreter* interpreter = createInterpreter(versionByte); + if (interpreter != NULL) + { + interpreter->loadGaddag(file, *this); + delete interpreter; + } + else + unloadGaddag(); } string LexiconParameters::findDictionaryFile(const string &lexicon) { - return DataManager::self()->findDataFile("lexica", lexicon); + return QUACKLE_DATAMANAGER->findDataFile("lexica", lexicon); +} + +UVString LexiconParameters::hashString(bool shortened) const +{ + const char hex[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; + string hashStr; + for (size_t i = 0; i < sizeof(m_hash); i++) + { + hashStr.push_back(hex[(m_hash[i] & 0xF0) >> 4]); + hashStr.push_back(hex[m_hash[i] & 0x0F]); + if (shortened && i == 5) + break; + if (i % 2 == 1) + hashStr.push_back('-'); + } + return hashStr; } -QString hashString() const +LexiconInterpreter* LexiconParameters::createInterpreter(char version) const { - return QString(QByteArray(m_hash, sizeof(m_hash)).toHex()); + switch(version) + { + case 0: + return new V0LexiconInterpreter(); + case 1: + return new V1LexiconInterpreter(); + default: + return NULL; + } } diff --git a/lexiconparameters.h b/lexiconparameters.h index b5bc564..3890d8d 100644 --- a/lexiconparameters.h +++ b/lexiconparameters.h @@ -19,8 +19,6 @@ #ifndef QUACKLE_LEXICONPARAMETERS_H #define QUACKLE_LEXICONPARAMETERS_H -#include -#include "alphabetparameters.h" #include "gaddag.h" namespace Quackle @@ -77,7 +75,7 @@ public: } const GaddagNode *gaddagRoot() const { return (GaddagNode *) &m_gaddag[0]; }; - QString hashString() const; + UVString hashString(bool shortened) const; protected: unsigned char *m_dawg; @@ -86,6 +84,8 @@ protected: LexiconInterpreter *m_interpreter; char m_hash[16]; int m_wordcount; + + LexiconInterpreter* createInterpreter(char version) const; }; } diff --git a/quacker/settings.cpp b/quacker/settings.cpp index 362e916..1febdb5 
100644 --- a/quacker/settings.cpp +++ b/quacker/settings.cpp @@ -93,6 +93,8 @@ Settings::Settings(QWidget *parent) m_appDataDir = directory.absolutePath(); } m_userDataDir = QDesktopServices::storageLocation(QDesktopServices::DataLocation); + QDir qdir(m_userDataDir); + qdir.mkpath("lexica"); } void Settings::createGUI() @@ -195,49 +197,97 @@ void Settings::initialize() if (lexiconName == "cswfeb07") lexiconName = "cswapr07"; - setQuackleToUseLexiconName(QuackleIO::Util::qstringToStdString(lexiconName)); - setQuackleToUseAlphabetName(QuackleIO::Util::qstringToStdString(settings.value("quackle/settings/alphabet-name", QString("english")).toString())); + setQuackleToUseLexiconName(lexiconName); + setQuackleToUseAlphabetName(settings.value("quackle/settings/alphabet-name", QString("english")).toString()); setQuackleToUseThemeName(settings.value("quackle/settings/theme-name", QString("traditional")).toString()); setQuackleToUseBoardName(settings.value("quackle/settings/board-name", QString("")).toString()); } -void Settings::setQuackleToUseLexiconName(const string &lexiconName) +void Settings::buildGaddag(const string &filename) { - if (QUACKLE_LEXICON_PARAMETERS->lexiconName() != lexiconName) + GaddagFactory factory((UVString())); + Quackle::LetterString word; + + pushIndex(factory, word, 1); + factory.generate(); + factory.writeIndex(filename); +} + +void Settings::pushIndex(GaddagFactory &factory, Quackle::LetterString &word, int index) +{ + unsigned int p; + Quackle::Letter letter; + bool t; + bool lastchild; + bool british; + int playability; + + do + { + QUACKLE_LEXICON_PARAMETERS->dawgAt(index, p, letter, t, lastchild, british, playability); + word.push_back(letter); + if (t) + factory.pushWord(word); + if (p) + pushIndex(factory, word, p); + index++; + word.pop_back(); + } while (!lastchild); +} + + +void Settings::setQuackleToUseLexiconName(const QString &lexiconName) +{ + string lexiconNameStr = lexiconName.toStdString(); + if 
(QUACKLE_LEXICON_PARAMETERS->lexiconName() != lexiconNameStr) { - QUACKLE_LEXICON_PARAMETERS->setLexiconName(lexiconName); + QUACKLE_LEXICON_PARAMETERS->setLexiconName(lexiconNameStr); - string dawgFile = Quackle::LexiconParameters::findDictionaryFile(lexiconName + ".dawg"); + string dawgFile = Quackle::LexiconParameters::findDictionaryFile(lexiconNameStr + ".dawg"); if (dawgFile.empty()) { - UVcout << "Dawg for lexicon '" << lexiconName << "' does not exist." << endl; + UVcout << "Dawg for lexicon '" << lexiconNameStr << "' does not exist." << endl; QUACKLE_LEXICON_PARAMETERS->unloadDawg(); } else QUACKLE_LEXICON_PARAMETERS->loadDawg(dawgFile); - string gaddagFile = Quackle::LexiconParameters::findDictionaryFile(lexiconName + ".gaddag"); + if (!QUACKLE_LEXICON_PARAMETERS->hasDawg()) + { + QUACKLE_LEXICON_PARAMETERS->unloadGaddag(); + return; + } + + string gaddagFile = Quackle::LexiconParameters::findDictionaryFile(lexiconNameStr + ".gaddag"); if (gaddagFile.empty()) { - UVcout << "Gaddag for lexicon '" << lexiconName << "' does not exist." << endl; + UVcout << "Gaddag for lexicon '" << lexiconNameStr << "' does not exist." 
<< endl; QUACKLE_LEXICON_PARAMETERS->unloadGaddag(); } else QUACKLE_LEXICON_PARAMETERS->loadGaddag(gaddagFile); - QUACKLE_STRATEGY_PARAMETERS->initialize(lexiconName); + if (!QUACKLE_LEXICON_PARAMETERS->hasGaddag()) + { + gaddagFile = QUACKLE_DATAMANAGER->makeDataFilename("lexica", lexiconNameStr + ".gaddag", true); + buildGaddag(gaddagFile); + QUACKLE_LEXICON_PARAMETERS->loadGaddag(gaddagFile); + } + + QUACKLE_STRATEGY_PARAMETERS->initialize(lexiconNameStr); } } -void Settings::setQuackleToUseAlphabetName(const string &alphabetName) +void Settings::setQuackleToUseAlphabetName(const QString &alphabetName) { - if (QUACKLE_ALPHABET_PARAMETERS->alphabetName() != alphabetName) + string alphabetNameStr = alphabetName.toStdString(); + if (QUACKLE_ALPHABET_PARAMETERS->alphabetName() != alphabetNameStr) { - QString alphabetFile = QuackleIO::Util::stdStringToQString(Quackle::AlphabetParameters::findAlphabetFile(alphabetName + ".quackle_alphabet")); + QString alphabetFileStr = QuackleIO::Util::stdStringToQString(Quackle::AlphabetParameters::findAlphabetFile(alphabetNameStr + ".quackle_alphabet")); QuackleIO::FlexibleAlphabetParameters *flexure = new QuackleIO::FlexibleAlphabetParameters; - flexure->setAlphabetName(alphabetName); - if (flexure->load(alphabetFile)) + flexure->setAlphabetName(alphabetNameStr); + if (flexure->load(alphabetFileStr)) { if (flexure->length() != QUACKLE_ALPHABET_PARAMETERS->length() && QUACKLE_ALPHABET_PARAMETERS->alphabetName() != "default") { @@ -295,8 +345,7 @@ void Settings::lexiconChanged(const QString &lexiconName) editLexicon(); return; } - string lexiconNameString = QuackleIO::Util::qstringToStdString(lexiconName); - setQuackleToUseLexiconName(lexiconNameString); + setQuackleToUseLexiconName(lexiconName); CustomQSettings settings; settings.setValue("quackle/settings/lexicon-name", lexiconName); @@ -311,8 +360,7 @@ void Settings::alphabetChanged(const QString &alphabetName) editAlphabet(); return; } - string alphabetNameString = 
QuackleIO::Util::qstringToStdString(alphabetName); - setQuackleToUseAlphabetName(alphabetNameString); + setQuackleToUseAlphabetName(alphabetName); CustomQSettings settings; settings.setValue("quackle/settings/alphabet-name", alphabetName); diff --git a/quacker/settings.h b/quacker/settings.h index cee0562..fab2f3f 100644 --- a/quacker/settings.h +++ b/quacker/settings.h @@ -24,6 +24,8 @@ #include #include +#include "quackleio/gaddagfactory.h" + class QComboBox; class QCheckBox; class QPushButton; @@ -72,8 +74,8 @@ protected slots: void editAlphabet(); void editTheme(); - void setQuackleToUseLexiconName(const string &lexiconName); - void setQuackleToUseAlphabetName(const string &alphabetName); + void setQuackleToUseLexiconName(const QString &lexiconName); + void setQuackleToUseAlphabetName(const QString &alphabetName); void setQuackleToUseThemeName(const QString &themeName); void setQuackleToUseBoardName(const QString &lexiconName); @@ -94,6 +96,9 @@ private: // populate the popup based on what's in QSettings void loadBoardNameCombo(); + void buildGaddag(const string &filename); + void pushIndex(GaddagFactory &factory, Quackle::LetterString &word, int index); + static Settings *m_self; }; diff --git a/quackleio/dawgfactory.cpp b/quackleio/dawgfactory.cpp index 74b4346..3a971a3 100644 --- a/quackleio/dawgfactory.cpp +++ b/quackleio/dawgfactory.cpp @@ -25,10 +25,10 @@ #include "util.h" -DawgFactory::DawgFactory(const QString& alphabetFile) +DawgFactory::DawgFactory(const UVString& alphabetFile) { QuackleIO::FlexibleAlphabetParameters *flexure = new QuackleIO::FlexibleAlphabetParameters; - flexure->load(alphabetFile); + flexure->load(QuackleIO::Util::uvStringToQString(alphabetFile)); m_alphas = flexure; m_root.insmallerdict = false; @@ -45,12 +45,10 @@ DawgFactory::~DawgFactory() delete m_alphas; } -bool DawgFactory::pushWord(const QString& word, bool inSmaller, int playability) +bool DawgFactory::pushWord(const UVString& word, bool inSmaller, int playability) { - 
UVString originalString = QuackleIO::Util::qstringToString(word); - UVString leftover; - Quackle::LetterString encodedWord = m_alphas->encode(originalString, &leftover); + Quackle::LetterString encodedWord = m_alphas->encode(word, &leftover); if (leftover.empty()) { if (m_root.pushWord(encodedWord, inSmaller, playability)) @@ -129,9 +127,9 @@ void DawgFactory::generate() m_root.print(m_nodelist); } -void DawgFactory::writeIndex(const QString& filename) +void DawgFactory::writeIndex(const UVString& filename) { - ofstream out(QuackleIO::Util::qstringToStdString(filename).c_str(), ios::out | ios::binary); + ofstream out(filename.c_str(), ios::out | ios::binary); unsigned char bytes[7]; bytes[0] = (m_encodableWords & 0x00FF0000) >> 16; diff --git a/quackleio/dawgfactory.h b/quackleio/dawgfactory.h index 23bb4f5..051e632 100644 --- a/quackleio/dawgfactory.h +++ b/quackleio/dawgfactory.h @@ -26,7 +26,7 @@ class DawgFactory { public: - DawgFactory(const QString& alphabetFile); + DawgFactory(const UVString& alphabetFile); ~DawgFactory(); int wordCount() const { return m_root.wordCount(); }; @@ -35,10 +35,10 @@ public: int unencodableWords() const { return m_unencodableWords; }; int duplicateWords() const { return m_duplicateWords; }; - bool pushWord(const QString& word, bool inSmaller, int playability); + bool pushWord(const UVString& word, bool inSmaller, int playability); void hashWord(const Quackle::LetterString &word); void generate(); - void writeIndex(const QString& fname); + void writeIndex(const UVString& filename); const char* hashBytes() { return m_hash.charptr; }; diff --git a/quackleio/flexiblealphabet.h b/quackleio/flexiblealphabet.h index 89bd1f4..d5db68a 100644 --- a/quackleio/flexiblealphabet.h +++ b/quackleio/flexiblealphabet.h @@ -21,8 +21,6 @@ #include "alphabetparameters.h" -class QString; - namespace QuackleIO { diff --git a/quackleio/gaddagfactory.cpp b/quackleio/gaddagfactory.cpp index 7f666cb..53ccf04 100644 --- a/quackleio/gaddagfactory.cpp +++ 
b/quackleio/gaddagfactory.cpp @@ -24,11 +24,15 @@ #include "gaddagfactory.h" #include "util.h" -GaddagFactory::GaddagFactory(const QString& alphabetFile) +GaddagFactory::GaddagFactory(const UVString &alphabetFile) + : m_encodableWords(0), m_unencodableWords(0), m_alphas(NULL) { - QuackleIO::FlexibleAlphabetParameters *flexure = new QuackleIO::FlexibleAlphabetParameters; - flexure->load(alphabetFile); - m_alphas = flexure; + if (!alphabetFile.empty()) + { + QuackleIO::FlexibleAlphabetParameters *flexure = new QuackleIO::FlexibleAlphabetParameters; + flexure->load(QuackleIO::Util::uvStringToQString(alphabetFile)); + m_alphas = flexure; + } // So the separator is sorted to last. m_root.t = false; @@ -44,35 +48,13 @@ GaddagFactory::~GaddagFactory() delete m_alphas; } -bool GaddagFactory::pushWord(const QString& word) +bool GaddagFactory::pushWord(const UVString &word) { - UVString originalString = QuackleIO::Util::qstringToString(word); - UVString leftover; - Quackle::LetterString encodedWord = m_alphas->encode(originalString, &leftover); + Quackle::LetterString encodedWord = m_alphas->encode(word, &leftover); if (leftover.empty()) { - ++m_encodableWords; - hashWord(encodedWord); - // FIXME: This hash will fail if duplicate words are passed in. - // But testing for duplicate words isn't so easy without keeping - // an entirely separate list. 
- - for (unsigned i = 1; i <= encodedWord.length(); i++) - { - Quackle::LetterString newword; - - for (int j = i - 1; j >= 0; j--) - newword.push_back(encodedWord[j]); - - if (i < encodedWord.length()) - { - newword.push_back(internalSeparatorRepresentation); // "^" - for (unsigned j = i; j < encodedWord.length(); j++) - newword.push_back(encodedWord[j]); - } - m_gaddagizedWords.push_back(newword); - } + pushWord(encodedWord); return true; } @@ -80,6 +62,32 @@ bool GaddagFactory::pushWord(const QString& word) return false; } +bool GaddagFactory::pushWord(const Quackle::LetterString &word) +{ + ++m_encodableWords; + hashWord(word); + // FIXME: This hash will fail if duplicate words are passed in. + // But testing for duplicate words isn't so easy without keeping + // an entirely separate list. + + for (unsigned i = 1; i <= word.length(); i++) + { + Quackle::LetterString newword; + + for (int j = i - 1; j >= 0; j--) + newword.push_back(word[j]); + + if (i < word.length()) + { + newword.push_back(internalSeparatorRepresentation); // "^" + for (unsigned j = i; j < word.length(); j++) + newword.push_back(word[j]); + } + m_gaddagizedWords.push_back(newword); + } + return true; +} + void GaddagFactory::hashWord(const Quackle::LetterString &word) { QCryptographicHash wordhash(QCryptographicHash::Md5); @@ -93,6 +101,7 @@ void GaddagFactory::hashWord(const Quackle::LetterString &word) void GaddagFactory::generate() { + sort(m_gaddagizedWords.begin(), m_gaddagizedWords.end()); Quackle::WordList::const_iterator wordsEnd = m_gaddagizedWords.end(); for (Quackle::WordList::const_iterator wordsIt = m_gaddagizedWords.begin(); wordsIt != wordsEnd; ++wordsIt) m_root.pushWord(*wordsIt); @@ -100,13 +109,13 @@ void GaddagFactory::generate() // m_root.pushWord(words); } -void GaddagFactory::writeIndex(const QString &fname) +void GaddagFactory::writeIndex(const string &fname) { m_nodelist.push_back(&m_root); m_root.print(m_nodelist); - ofstream 
out(QuackleIO::Util::qstringToStdString(fname).c_str(), ios::out | ios::binary); + ofstream out(fname.c_str(), ios::out | ios::binary); out.put(1); // GADDAG format version 1 out.write(m_hash.charptr, sizeof(m_hash.charptr)); diff --git a/quackleio/gaddagfactory.h b/quackleio/gaddagfactory.h index 03cb546..415baff 100644 --- a/quackleio/gaddagfactory.h +++ b/quackleio/gaddagfactory.h @@ -27,7 +27,7 @@ public: static const Quackle::Letter internalSeparatorRepresentation = QUACKLE_FIRST_LETTER + QUACKLE_MAXIMUM_ALPHABET_SIZE; - GaddagFactory(const QString& alphabetFile); + GaddagFactory(const UVString &alphabetFile); ~GaddagFactory(); int wordCount() const { return m_gaddagizedWords.size(); }; @@ -35,11 +35,12 @@ public: int encodableWords() const { return m_encodableWords; }; int unencodableWords() const { return m_unencodableWords; }; - bool pushWord(const QString& word); + bool pushWord(const UVString &word); + bool pushWord(const Quackle::LetterString &word); void hashWord(const Quackle::LetterString &word); void sortWords() { sort(m_gaddagizedWords.begin(), m_gaddagizedWords.end()); }; void generate(); - void writeIndex(const QString& fname); + void writeIndex(const string &fname); const char* hashBytes() { return m_hash.charptr; }; -- cgit v1.2.3