From 6613f3fd45b4ecf6821ee7bb07c95f86f43b0db2 Mon Sep 17 00:00:00 2001 From: John Fultz Date: Thu, 20 Aug 2015 04:59:36 -0500 Subject: Move DAWG generation into quackleio. Same thing I just did for the gaddag code I'm now doing for the dawg code. While I was at it, I made some improvements to the dawg code... * Instead of adding multiple cross-checks for various kinds of node metadata, there's now only one cross-check...a hash applied to each node. * Some useless variables/members have been excised. * Add ability to do a word count (cryptohash coming soon). * Make it possible to call generate() and writeIndex() multiple times without corrupting the dictionary. --- quackleio/dawgfactory.cpp | 282 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 282 insertions(+) create mode 100644 quackleio/dawgfactory.cpp (limited to 'quackleio/dawgfactory.cpp') diff --git a/quackleio/dawgfactory.cpp b/quackleio/dawgfactory.cpp new file mode 100644 index 0000000..8a37766 --- /dev/null +++ b/quackleio/dawgfactory.cpp @@ -0,0 +1,282 @@ +/* + * Quackle -- Crossword game artificial intelligence and analysis tool + * Copyright (C) 2005-2014 Jason Katz-Brown and John O'Laughlin. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */ + + +#include +#include + +#include "dawgfactory.h" +#include "util.h" + + +DawgFactory::DawgFactory(const QString& alphabetFile) +{ + QuackleIO::FlexibleAlphabetParameters *flexure = new QuackleIO::FlexibleAlphabetParameters; + flexure->load(alphabetFile); + alphas = flexure; + + root.t = false; + root.insmallerdict = false; + root.playability = 0; + root.c = QUACKLE_BLANK_MARK; + root.pointer = 0; + root.lastchild = true; +} + +DawgFactory::~DawgFactory() +{ + delete alphas; +} + +bool DawgFactory::pushWord(const QString& word, bool inSmaller, int playability) +{ + UVString originalString = QuackleIO::Util::qstringToString(word); + + UVString leftover; + Quackle::LetterString encodedWord = alphas->encode(originalString, &leftover); + if (leftover.empty()) + { + ++m_encodableWords; + root.pushWord(encodedWord, inSmaller, playability); + return true; + } + + ++m_unencodableWords; + return false; +} + +void DawgFactory::generate() +{ + const int bucketcount = 2000; + vector< int > bucket[bucketcount]; + + nodelist.clear(); + nodelist.push_back(&root); + root.print(nodelist); + + nodelist[0]->letterSum(); + + for (unsigned int i = 0; i < nodelist.size(); i++) + { + bucket[nodelist[i]->sum % bucketcount].push_back(i); + nodelist[i]->pointer = 0; + nodelist[i]->written = false; + nodelist[i]->deleted = false; + nodelist[i]->cloneof = NULL; + } + + for (int b = 0; b < bucketcount; b++) + { + if (bucket[b].size() == 0) + continue; + for (vector::iterator it = bucket[b].begin(); it != bucket[b].end() - 1; it++) + { + if (!nodelist[(*it)]->deleted) + { + for (vector::iterator jt = it + 1; jt != bucket[b].end(); jt++) + { + if (!nodelist[(*jt)]->deleted) + { + // cout << "Comparing " << (*it) << " and " << (*jt) << endl; + if (nodelist[(*it)]->equals(nodelist[(*jt)][0])) + { + //cout << "Hey! 
" << (*it) << " == " << (*jt) << endl; + // ones[l].erase(jt); + nodelist[(*jt)]->deleted = true; + nodelist[(*jt)]->cloneof = nodelist[(*it)]; + } + } + } + } + } + } + + nodelist.clear(); + nodelist.push_back(&root); + root.print(nodelist); +} + +void DawgFactory::writeIndex(const QString& filename) +{ + ofstream out(QuackleIO::Util::qstringToStdString(filename).c_str(), ios::out | ios::binary); + + for (unsigned int i = 0; i < nodelist.size(); i++) { + //cout << nodelist[i]->c << " " << nodelist[i]->pointer << " " << nodelist[i]->t << " " << nodelist[i]->lastchild << endl; + Node* n = nodelist[i]; + unsigned int p; + if (nodelist[i]->deleted) + { + p = (unsigned int)(nodelist[i]->cloneof->pointer); + // n = nodelist[i]->cloneof; + } + else + p = (unsigned int)(nodelist[i]->pointer); + + char bytes[7]; + unsigned char n1 = (p & 0x00FF0000) >> 16; + unsigned char n2 = (p & 0x0000FF00) >> 8; + unsigned char n3 = (p & 0x000000FF); + unsigned char n4 = n->c - QUACKLE_FIRST_LETTER; + + unsigned int pb = n->playability; + unsigned char n5 = (pb & 0x00FF0000) >> 16; + unsigned char n6 = (pb & 0x0000FF00) >> 8; + unsigned char n7 = (pb & 0x000000FF); + + if (n->t) { + n4 |= 32; + } + if (n->lastchild) { + n4 |= 64; + } + if (n->insmallerdict) { + n4 |= 128; + } + + bytes[0] = n1; bytes[1] = n2; bytes[2] = n3; bytes[3] = n4; + bytes[4] = n5; bytes[5] = n6; bytes[6] = n7; + out.write(bytes, 7); + } +} + + + +void DawgFactory::Node::print(vector< Node* >& nodelist) +{ + written = true; + + if (children.size() == 0) + return; + + if (!deleted) + { + //cout << " Setting pointer to " << nodelist.size() << " before I push_back the children." << endl; + pointer = nodelist.size(); + } + else + { + pointer = cloneof->pointer; + //cout << " Setting pointer to clone's (" << pointer << ") and not pushing anything." 
<< endl; + } + + if (!deleted) + { + for (unsigned int i = 0; i < children.size(); i++) { + nodelist.push_back(&children[i]); + } + + for (unsigned int i = 0; i < children.size(); i++) { + if (!children[i].deleted) + children[i].print(nodelist); + else if (!children[i].cloneof->written) + children[i].cloneof->print(nodelist); + } + } + + if (children.size() > 0) + children[children.size() - 1].lastchild = true; +} + + +void DawgFactory::Node::pushWord(const Quackle::LetterString& word, bool inSmaller, int pb) +{ + if (word.length() == 0) { + t = true; + playability = pb; + insmallerdict = inSmaller; + } + else { + char first = word[0]; + Quackle::LetterString rest = word.substr(1, word.length() - 1); + int index = -1; + + // cout << "first: " << first << ", rest: " << rest << endl; + + for (unsigned int i = 0; i < children.size(); i++) { + if (children[i].c == first) { + index = i; + break; + } + } + + if (index == -1) { + Node n; + n.c = first; + n.t = false; + n.playability = 0; + n.insmallerdict = false; + n.pointer = 0; + n.lastchild = false; + children.push_back(n); + index = children.size() - 1; + } + + children[index].pushWord(rest, inSmaller, pb); + } + + sumexplored = false; + deleted = false; + written = false; +} + + +bool DawgFactory::Node::equals(const Node &n) const +{ + if (playability != n.playability) + return false; + if (c != n.c) + return false; + if (children.size() != n.children.size()) + return false; + if (t != n.t) + return false; + if (insmallerdict != n.insmallerdict) + return false; + if (sum != n.sum) + return false; + + for (unsigned int i = 0; i < children.size(); i++) + if (!children[i].equals(n.children[i])) + return false; + + return true; +} + +int DawgFactory::Node::wordCount() const +{ + int wordCount = (t ? 
0 : 1); + for (size_t i = 0; i < children.size(); i++) + wordCount += children[i].wordCount(); + return wordCount; +} + +int DawgFactory::Node::letterSum() const +{ + if (sumexplored) + return sum; + + sumexplored = true; + + // djb2 checksum + sum = 5381 * 33 + (int) c; + + for (unsigned int i = 0; i < children.size(); i++) + sum = (sum << 5) + sum + children[i].letterSum(); + + return sum; +} -- cgit v1.2.3 From d1f5f768764d439f02520d9c6c017fcd3ae96b83 Mon Sep 17 00:00:00 2001 From: John Fultz Date: Mon, 24 Aug 2015 00:51:48 -0500 Subject: Add a new DAWG format. Make reader and writer for the new format, while maintaining compatibility with the old. Things to note of the new format... * Now has a header, with version number, MD5, and word count. * No longer has terminator bit. Nodes are terminated by a non-zero playability. * Which means letters have one more bit. So we can now support more than 32 letters. Important for Slovak alphabet. Also, various cleanups and refactorings. --- fixedstring.h | 1 + lexiconparameters.cpp | 29 ++++++---- lexiconparameters.h | 9 ++- quackleio/dawgfactory.cpp | 144 ++++++++++++++++++++++++++-------------------- quackleio/dawgfactory.h | 26 ++++++--- 5 files changed, 125 insertions(+), 84 deletions(-) (limited to 'quackleio/dawgfactory.cpp') diff --git a/fixedstring.h b/fixedstring.h index 46d1011..e8db0bf 100644 --- a/fixedstring.h +++ b/fixedstring.h @@ -54,6 +54,7 @@ class FixedLengthString size_type size() const { return length(); } void clear() { m_end = m_data; } void push_back(char c); + const char* constData() const { return m_data; } int compare(const FixedLengthString& s) const; diff --git a/lexiconparameters.cpp b/lexiconparameters.cpp index 9826ca3..ca09fa5 100644 --- a/lexiconparameters.cpp +++ b/lexiconparameters.cpp @@ -25,15 +25,16 @@ using namespace Quackle; -class V0DawgInterpreter : public DawgInterpreter +class Quackle::V0DawgInterpreter : public DawgInterpreter { - virtual void loadDawg(ifstream &file, unsigned
char *dawg) + virtual void loadDawg(ifstream &file, LexiconParameters &lexparams) { int i = 0; + file.unget(); // version 0 doesn't have a version byte...it's just the node byte which is always set to 0 while (!file.eof()) { - file.read((char*)(dawg) + i, 7); + file.read((char*)(lexparams.m_dawg) + i, 7); i += 7; } } @@ -54,15 +55,19 @@ class V0DawgInterpreter : public DawgInterpreter virtual int versionNumber() const { return 0; } }; -class V1DawgInterpreter : public DawgInterpreter +class Quackle::V1DawgInterpreter : public DawgInterpreter { - virtual void loadDawg(ifstream &file, unsigned char *dawg) + virtual void loadDawg(ifstream &file, LexiconParameters &lexparams) { int i = 0; + unsigned char bytes[3]; + file.read(lexparams.m_hash, sizeof(lexparams.m_hash)); + file.read((char*)bytes, 3); + lexparams.m_wordcount = (bytes[0] << 16) | (bytes[1] << 8) | bytes[2]; while (!file.eof()) { - file.read((char*)(dawg) + i, 7); + file.read((char*)(lexparams.m_dawg) + i, 7); i += 7; } } @@ -73,10 +78,10 @@ class V1DawgInterpreter : public DawgInterpreter p = (dawg[index] << 16) + (dawg[index + 1] << 8) + (dawg[index + 2]); letter = dawg[index + 3]; - t = (letter & 32) != 0; - lastchild = (letter & 64) != 0; + t = (p != 0); + lastchild = ((letter & 64) != 0); british = !(letter & 128); - letter = (letter & 31) + QUACKLE_FIRST_LETTER; + letter = (letter & 63) + QUACKLE_FIRST_LETTER; playability = (dawg[index + 4] << 16) + (dawg[index + 5] << 8) + (dawg[index + 6]); } @@ -84,8 +89,9 @@ class V1DawgInterpreter : public DawgInterpreter }; LexiconParameters::LexiconParameters() - : m_dawg(0), m_gaddag(0) + : m_dawg(NULL), m_gaddag(NULL), m_interpreter(NULL), m_wordcount(0) { + memset(m_hash, 0, sizeof(m_hash)); } LexiconParameters::~LexiconParameters() @@ -124,7 +130,6 @@ void LexiconParameters::loadDawg(const string &filename) } char versionByte = file.get(); - file.unget(); switch(versionByte) { case 0: @@ -140,7 +145,7 @@ void LexiconParameters::loadDawg(const string 
&filename) m_dawg = new unsigned char[7000000]; - m_interpreter->loadDawg(file, m_dawg); + m_interpreter->loadDawg(file, *this); } void LexiconParameters::loadGaddag(const string &filename) diff --git a/lexiconparameters.h b/lexiconparameters.h index 4c77cd1..4b6369d 100644 --- a/lexiconparameters.h +++ b/lexiconparameters.h @@ -28,15 +28,20 @@ namespace Quackle class DawgInterpreter { public: - virtual void loadDawg(ifstream &file, unsigned char *dawg) = 0; + virtual void loadDawg(ifstream &file, LexiconParameters &lexparams) = 0; virtual void dawgAt(const unsigned char *dawg, int index, unsigned int &p, Letter &letter, bool &t, bool &lastchild, bool &british, int &playability) const = 0; virtual int versionNumber() const = 0; virtual ~DawgInterpreter() {}; }; +class V0DawgInterpreter; +class V1DawgInterpreter; class LexiconParameters { + friend class Quackle::V0DawgInterpreter; + friend class Quackle::V1DawgInterpreter; + public: LexiconParameters(); ~LexiconParameters(); @@ -75,6 +80,8 @@ protected: unsigned char *m_gaddag; string m_lexiconName; DawgInterpreter *m_interpreter; + char m_hash[16]; + int m_wordcount; }; } diff --git a/quackleio/dawgfactory.cpp b/quackleio/dawgfactory.cpp index 8a37766..6fb5be0 100644 --- a/quackleio/dawgfactory.cpp +++ b/quackleio/dawgfactory.cpp @@ -19,6 +19,7 @@ #include #include +#include #include "dawgfactory.h" #include "util.h" @@ -28,19 +29,20 @@ DawgFactory::DawgFactory(const QString& alphabetFile) { QuackleIO::FlexibleAlphabetParameters *flexure = new QuackleIO::FlexibleAlphabetParameters; flexure->load(alphabetFile); - alphas = flexure; - - root.t = false; - root.insmallerdict = false; - root.playability = 0; - root.c = QUACKLE_BLANK_MARK; - root.pointer = 0; - root.lastchild = true; + m_alphas = flexure; + + m_root.insmallerdict = false; + m_root.playability = 0; + m_root.c = QUACKLE_BLANK_MARK; + m_root.pointer = 0; + m_root.lastchild = true; + + m_hash.int32ptr[0] = m_hash.int32ptr[1] = m_hash.int32ptr[2] = 
m_hash.int32ptr[3] = 0; } DawgFactory::~DawgFactory() { - delete alphas; + delete m_alphas; } bool DawgFactory::pushWord(const QString& word, bool inSmaller, int playability) @@ -48,36 +50,52 @@ bool DawgFactory::pushWord(const QString& word, bool inSmaller, int playability) UVString originalString = QuackleIO::Util::qstringToString(word); UVString leftover; - Quackle::LetterString encodedWord = alphas->encode(originalString, &leftover); + Quackle::LetterString encodedWord = m_alphas->encode(originalString, &leftover); if (leftover.empty()) { - ++m_encodableWords; - root.pushWord(encodedWord, inSmaller, playability); - return true; + if (m_root.pushWord(encodedWord, inSmaller, playability)) + { + ++m_encodableWords; + hashWord(encodedWord); + return true; + } + ++m_duplicateWords; + return false; } ++m_unencodableWords; return false; } +void DawgFactory::hashWord(const Quackle::LetterString &word) +{ + QCryptographicHash wordhash(QCryptographicHash::Md5); + wordhash.addData(word.constData(), word.length()); + QByteArray wordhashbytes = wordhash.result(); + m_hash.int32ptr[0] ^= ((const int32_t*)wordhashbytes.constData())[0]; + m_hash.int32ptr[1] ^= ((const int32_t*)wordhashbytes.constData())[1]; + m_hash.int32ptr[2] ^= ((const int32_t*)wordhashbytes.constData())[2]; + m_hash.int32ptr[3] ^= ((const int32_t*)wordhashbytes.constData())[3]; +} + void DawgFactory::generate() { const int bucketcount = 2000; vector< int > bucket[bucketcount]; - nodelist.clear(); - nodelist.push_back(&root); - root.print(nodelist); + m_nodelist.clear(); + m_nodelist.push_back(&m_root); + m_root.print(m_nodelist); - nodelist[0]->letterSum(); + m_nodelist[0]->letterSum(); - for (unsigned int i = 0; i < nodelist.size(); i++) + for (unsigned int i = 0; i < m_nodelist.size(); i++) { - bucket[nodelist[i]->sum % bucketcount].push_back(i); - nodelist[i]->pointer = 0; - nodelist[i]->written = false; - nodelist[i]->deleted = false; - nodelist[i]->cloneof = NULL; + bucket[m_nodelist[i]->sum % 
bucketcount].push_back(i); + m_nodelist[i]->pointer = 0; + m_nodelist[i]->written = false; + m_nodelist[i]->deleted = false; + m_nodelist[i]->cloneof = NULL; } for (int b = 0; b < bucketcount; b++) @@ -86,19 +104,19 @@ void DawgFactory::generate() continue; for (vector::iterator it = bucket[b].begin(); it != bucket[b].end() - 1; it++) { - if (!nodelist[(*it)]->deleted) + if (!m_nodelist[(*it)]->deleted) { for (vector::iterator jt = it + 1; jt != bucket[b].end(); jt++) { - if (!nodelist[(*jt)]->deleted) + if (!m_nodelist[(*jt)]->deleted) { // cout << "Comparing " << (*it) << " and " << (*jt) << endl; - if (nodelist[(*it)]->equals(nodelist[(*jt)][0])) + if (m_nodelist[(*it)]->equals(m_nodelist[(*jt)][0])) { //cout << "Hey! " << (*it) << " == " << (*jt) << endl; // ones[l].erase(jt); - nodelist[(*jt)]->deleted = true; - nodelist[(*jt)]->cloneof = nodelist[(*it)]; + m_nodelist[(*jt)]->deleted = true; + m_nodelist[(*jt)]->cloneof = m_nodelist[(*it)]; } } } @@ -106,51 +124,53 @@ void DawgFactory::generate() } } - nodelist.clear(); - nodelist.push_back(&root); - root.print(nodelist); + m_nodelist.clear(); + m_nodelist.push_back(&m_root); + m_root.print(m_nodelist); } void DawgFactory::writeIndex(const QString& filename) { ofstream out(QuackleIO::Util::qstringToStdString(filename).c_str(), ios::out | ios::binary); + unsigned char bytes[7]; + + bytes[0] = (m_encodableWords & 0x00FF0000) >> 16; + bytes[1] = (m_encodableWords & 0x0000FF00) >> 8; + bytes[2] = (m_encodableWords & 0x000000FF); - for (unsigned int i = 0; i < nodelist.size(); i++) { - //cout << nodelist[i]->c << " " << nodelist[i]->pointer << " " << nodelist[i]->t << " " << nodelist[i]->lastchild << endl; - Node* n = nodelist[i]; + out.write(m_hash.charptr, sizeof(m_hash.charptr)); + out.write((char*)bytes, 3); + + for (unsigned int i = 0; i < m_nodelist.size(); i++) { + //cout << m_nodelist[i]->c << " " << m_nodelist[i]->pointer << " " << m_nodelist[i]->t << " " << m_nodelist[i]->lastchild << endl; + Node* n = 
m_nodelist[i]; unsigned int p; - if (nodelist[i]->deleted) + if (m_nodelist[i]->deleted) { - p = (unsigned int)(nodelist[i]->cloneof->pointer); - // n = nodelist[i]->cloneof; + p = (unsigned int)(m_nodelist[i]->cloneof->pointer); + // n = m_nodelist[i]->cloneof; } else - p = (unsigned int)(nodelist[i]->pointer); + p = (unsigned int)(m_nodelist[i]->pointer); - char bytes[7]; - unsigned char n1 = (p & 0x00FF0000) >> 16; - unsigned char n2 = (p & 0x0000FF00) >> 8; - unsigned char n3 = (p & 0x000000FF); - unsigned char n4 = n->c - QUACKLE_FIRST_LETTER; + bytes[0] = (p & 0x00FF0000) >> 16; + bytes[1] = (p & 0x0000FF00) >> 8; + bytes[2] = (p & 0x000000FF); + bytes[3] = n->c - QUACKLE_FIRST_LETTER; unsigned int pb = n->playability; - unsigned char n5 = (pb & 0x00FF0000) >> 16; - unsigned char n6 = (pb & 0x0000FF00) >> 8; - unsigned char n7 = (pb & 0x000000FF); + bytes[4] = (pb & 0x00FF0000) >> 16; + bytes[5] = (pb & 0x0000FF00) >> 8; + bytes[6] = (pb & 0x000000FF); - if (n->t) { - n4 |= 32; - } if (n->lastchild) { - n4 |= 64; + bytes[3] |= 64; } if (n->insmallerdict) { - n4 |= 128; + bytes[3] |= 128; } - bytes[0] = n1; bytes[1] = n2; bytes[2] = n3; bytes[3] = n4; - bytes[4] = n5; bytes[5] = n6; bytes[6] = n7; - out.write(bytes, 7); + out.write((char*)bytes, 7); } } @@ -193,11 +213,13 @@ void DawgFactory::Node::print(vector< Node* >& nodelist) } -void DawgFactory::Node::pushWord(const Quackle::LetterString& word, bool inSmaller, int pb) +// returns true if the word was actually added...false if it's a duplicate. +bool DawgFactory::Node::pushWord(const Quackle::LetterString& word, bool inSmaller, int pb) { + bool added; if (word.length() == 0) { - t = true; - playability = pb; + added = (playability == 0); + playability = (pb == 0) ? 
1 : pb; // word terminators nodes are marked by nonzero playability in the v1 DAWG format insmallerdict = inSmaller; } else { @@ -217,7 +239,6 @@ void DawgFactory::Node::pushWord(const Quackle::LetterString& word, bool inSmall if (index == -1) { Node n; n.c = first; - n.t = false; n.playability = 0; n.insmallerdict = false; n.pointer = 0; @@ -226,12 +247,13 @@ void DawgFactory::Node::pushWord(const Quackle::LetterString& word, bool inSmall index = children.size() - 1; } - children[index].pushWord(rest, inSmaller, pb); + added = children[index].pushWord(rest, inSmaller, pb); } sumexplored = false; deleted = false; written = false; + return added; } @@ -243,8 +265,6 @@ bool DawgFactory::Node::equals(const Node &n) const return false; if (children.size() != n.children.size()) return false; - if (t != n.t) - return false; if (insmallerdict != n.insmallerdict) return false; if (sum != n.sum) @@ -259,7 +279,7 @@ bool DawgFactory::Node::equals(const Node &n) const int DawgFactory::Node::wordCount() const { - int wordCount = (t ? 0 : 1); + int wordCount = ((playability == 0) ? 
0 : 1); for (size_t i = 0; i < children.size(); i++) wordCount += children[i].wordCount(); return wordCount; diff --git a/quackleio/dawgfactory.h b/quackleio/dawgfactory.h index b2cfb76..13837c4 100644 --- a/quackleio/dawgfactory.h +++ b/quackleio/dawgfactory.h @@ -29,29 +29,30 @@ public: DawgFactory(const QString& alphabetFile); ~DawgFactory(); - int wordCount() const { return root.wordCount(); }; - int nodeCount() const { return nodelist.size(); }; + int wordCount() const { return m_root.wordCount(); }; + int nodeCount() const { return m_nodelist.size(); }; int encodableWords() const { return m_encodableWords; }; int unencodableWords() const { return m_unencodableWords; }; + int duplicateWords() const { return m_duplicateWords; }; bool pushWord(const QString& word, bool inSmaller, int playability); + void hashWord(const Quackle::LetterString &word); void generate(); void writeIndex(const QString& fname); private: class Node { public: - void pushWord(const Quackle::LetterString& word, bool inSmaller, int pb); - void print(vector< Node* >& nodelist); + bool pushWord(const Quackle::LetterString& word, bool inSmaller, int pb); + void print(vector< Node* >& m_nodelist); int letterSum() const; int wordCount() const; bool equals(const Node &n) const; Quackle::Letter c; - bool t; bool insmallerdict; - int playability; + int playability; // if nonzero, then terminates word vector children; int pointer; @@ -69,9 +70,16 @@ private: int m_encodableWords; int m_unencodableWords; - vector< Node* > nodelist; - Quackle::AlphabetParameters *alphas; - Node root; + int m_duplicateWords; + vector< Node* > m_nodelist; + Quackle::AlphabetParameters *m_alphas; + Node m_root; + union { + char charptr[16]; + int32_t int32ptr[4]; + } m_hash; + + static const char m_versionNumber = 1; }; #endif -- cgit v1.2.3 From 1f7b8ef6f96e1d5a2c50565a0f52cc633215e485 Mon Sep 17 00:00:00 2001 From: John Fultz Date: Mon, 24 Aug 2015 04:45:27 -0500 Subject: Version the GADDAGs. 
Basically the same thing I just did to the DAWG files, now done to GADDAGs. Also, add hashing, and make sure GADDAGs only load if their hash matches that of the DAWG files. --- lexiconparameters.cpp | 52 +++++++++++++++++++++++++++++--------- lexiconparameters.h | 15 +++++------ quacker/settings.cpp | 19 +++++++------- quackleio/dawgfactory.cpp | 1 + quackleio/gaddagfactory.cpp | 61 ++++++++++++++++++++++++++++++--------------- quackleio/gaddagfactory.h | 23 +++++++++-------- quackletest.cpp | 4 +-- test/testharness.cpp | 6 ++--- 8 files changed, 117 insertions(+), 64 deletions(-) (limited to 'quackleio/dawgfactory.cpp') diff --git a/lexiconparameters.cpp b/lexiconparameters.cpp index ca09fa5..e014048 100644 --- a/lexiconparameters.cpp +++ b/lexiconparameters.cpp @@ -19,13 +19,14 @@ #include #include + #include "datamanager.h" #include "lexiconparameters.h" #include "uv.h" using namespace Quackle; -class Quackle::V0DawgInterpreter : public DawgInterpreter +class Quackle::V0LexiconInterpreter : public LexiconInterpreter { virtual void loadDawg(ifstream &file, LexiconParameters &lexparams) @@ -39,6 +40,17 @@ class Quackle::V0DawgInterpreter : public DawgInterpreter } } + virtual void loadGaddag(ifstream &file, LexiconParameters &lexparams) + { + int i = 0; + file.unget(); + while (!file.eof()) + { + file.read((char*)(lexparams.m_gaddag) + i, 4); + i += 4; + } + } + virtual void dawgAt(const unsigned char *dawg, int index, unsigned int &p, Letter &letter, bool &t, bool &lastchild, bool &british, int &playability) const { index *= 7; @@ -55,7 +67,7 @@ class Quackle::V0DawgInterpreter : public DawgInterpreter virtual int versionNumber() const { return 0; } }; -class Quackle::V1DawgInterpreter : public DawgInterpreter +class Quackle::V1LexiconInterpreter : public LexiconInterpreter { virtual void loadDawg(ifstream &file, LexiconParameters &lexparams) @@ -72,6 +84,24 @@ class Quackle::V1DawgInterpreter : public DawgInterpreter } } + virtual void loadGaddag(ifstream &file, 
LexiconParameters &lexparams) + { + char hash[16]; + file.read(hash, sizeof(hash)); + if (memcmp(hash, lexparams.m_hash, sizeof(hash))) + { + lexparams.unloadGaddag(); // don't use a mismatched gaddag + return; + } + + int i = 0; + while (!file.eof()) + { + file.read((char*)(lexparams.m_gaddag) + i, 4); + i += 4; + } + } + virtual void dawgAt(const unsigned char *dawg, int index, unsigned int &p, Letter &letter, bool &t, bool &lastchild, bool &british, int &playability) const { index *= 7; @@ -108,14 +138,14 @@ void LexiconParameters::unloadAll() void LexiconParameters::unloadDawg() { delete[] m_dawg; - m_dawg = 0; + m_dawg = NULL; delete m_interpreter; } void LexiconParameters::unloadGaddag() { delete[] m_gaddag; - m_gaddag = 0; + m_gaddag = NULL; } void LexiconParameters::loadDawg(const string &filename) @@ -133,10 +163,10 @@ void LexiconParameters::loadDawg(const string &filename) switch(versionByte) { case 0: - m_interpreter = new V0DawgInterpreter(); + m_interpreter = new V0LexiconInterpreter(); break; case 1: - m_interpreter = new V1DawgInterpreter(); + m_interpreter = new V1LexiconInterpreter(); break; default: UVcout << "couldn't open dawg " << filename.c_str() << endl; @@ -160,14 +190,12 @@ void LexiconParameters::loadGaddag(const string &filename) return; } + char versionByte = file.get(); + if (versionByte != m_interpreter->versionNumber()) + return; m_gaddag = new unsigned char[40000000]; - int i = 0; - while (!file.eof()) - { - file.read((char*)(m_gaddag) + i, 4); - i += 4; - } + m_interpreter->loadGaddag(file, *this); } string LexiconParameters::findDictionaryFile(const string &lexicon) diff --git a/lexiconparameters.h b/lexiconparameters.h index 4b6369d..04ad4e7 100644 --- a/lexiconparameters.h +++ b/lexiconparameters.h @@ -25,22 +25,23 @@ namespace Quackle { -class DawgInterpreter +class LexiconInterpreter { public: virtual void loadDawg(ifstream &file, LexiconParameters &lexparams) = 0; + virtual void loadGaddag(ifstream &file, LexiconParameters 
&lexparams) = 0; virtual void dawgAt(const unsigned char *dawg, int index, unsigned int &p, Letter &letter, bool &t, bool &lastchild, bool &british, int &playability) const = 0; virtual int versionNumber() const = 0; - virtual ~DawgInterpreter() {}; + virtual ~LexiconInterpreter() {}; }; -class V0DawgInterpreter; -class V1DawgInterpreter; +class V0LexiconInterpreter; +class V1LexiconInterpreter; class LexiconParameters { - friend class Quackle::V0DawgInterpreter; - friend class Quackle::V1DawgInterpreter; + friend class Quackle::V0LexiconInterpreter; + friend class Quackle::V1LexiconInterpreter; public: LexiconParameters(); @@ -79,7 +80,7 @@ protected: unsigned char *m_dawg; unsigned char *m_gaddag; string m_lexiconName; - DawgInterpreter *m_interpreter; + LexiconInterpreter *m_interpreter; char m_hash[16]; int m_wordcount; }; diff --git a/quacker/settings.cpp b/quacker/settings.cpp index 3c42a39..362e916 100644 --- a/quacker/settings.cpp +++ b/quacker/settings.cpp @@ -207,16 +207,6 @@ void Settings::setQuackleToUseLexiconName(const string &lexiconName) { QUACKLE_LEXICON_PARAMETERS->setLexiconName(lexiconName); - string gaddagFile = Quackle::LexiconParameters::findDictionaryFile(lexiconName + ".gaddag"); - - if (gaddagFile.empty()) - { - UVcout << "Gaddag for lexicon '" << lexiconName << "' does not exist." << endl; - QUACKLE_LEXICON_PARAMETERS->unloadGaddag(); - } - else - QUACKLE_LEXICON_PARAMETERS->loadGaddag(gaddagFile); - string dawgFile = Quackle::LexiconParameters::findDictionaryFile(lexiconName + ".dawg"); if (dawgFile.empty()) { @@ -226,6 +216,15 @@ void Settings::setQuackleToUseLexiconName(const string &lexiconName) else QUACKLE_LEXICON_PARAMETERS->loadDawg(dawgFile); + string gaddagFile = Quackle::LexiconParameters::findDictionaryFile(lexiconName + ".gaddag"); + if (gaddagFile.empty()) + { + UVcout << "Gaddag for lexicon '" << lexiconName << "' does not exist." 
<< endl; + QUACKLE_LEXICON_PARAMETERS->unloadGaddag(); + } + else + QUACKLE_LEXICON_PARAMETERS->loadGaddag(gaddagFile); + QUACKLE_STRATEGY_PARAMETERS->initialize(lexiconName); } } diff --git a/quackleio/dawgfactory.cpp b/quackleio/dawgfactory.cpp index 6fb5be0..74b4346 100644 --- a/quackleio/dawgfactory.cpp +++ b/quackleio/dawgfactory.cpp @@ -138,6 +138,7 @@ void DawgFactory::writeIndex(const QString& filename) bytes[1] = (m_encodableWords & 0x0000FF00) >> 8; bytes[2] = (m_encodableWords & 0x000000FF); + out.put(1); // DAWG format version 1 out.write(m_hash.charptr, sizeof(m_hash.charptr)); out.write((char*)bytes, 3); diff --git a/quackleio/gaddagfactory.cpp b/quackleio/gaddagfactory.cpp index e2c726d..7f666cb 100644 --- a/quackleio/gaddagfactory.cpp +++ b/quackleio/gaddagfactory.cpp @@ -19,6 +19,7 @@ #include #include +#include #include "gaddagfactory.h" #include "util.h" @@ -27,18 +28,20 @@ GaddagFactory::GaddagFactory(const QString& alphabetFile) { QuackleIO::FlexibleAlphabetParameters *flexure = new QuackleIO::FlexibleAlphabetParameters; flexure->load(alphabetFile); - alphas = flexure; + m_alphas = flexure; // So the separator is sorted to last. 
- root.t = false; - root.c = QUACKLE_NULL_MARK; // "_" - root.pointer = 0; - root.lastchild = true; + m_root.t = false; + m_root.c = QUACKLE_NULL_MARK; // "_" + m_root.pointer = 0; + m_root.lastchild = true; + + m_hash.int32ptr[0] = m_hash.int32ptr[1] = m_hash.int32ptr[2] = m_hash.int32ptr[3] = 0; } GaddagFactory::~GaddagFactory() { - delete alphas; + delete m_alphas; } bool GaddagFactory::pushWord(const QString& word) @@ -46,10 +49,14 @@ bool GaddagFactory::pushWord(const QString& word) UVString originalString = QuackleIO::Util::qstringToString(word); UVString leftover; - Quackle::LetterString encodedWord = alphas->encode(originalString, &leftover); + Quackle::LetterString encodedWord = m_alphas->encode(originalString, &leftover); if (leftover.empty()) { ++m_encodableWords; + hashWord(encodedWord); + // FIXME: This hash will fail if duplicate words are passed in. + // But testing for duplicate words isn't so easy without keeping + // an entirely separate list. for (unsigned i = 1; i <= encodedWord.length(); i++) { @@ -64,7 +71,7 @@ bool GaddagFactory::pushWord(const QString& word) for (unsigned j = i; j < encodedWord.length(); j++) newword.push_back(encodedWord[j]); } - gaddagizedWords.push_back(newword); + m_gaddagizedWords.push_back(newword); } return true; } @@ -73,26 +80,40 @@ bool GaddagFactory::pushWord(const QString& word) return false; } +void GaddagFactory::hashWord(const Quackle::LetterString &word) +{ + QCryptographicHash wordhash(QCryptographicHash::Md5); + wordhash.addData(word.constData(), word.length()); + QByteArray wordhashbytes = wordhash.result(); + m_hash.int32ptr[0] ^= ((const int32_t*)wordhashbytes.constData())[0]; + m_hash.int32ptr[1] ^= ((const int32_t*)wordhashbytes.constData())[1]; + m_hash.int32ptr[2] ^= ((const int32_t*)wordhashbytes.constData())[2]; + m_hash.int32ptr[3] ^= ((const int32_t*)wordhashbytes.constData())[3]; +} + void GaddagFactory::generate() { - Quackle::WordList::const_iterator wordsEnd = gaddagizedWords.end(); - for 
(Quackle::WordList::const_iterator wordsIt = gaddagizedWords.begin(); wordsIt != wordsEnd; ++wordsIt) - root.pushWord(*wordsIt); + Quackle::WordList::const_iterator wordsEnd = m_gaddagizedWords.end(); + for (Quackle::WordList::const_iterator wordsIt = m_gaddagizedWords.begin(); wordsIt != wordsEnd; ++wordsIt) + m_root.pushWord(*wordsIt); // for (const auto& words : gaddaggizedWords) - // root.pushWord(words); + // m_root.pushWord(words); } -void GaddagFactory::writeIndex(const QString& fname) +void GaddagFactory::writeIndex(const QString &fname) { - nodelist.push_back(&root); + m_nodelist.push_back(&m_root); - root.print(nodelist); + m_root.print(m_nodelist); ofstream out(QuackleIO::Util::qstringToStdString(fname).c_str(), ios::out | ios::binary); - for (size_t i = 0; i < nodelist.size(); i++) + out.put(1); // GADDAG format version 1 + out.write(m_hash.charptr, sizeof(m_hash.charptr)); + + for (size_t i = 0; i < m_nodelist.size(); i++) { - unsigned int p = (unsigned int)(nodelist[i]->pointer); + unsigned int p = (unsigned int)(m_nodelist[i]->pointer); if (p != 0) p -= i; // offset indexing @@ -102,14 +123,14 @@ void GaddagFactory::writeIndex(const QString& fname) unsigned char n3 = (p & 0x000000FF) >> 0; unsigned char n4; - n4 = nodelist[i]->c; + n4 = m_nodelist[i]->c; if (n4 == internalSeparatorRepresentation) n4 = QUACKLE_NULL_MARK; - if (nodelist[i]->t) + if (m_nodelist[i]->t) n4 |= 64; - if (nodelist[i]->lastchild) + if (m_nodelist[i]->lastchild) n4 |= 128; bytes[0] = n1; bytes[1] = n2; bytes[2] = n3; bytes[3] = n4; diff --git a/quackleio/gaddagfactory.h b/quackleio/gaddagfactory.h index 9eb8d72..2d21192 100644 --- a/quackleio/gaddagfactory.h +++ b/quackleio/gaddagfactory.h @@ -30,13 +30,14 @@ public: GaddagFactory(const QString& alphabetFile); ~GaddagFactory(); - int wordCount() const { return gaddagizedWords.size(); }; - int nodeCount() const { return nodelist.size(); }; + int wordCount() const { return m_gaddagizedWords.size(); }; + int nodeCount() const { 
return m_nodelist.size(); }; int encodableWords() const { return m_encodableWords; }; int unencodableWords() const { return m_unencodableWords; }; bool pushWord(const QString& word); - void sortWords() { sort(gaddagizedWords.begin(), gaddagizedWords.end()); }; + void hashWord(const Quackle::LetterString &word); + void sortWords() { sort(m_gaddagizedWords.begin(), m_gaddagizedWords.end()); }; void generate(); void writeIndex(const QString& fname); @@ -49,17 +50,19 @@ private: int pointer; bool lastchild; void pushWord(const Quackle::LetterString& word); - void print(vector< Node* >& nodelist); + void print(vector< Node* >& m_nodelist); }; int m_encodableWords; int m_unencodableWords; - Quackle::WordList gaddagizedWords; - vector< Node* > nodelist; - Quackle::AlphabetParameters *alphas; - Node root; - - + Quackle::WordList m_gaddagizedWords; + vector< Node* > m_nodelist; + Quackle::AlphabetParameters *m_alphas; + Node m_root; + union { + char charptr[16]; + int32_t int32ptr[4]; + } m_hash; }; #endif diff --git a/quackletest.cpp b/quackletest.cpp index e69c2cb..7ea5d10 100644 --- a/quackletest.cpp +++ b/quackletest.cpp @@ -47,7 +47,7 @@ int main() dataManager.setAppDataDirectory("data"); dataManager.lexiconParameters()->loadDawg(Quackle::LexiconParameters::findDictionaryFile("twl06.dawg")); - dataManager.lexiconParameters()->loadGaddag(Quackle::LexiconParameters::findDictionaryFile("twl06.gaddag")); + dataManager.lexiconParameters()->loadGaddag(Quackle::LexiconParameters::findDictionaryFile("twl06.gaddag")); dataManager.strategyParameters()->initialize("twl06"); dataManager.setBoardParameters(new Quackle::EnglishBoard()); @@ -58,7 +58,7 @@ int main() const int gameCnt = 1000; //const int gameCnt = 1; for (int game = 0; game < gameCnt; ++game) { - testGame(); + testGame(); } return 0; diff --git a/test/testharness.cpp b/test/testharness.cpp index 683443f..3f390c1 100644 --- a/test/testharness.cpp +++ b/test/testharness.cpp @@ -207,13 +207,13 @@ void 
TestHarness::startUp() m_dataManager.setBoardParameters(new ScrabbleBoard()); - m_dataManager.lexiconParameters()->loadGaddag(Quackle::LexiconParameters::findDictionaryFile(QuackleIO::Util::qstringToStdString(m_lexicon + ".gaddag"))); + m_dataManager.lexiconParameters()->loadDawg(Quackle::LexiconParameters::findDictionaryFile(QuackleIO::Util::qstringToStdString(m_lexicon + ".dawg"))); UVcout << "."; - m_dataManager.lexiconParameters()->loadDawg(Quackle::LexiconParameters::findDictionaryFile(QuackleIO::Util::qstringToStdString(m_lexicon + ".dawg"))); + m_dataManager.lexiconParameters()->loadGaddag(Quackle::LexiconParameters::findDictionaryFile(QuackleIO::Util::qstringToStdString(m_lexicon + ".gaddag"))); + UVcout << "."; m_dataManager.strategyParameters()->initialize(QuackleIO::Util::qstringToStdString(m_lexicon)); - UVcout << "."; UVcout << endl; -- cgit v1.2.3 From 5350a57f1be22b28914fca14225c73dac5b30b24 Mon Sep 17 00:00:00 2001 From: John Fultz Date: Mon, 7 Sep 2015 14:19:46 -0500 Subject: Auto-generate gaddags Need to add a user interface, but gaddags are now auto-generated if they can't be found. Some specific improvements here: * FixedLengthString gained a pop_back member. * Add code to allow v1 gaddags and v0 dawgs to work together. * Change memory allocation of dawgs and gaddags to be dynamic (the old limit didn't accommodate the ridiculously large Polish dictionary in the gaddag) * The Settings class now knows a bit about generating gaddags. This will be important for giving UI feedback. * Fixed several places using filenames which should be using string, not UVString. * Dawg/GaddagFactory should have been using UVString, not QString. My misunderstanding. 
--- fixedstring.h | 8 +++++ lexiconparameters.cpp | 81 +++++++++++++++++++++++++++++------------ lexiconparameters.h | 6 ++-- quacker/settings.cpp | 86 ++++++++++++++++++++++++++++++++++---------- quacker/settings.h | 9 +++-- quackleio/dawgfactory.cpp | 14 ++++---- quackleio/dawgfactory.h | 6 ++-- quackleio/flexiblealphabet.h | 2 -- quackleio/gaddagfactory.cpp | 71 ++++++++++++++++++++---------------- quackleio/gaddagfactory.h | 7 ++-- 10 files changed, 197 insertions(+), 93 deletions(-) (limited to 'quackleio/dawgfactory.cpp') diff --git a/fixedstring.h b/fixedstring.h index e8db0bf..a31ecd6 100644 --- a/fixedstring.h +++ b/fixedstring.h @@ -54,6 +54,7 @@ class FixedLengthString size_type size() const { return length(); } void clear() { m_end = m_data; } void push_back(char c); + void pop_back(); const char* constData() const { return m_data; } int compare(const FixedLengthString& s) const; @@ -221,6 +222,13 @@ FixedLengthString::push_back(char c) *this += c; } +inline void +FixedLengthString::pop_back() +{ + assert(size() > 0); + m_end--; +} + inline int FixedLengthString::compare(const FixedLengthString& s) const { diff --git a/lexiconparameters.cpp b/lexiconparameters.cpp index f6e646b..6761fc1 100644 --- a/lexiconparameters.cpp +++ b/lexiconparameters.cpp @@ -32,7 +32,6 @@ class Quackle::V0LexiconInterpreter : public LexiconInterpreter virtual void loadDawg(ifstream &file, LexiconParameters &lexparams) { int i = 0; - file.unget(); // version 0 doesn't have a version byte...it's just the node byte which is always set to 0 while (!file.eof()) { file.read((char*)(lexparams.m_dawg) + i, 7); @@ -43,7 +42,6 @@ class Quackle::V0LexiconInterpreter : public LexiconInterpreter virtual void loadGaddag(ifstream &file, LexiconParameters &lexparams) { int i = 0; - file.unget(); while (!file.eof()) { file.read((char*)(lexparams.m_gaddag) + i, 4); @@ -74,6 +72,7 @@ class Quackle::V1LexiconInterpreter : public LexiconInterpreter { int i = 0; unsigned char bytes[3]; + 
file.get(); // skip past version byte file.read(lexparams.m_hash, sizeof(lexparams.m_hash)); file.read((char*)bytes, 3); lexparams.m_wordcount = (bytes[0] << 16) | (bytes[1] << 8) | bytes[2]; @@ -87,14 +86,22 @@ class Quackle::V1LexiconInterpreter : public LexiconInterpreter virtual void loadGaddag(ifstream &file, LexiconParameters &lexparams) { char hash[16]; + file.get(); // skip past version byte file.read(hash, sizeof(hash)); if (memcmp(hash, lexparams.m_hash, sizeof(hash))) { - lexparams.unloadGaddag(); // don't use a mismatched gaddag - return; + // If we're using a v0 DAWG, then ignore the hash + for (size_t i = 0; i < sizeof(lexparams.m_hash); i++) + { + if (lexparams.m_hash[0] != 0) + { + lexparams.unloadGaddag(); // don't use a mismatched gaddag + return; + } + } } - int i = 0; + size_t i = 0; while (!file.eof()) { file.read((char*)(lexparams.m_gaddag) + i, 4); @@ -160,20 +167,16 @@ void LexiconParameters::loadDawg(const string &filename) } char versionByte = file.get(); - switch(versionByte) + m_interpreter = createInterpreter(versionByte); + if (m_interpreter == NULL) { - case 0: - m_interpreter = new V0LexiconInterpreter(); - break; - case 1: - m_interpreter = new V1LexiconInterpreter(); - break; - default: - UVcout << "couldn't open dawg " << filename.c_str() << endl; - return; + UVcout << "couldn't open file " << filename.c_str() << endl; + return; } - m_dawg = new unsigned char[7000000]; + file.seekg(0, ios_base::end); + m_dawg = new unsigned char[file.tellg()]; + file.seekg(0, ios_base::beg); m_interpreter->loadDawg(file, *this); } @@ -191,19 +194,53 @@ void LexiconParameters::loadGaddag(const string &filename) } char versionByte = file.get(); - if (versionByte != m_interpreter->versionNumber()) + if (versionByte < m_interpreter->versionNumber()) return; - m_gaddag = new unsigned char[40000000]; + file.seekg(0, ios_base::end); + m_gaddag = new unsigned char[file.tellg()]; + file.seekg(0, ios_base::beg); - m_interpreter->loadGaddag(file, *this); + 
// must create a local interpreter because dawg/gaddag versions might not match + LexiconInterpreter* interpreter = createInterpreter(versionByte); + if (interpreter != NULL) + { + interpreter->loadGaddag(file, *this); + delete interpreter; + } + else + unloadGaddag(); } string LexiconParameters::findDictionaryFile(const string &lexicon) { - return DataManager::self()->findDataFile("lexica", lexicon); + return QUACKLE_DATAMANAGER->findDataFile("lexica", lexicon); +} + +UVString LexiconParameters::hashString(bool shortened) const +{ + const char hex[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; + string hashStr; + for (size_t i = 0; i < sizeof(m_hash); i++) + { + hashStr.push_back(hex[(m_hash[i] & 0xF0) >> 4]); + hashStr.push_back(hex[m_hash[i] & 0x0F]); + if (shortened && i == 5) + break; + if (i % 2 == 1) + hashStr.push_back('-'); + } + return hashStr; } -QString hashString() const +LexiconInterpreter* LexiconParameters::createInterpreter(char version) const { - return QString(QByteArray(m_hash, sizeof(m_hash)).toHex()); + switch(version) + { + case 0: + return new V0LexiconInterpreter(); + case 1: + return new V1LexiconInterpreter(); + default: + return NULL; + } } diff --git a/lexiconparameters.h b/lexiconparameters.h index b5bc564..3890d8d 100644 --- a/lexiconparameters.h +++ b/lexiconparameters.h @@ -19,8 +19,6 @@ #ifndef QUACKLE_LEXICONPARAMETERS_H #define QUACKLE_LEXICONPARAMETERS_H -#include -#include "alphabetparameters.h" #include "gaddag.h" namespace Quackle @@ -77,7 +75,7 @@ public: } const GaddagNode *gaddagRoot() const { return (GaddagNode *) &m_gaddag[0]; }; - QString hashString() const; + UVString hashString(bool shortened) const; protected: unsigned char *m_dawg; @@ -86,6 +84,8 @@ protected: LexiconInterpreter *m_interpreter; char m_hash[16]; int m_wordcount; + + LexiconInterpreter* createInterpreter(char version) const; }; } diff --git a/quacker/settings.cpp b/quacker/settings.cpp index 362e916..1febdb5 
100644 --- a/quacker/settings.cpp +++ b/quacker/settings.cpp @@ -93,6 +93,8 @@ Settings::Settings(QWidget *parent) m_appDataDir = directory.absolutePath(); } m_userDataDir = QDesktopServices::storageLocation(QDesktopServices::DataLocation); + QDir qdir(m_userDataDir); + qdir.mkpath("lexica"); } void Settings::createGUI() @@ -195,49 +197,97 @@ void Settings::initialize() if (lexiconName == "cswfeb07") lexiconName = "cswapr07"; - setQuackleToUseLexiconName(QuackleIO::Util::qstringToStdString(lexiconName)); - setQuackleToUseAlphabetName(QuackleIO::Util::qstringToStdString(settings.value("quackle/settings/alphabet-name", QString("english")).toString())); + setQuackleToUseLexiconName(lexiconName); + setQuackleToUseAlphabetName(settings.value("quackle/settings/alphabet-name", QString("english")).toString()); setQuackleToUseThemeName(settings.value("quackle/settings/theme-name", QString("traditional")).toString()); setQuackleToUseBoardName(settings.value("quackle/settings/board-name", QString("")).toString()); } -void Settings::setQuackleToUseLexiconName(const string &lexiconName) +void Settings::buildGaddag(const string &filename) { - if (QUACKLE_LEXICON_PARAMETERS->lexiconName() != lexiconName) + GaddagFactory factory((UVString())); + Quackle::LetterString word; + + pushIndex(factory, word, 1); + factory.generate(); + factory.writeIndex(filename); +} + +void Settings::pushIndex(GaddagFactory &factory, Quackle::LetterString &word, int index) +{ + unsigned int p; + Quackle::Letter letter; + bool t; + bool lastchild; + bool british; + int playability; + + do + { + QUACKLE_LEXICON_PARAMETERS->dawgAt(index, p, letter, t, lastchild, british, playability); + word.push_back(letter); + if (t) + factory.pushWord(word); + if (p) + pushIndex(factory, word, p); + index++; + word.pop_back(); + } while (!lastchild); +} + + +void Settings::setQuackleToUseLexiconName(const QString &lexiconName) +{ + string lexiconNameStr = lexiconName.toStdString(); + if 
(QUACKLE_LEXICON_PARAMETERS->lexiconName() != lexiconNameStr) { - QUACKLE_LEXICON_PARAMETERS->setLexiconName(lexiconName); + QUACKLE_LEXICON_PARAMETERS->setLexiconName(lexiconNameStr); - string dawgFile = Quackle::LexiconParameters::findDictionaryFile(lexiconName + ".dawg"); + string dawgFile = Quackle::LexiconParameters::findDictionaryFile(lexiconNameStr + ".dawg"); if (dawgFile.empty()) { - UVcout << "Dawg for lexicon '" << lexiconName << "' does not exist." << endl; + UVcout << "Dawg for lexicon '" << lexiconNameStr << "' does not exist." << endl; QUACKLE_LEXICON_PARAMETERS->unloadDawg(); } else QUACKLE_LEXICON_PARAMETERS->loadDawg(dawgFile); - string gaddagFile = Quackle::LexiconParameters::findDictionaryFile(lexiconName + ".gaddag"); + if (!QUACKLE_LEXICON_PARAMETERS->hasDawg()) + { + QUACKLE_LEXICON_PARAMETERS->unloadGaddag(); + return; + } + + string gaddagFile = Quackle::LexiconParameters::findDictionaryFile(lexiconNameStr + ".gaddag"); if (gaddagFile.empty()) { - UVcout << "Gaddag for lexicon '" << lexiconName << "' does not exist." << endl; + UVcout << "Gaddag for lexicon '" << lexiconNameStr << "' does not exist." 
<< endl; QUACKLE_LEXICON_PARAMETERS->unloadGaddag(); } else QUACKLE_LEXICON_PARAMETERS->loadGaddag(gaddagFile); - QUACKLE_STRATEGY_PARAMETERS->initialize(lexiconName); + if (!QUACKLE_LEXICON_PARAMETERS->hasGaddag()) + { + gaddagFile = QUACKLE_DATAMANAGER->makeDataFilename("lexica", lexiconNameStr + ".gaddag", true); + buildGaddag(gaddagFile); + QUACKLE_LEXICON_PARAMETERS->loadGaddag(gaddagFile); + } + + QUACKLE_STRATEGY_PARAMETERS->initialize(lexiconNameStr); } } -void Settings::setQuackleToUseAlphabetName(const string &alphabetName) +void Settings::setQuackleToUseAlphabetName(const QString &alphabetName) { - if (QUACKLE_ALPHABET_PARAMETERS->alphabetName() != alphabetName) + string alphabetNameStr = alphabetName.toStdString(); + if (QUACKLE_ALPHABET_PARAMETERS->alphabetName() != alphabetNameStr) { - QString alphabetFile = QuackleIO::Util::stdStringToQString(Quackle::AlphabetParameters::findAlphabetFile(alphabetName + ".quackle_alphabet")); + QString alphabetFileStr = QuackleIO::Util::stdStringToQString(Quackle::AlphabetParameters::findAlphabetFile(alphabetNameStr + ".quackle_alphabet")); QuackleIO::FlexibleAlphabetParameters *flexure = new QuackleIO::FlexibleAlphabetParameters; - flexure->setAlphabetName(alphabetName); - if (flexure->load(alphabetFile)) + flexure->setAlphabetName(alphabetNameStr); + if (flexure->load(alphabetFileStr)) { if (flexure->length() != QUACKLE_ALPHABET_PARAMETERS->length() && QUACKLE_ALPHABET_PARAMETERS->alphabetName() != "default") { @@ -295,8 +345,7 @@ void Settings::lexiconChanged(const QString &lexiconName) editLexicon(); return; } - string lexiconNameString = QuackleIO::Util::qstringToStdString(lexiconName); - setQuackleToUseLexiconName(lexiconNameString); + setQuackleToUseLexiconName(lexiconName); CustomQSettings settings; settings.setValue("quackle/settings/lexicon-name", lexiconName); @@ -311,8 +360,7 @@ void Settings::alphabetChanged(const QString &alphabetName) editAlphabet(); return; } - string alphabetNameString = 
QuackleIO::Util::qstringToStdString(alphabetName); - setQuackleToUseAlphabetName(alphabetNameString); + setQuackleToUseAlphabetName(alphabetName); CustomQSettings settings; settings.setValue("quackle/settings/alphabet-name", alphabetName); diff --git a/quacker/settings.h b/quacker/settings.h index cee0562..fab2f3f 100644 --- a/quacker/settings.h +++ b/quacker/settings.h @@ -24,6 +24,8 @@ #include #include +#include "quackleio/gaddagfactory.h" + class QComboBox; class QCheckBox; class QPushButton; @@ -72,8 +74,8 @@ protected slots: void editAlphabet(); void editTheme(); - void setQuackleToUseLexiconName(const string &lexiconName); - void setQuackleToUseAlphabetName(const string &alphabetName); + void setQuackleToUseLexiconName(const QString &lexiconName); + void setQuackleToUseAlphabetName(const QString &alphabetName); void setQuackleToUseThemeName(const QString &themeName); void setQuackleToUseBoardName(const QString &lexiconName); @@ -94,6 +96,9 @@ private: // populate the popup based on what's in QSettings void loadBoardNameCombo(); + void buildGaddag(const string &filename); + void pushIndex(GaddagFactory &factory, Quackle::LetterString &word, int index); + static Settings *m_self; }; diff --git a/quackleio/dawgfactory.cpp b/quackleio/dawgfactory.cpp index 74b4346..3a971a3 100644 --- a/quackleio/dawgfactory.cpp +++ b/quackleio/dawgfactory.cpp @@ -25,10 +25,10 @@ #include "util.h" -DawgFactory::DawgFactory(const QString& alphabetFile) +DawgFactory::DawgFactory(const UVString& alphabetFile) { QuackleIO::FlexibleAlphabetParameters *flexure = new QuackleIO::FlexibleAlphabetParameters; - flexure->load(alphabetFile); + flexure->load(QuackleIO::Util::uvStringToQString(alphabetFile)); m_alphas = flexure; m_root.insmallerdict = false; @@ -45,12 +45,10 @@ DawgFactory::~DawgFactory() delete m_alphas; } -bool DawgFactory::pushWord(const QString& word, bool inSmaller, int playability) +bool DawgFactory::pushWord(const UVString& word, bool inSmaller, int playability) { - 
UVString originalString = QuackleIO::Util::qstringToString(word); - UVString leftover; - Quackle::LetterString encodedWord = m_alphas->encode(originalString, &leftover); + Quackle::LetterString encodedWord = m_alphas->encode(word, &leftover); if (leftover.empty()) { if (m_root.pushWord(encodedWord, inSmaller, playability)) @@ -129,9 +127,9 @@ void DawgFactory::generate() m_root.print(m_nodelist); } -void DawgFactory::writeIndex(const QString& filename) +void DawgFactory::writeIndex(const UVString& filename) { - ofstream out(QuackleIO::Util::qstringToStdString(filename).c_str(), ios::out | ios::binary); + ofstream out(filename.c_str(), ios::out | ios::binary); unsigned char bytes[7]; bytes[0] = (m_encodableWords & 0x00FF0000) >> 16; diff --git a/quackleio/dawgfactory.h b/quackleio/dawgfactory.h index 23bb4f5..051e632 100644 --- a/quackleio/dawgfactory.h +++ b/quackleio/dawgfactory.h @@ -26,7 +26,7 @@ class DawgFactory { public: - DawgFactory(const QString& alphabetFile); + DawgFactory(const UVString& alphabetFile); ~DawgFactory(); int wordCount() const { return m_root.wordCount(); }; @@ -35,10 +35,10 @@ public: int unencodableWords() const { return m_unencodableWords; }; int duplicateWords() const { return m_duplicateWords; }; - bool pushWord(const QString& word, bool inSmaller, int playability); + bool pushWord(const UVString& word, bool inSmaller, int playability); void hashWord(const Quackle::LetterString &word); void generate(); - void writeIndex(const QString& fname); + void writeIndex(const UVString& filename); const char* hashBytes() { return m_hash.charptr; }; diff --git a/quackleio/flexiblealphabet.h b/quackleio/flexiblealphabet.h index 89bd1f4..d5db68a 100644 --- a/quackleio/flexiblealphabet.h +++ b/quackleio/flexiblealphabet.h @@ -21,8 +21,6 @@ #include "alphabetparameters.h" -class QString; - namespace QuackleIO { diff --git a/quackleio/gaddagfactory.cpp b/quackleio/gaddagfactory.cpp index 7f666cb..53ccf04 100644 --- a/quackleio/gaddagfactory.cpp +++ 
b/quackleio/gaddagfactory.cpp @@ -24,11 +24,15 @@ #include "gaddagfactory.h" #include "util.h" -GaddagFactory::GaddagFactory(const QString& alphabetFile) +GaddagFactory::GaddagFactory(const UVString &alphabetFile) + : m_encodableWords(0), m_unencodableWords(0), m_alphas(NULL) { - QuackleIO::FlexibleAlphabetParameters *flexure = new QuackleIO::FlexibleAlphabetParameters; - flexure->load(alphabetFile); - m_alphas = flexure; + if (!alphabetFile.empty()) + { + QuackleIO::FlexibleAlphabetParameters *flexure = new QuackleIO::FlexibleAlphabetParameters; + flexure->load(QuackleIO::Util::uvStringToQString(alphabetFile)); + m_alphas = flexure; + } // So the separator is sorted to last. m_root.t = false; @@ -44,35 +48,13 @@ GaddagFactory::~GaddagFactory() delete m_alphas; } -bool GaddagFactory::pushWord(const QString& word) +bool GaddagFactory::pushWord(const UVString &word) { - UVString originalString = QuackleIO::Util::qstringToString(word); - UVString leftover; - Quackle::LetterString encodedWord = m_alphas->encode(originalString, &leftover); + Quackle::LetterString encodedWord = m_alphas->encode(word, &leftover); if (leftover.empty()) { - ++m_encodableWords; - hashWord(encodedWord); - // FIXME: This hash will fail if duplicate words are passed in. - // But testing for duplicate words isn't so easy without keeping - // an entirely separate list. 
- - for (unsigned i = 1; i <= encodedWord.length(); i++) - { - Quackle::LetterString newword; - - for (int j = i - 1; j >= 0; j--) - newword.push_back(encodedWord[j]); - - if (i < encodedWord.length()) - { - newword.push_back(internalSeparatorRepresentation); // "^" - for (unsigned j = i; j < encodedWord.length(); j++) - newword.push_back(encodedWord[j]); - } - m_gaddagizedWords.push_back(newword); - } + pushWord(encodedWord); return true; } @@ -80,6 +62,32 @@ bool GaddagFactory::pushWord(const QString& word) return false; } +bool GaddagFactory::pushWord(const Quackle::LetterString &word) +{ + ++m_encodableWords; + hashWord(word); + // FIXME: This hash will fail if duplicate words are passed in. + // But testing for duplicate words isn't so easy without keeping + // an entirely separate list. + + for (unsigned i = 1; i <= word.length(); i++) + { + Quackle::LetterString newword; + + for (int j = i - 1; j >= 0; j--) + newword.push_back(word[j]); + + if (i < word.length()) + { + newword.push_back(internalSeparatorRepresentation); // "^" + for (unsigned j = i; j < word.length(); j++) + newword.push_back(word[j]); + } + m_gaddagizedWords.push_back(newword); + } + return true; +} + void GaddagFactory::hashWord(const Quackle::LetterString &word) { QCryptographicHash wordhash(QCryptographicHash::Md5); @@ -93,6 +101,7 @@ void GaddagFactory::hashWord(const Quackle::LetterString &word) void GaddagFactory::generate() { + sort(m_gaddagizedWords.begin(), m_gaddagizedWords.end()); Quackle::WordList::const_iterator wordsEnd = m_gaddagizedWords.end(); for (Quackle::WordList::const_iterator wordsIt = m_gaddagizedWords.begin(); wordsIt != wordsEnd; ++wordsIt) m_root.pushWord(*wordsIt); @@ -100,13 +109,13 @@ void GaddagFactory::generate() // m_root.pushWord(words); } -void GaddagFactory::writeIndex(const QString &fname) +void GaddagFactory::writeIndex(const string &fname) { m_nodelist.push_back(&m_root); m_root.print(m_nodelist); - ofstream 
out(QuackleIO::Util::qstringToStdString(fname).c_str(), ios::out | ios::binary); + ofstream out(fname.c_str(), ios::out | ios::binary); out.put(1); // GADDAG format version 1 out.write(m_hash.charptr, sizeof(m_hash.charptr)); diff --git a/quackleio/gaddagfactory.h b/quackleio/gaddagfactory.h index 03cb546..415baff 100644 --- a/quackleio/gaddagfactory.h +++ b/quackleio/gaddagfactory.h @@ -27,7 +27,7 @@ public: static const Quackle::Letter internalSeparatorRepresentation = QUACKLE_FIRST_LETTER + QUACKLE_MAXIMUM_ALPHABET_SIZE; - GaddagFactory(const QString& alphabetFile); + GaddagFactory(const UVString &alphabetFile); ~GaddagFactory(); int wordCount() const { return m_gaddagizedWords.size(); }; @@ -35,11 +35,12 @@ public: int encodableWords() const { return m_encodableWords; }; int unencodableWords() const { return m_unencodableWords; }; - bool pushWord(const QString& word); + bool pushWord(const UVString &word); + bool pushWord(const Quackle::LetterString &word); void hashWord(const Quackle::LetterString &word); void sortWords() { sort(m_gaddagizedWords.begin(), m_gaddagizedWords.end()); }; void generate(); - void writeIndex(const QString& fname); + void writeIndex(const string &fname); const char* hashBytes() { return m_hash.charptr; }; -- cgit v1.2.3 From 1214533715a1acfbc35ebe29ff78afee2f850226 Mon Sep 17 00:00:00 2001 From: John Fultz Date: Sat, 26 Sep 2015 10:47:07 -0500 Subject: Work on DAWG generation. V1 DAWGs now include an alphabet. Begin creating DAWGs which extend other DAWGs. In general, laying the groundwork for plain text import to DAWG. 
--- lexiconparameters.cpp | 8 ++++++++ lexiconparameters.h | 5 +++++ quacker/lexicondialog.cpp | 43 +++++++++++++++++++++++++++++++++++++++++-- quacker/lexicondialog.h | 11 ++++++++++- quackleio/dawgfactory.cpp | 31 +++++++++++++++++++++---------- quackleio/dawgfactory.h | 1 + 6 files changed, 86 insertions(+), 13 deletions(-) (limited to 'quackleio/dawgfactory.cpp') diff --git a/lexiconparameters.cpp b/lexiconparameters.cpp index 6761fc1..9da3b70 100644 --- a/lexiconparameters.cpp +++ b/lexiconparameters.cpp @@ -75,6 +75,14 @@ class Quackle::V1LexiconInterpreter : public LexiconInterpreter file.get(); // skip past version byte file.read(lexparams.m_hash, sizeof(lexparams.m_hash)); file.read((char*)bytes, 3); + + lexparams.m_utf8Alphabet.resize(file.get()); + for (size_t i = 0; i < lexparams.m_utf8Alphabet.size(); i++) + { + file >> lexparams.m_utf8Alphabet[i]; + file.get(); // separator space + } + file.get(); // whitespace separator lexparams.m_wordcount = (bytes[0] << 16) | (bytes[1] << 8) | bytes[2]; while (!file.eof()) { diff --git a/lexiconparameters.h b/lexiconparameters.h index 3890d8d..9f34be6 100644 --- a/lexiconparameters.h +++ b/lexiconparameters.h @@ -19,11 +19,15 @@ #ifndef QUACKLE_LEXICONPARAMETERS_H #define QUACKLE_LEXICONPARAMETERS_H +#include + #include "gaddag.h" namespace Quackle { +class LexiconParameters; + class LexiconInterpreter { public: @@ -84,6 +88,7 @@ protected: LexiconInterpreter *m_interpreter; char m_hash[16]; int m_wordcount; + vector m_utf8Alphabet; LexiconInterpreter* createInterpreter(char version) const; }; diff --git a/quacker/lexicondialog.cpp b/quacker/lexicondialog.cpp index 9d1998c..f9c6399 100644 --- a/quacker/lexicondialog.cpp +++ b/quacker/lexicondialog.cpp @@ -25,9 +25,10 @@ #include "customqsettings.h" #include "settings.h" #include "geometry.h" +#include "quackleio/dawgfactory.h" - -LexiconDialog::LexiconDialog(QWidget *parent, const QString &originalName) : QDialog(parent) +LexiconDialog::LexiconDialog(QWidget 
*parent, const QString &originalName) : QDialog(parent), + m_wordFactory(NULL) { m_originalName = originalName; @@ -117,7 +118,45 @@ void LexiconDialog::deleteLexicon() void LexiconDialog::addWordsFromFile() { + QFileDialog browser(this, tr("Choose a file containing words to be added to the lexicon...")); +} + +void LexiconDialog::addWordsFromDawg(const string &dawgfile, const string &alphabetfile) +{ + delete m_wordFactory; + m_wordFactory = NULL; + + LexiconParameters lexParams; + lexParams.loadDawg(dawgfile); + if (!lexParams.hasDawg()) + return; + m_wordFactory = new DawgFactory(alphabetfile); + Quackle::LetterString word; + + addWordsFromDawgRecursive(lexParams, word, 1); +} + +void LexiconDialog::addWordsFromDawgRecursive(const LexiconParameters &lexParams, Quackle::LetterString &word, int index) +{ + unsigned int p; + Quackle::Letter letter; + bool t; + bool lastchild; + bool british; + int playability; + + do + { + lexParams.dawgAt(index, p, letter, t, lastchild, british, playability); + word.push_back(letter); + if (t) + m_wordFactory->pushWord(word, !british, playability); + if (p) + addWordsFromDawgRecursive(lexParams, word, p); + index++; + word.pop_back(); + } while (!lastchild); } void LexiconDialog::accept() diff --git a/quacker/lexicondialog.h b/quacker/lexicondialog.h index cdc0a59..573d48b 100644 --- a/quacker/lexicondialog.h +++ b/quacker/lexicondialog.h @@ -20,17 +20,20 @@ #define QUACKER_LEXICONDIALOG_H #include -#include +#include "game.h" +#include "lexiconparameters.h" #include #include using namespace std; +using namespace Quackle; class QComboBox; class QLabel; class QLineEdit; class QPushButton; +class DawgFactory; class LexiconDialog : public QDialog { @@ -47,6 +50,10 @@ protected slots: void deleteLexicon(); void addWordsFromFile(); +protected: + void addWordsFromDawg(const string &dawgfile, const string &alphabetfile); + void addWordsFromDawgRecursive(const LexiconParameters &lexParams, Quackle::LetterString &word, int index); + 
private: QLineEdit *m_lexiconName; QComboBox *m_alphabetCombo; @@ -59,6 +66,8 @@ private: QPushButton *m_deleteLexicon; QString m_originalName; + + DawgFactory *m_wordFactory; }; #endif diff --git a/quackleio/dawgfactory.cpp b/quackleio/dawgfactory.cpp index 3a971a3..565778a 100644 --- a/quackleio/dawgfactory.cpp +++ b/quackleio/dawgfactory.cpp @@ -50,21 +50,24 @@ bool DawgFactory::pushWord(const UVString& word, bool inSmaller, int playability UVString leftover; Quackle::LetterString encodedWord = m_alphas->encode(word, &leftover); if (leftover.empty()) - { - if (m_root.pushWord(encodedWord, inSmaller, playability)) - { - ++m_encodableWords; - hashWord(encodedWord); - return true; - } - ++m_duplicateWords; - return false; - } + return pushWord(encodedWord, inSmaller, playability); ++m_unencodableWords; return false; } +bool DawgFactory::pushWord(const Quackle::LetterString& word, bool inSmaller, int playability) +{ + if (m_root.pushWord(word, inSmaller, playability)) + { + ++m_encodableWords; + hashWord(word); + return true; + } + ++m_duplicateWords; + return false; +} + void DawgFactory::hashWord(const Quackle::LetterString &word) { QCryptographicHash wordhash(QCryptographicHash::Md5); @@ -139,6 +142,14 @@ void DawgFactory::writeIndex(const UVString& filename) out.put(1); // DAWG format version 1 out.write(m_hash.charptr, sizeof(m_hash.charptr)); out.write((char*)bytes, 3); + out.put((char)m_alphas->length()); + for (Quackle::Letter i = m_alphas->firstLetter(); i <= m_alphas->lastLetter(); i++) + { + QString letterText = QuackleIO::Util::uvStringToQString(m_alphas->letterParameter(i).text()); + QByteArray utf8bytes = letterText.toUtf8(); + string utf8LetterText(utf8bytes.constData()); + out << utf8LetterText << ' '; + } for (unsigned int i = 0; i < m_nodelist.size(); i++) { //cout << m_nodelist[i]->c << " " << m_nodelist[i]->pointer << " " << m_nodelist[i]->t << " " << m_nodelist[i]->lastchild << endl; diff --git a/quackleio/dawgfactory.h b/quackleio/dawgfactory.h 
index 051e632..2a55461 100644 --- a/quackleio/dawgfactory.h +++ b/quackleio/dawgfactory.h @@ -36,6 +36,7 @@ public: int duplicateWords() const { return m_duplicateWords; }; bool pushWord(const UVString& word, bool inSmaller, int playability); + bool pushWord(const Quackle::LetterString& word, bool inSmaller, int playability); void hashWord(const Quackle::LetterString &word); void generate(); void writeIndex(const UVString& filename); -- cgit v1.2.3 From 81554a201cc5e0748110add6eca05cc16c18850c Mon Sep 17 00:00:00 2001 From: John Fultz Date: Mon, 28 Sep 2015 13:00:57 -0500 Subject: Now able to load text and dawg files. Words are loaded and hashed. Duplicates are discovered. Alphabets are dealt with. Merging of multiple word lists works. This is good stuff. Saving the resulting dictionaries has not been tried, yet, and the gui code for saving needs to be finished off, yet. --- quacker/lexicondialog.cpp | 71 +++++++++++++++++++++++++++++++++++++++++++---- quacker/lexicondialog.h | 5 +++- quackleio/dawgfactory.cpp | 4 +-- quackleio/dawgfactory.h | 2 +- 4 files changed, 73 insertions(+), 9 deletions(-) (limited to 'quackleio/dawgfactory.cpp') diff --git a/quacker/lexicondialog.cpp b/quacker/lexicondialog.cpp index f9c6399..a7566c6 100644 --- a/quacker/lexicondialog.cpp +++ b/quacker/lexicondialog.cpp @@ -97,10 +97,12 @@ LexiconDialog::LexiconDialog(QWidget *parent, const QString &originalName) : QDi connect(m_saveChanges, SIGNAL(clicked()), this, SLOT(accept())); connect(m_cancel, SIGNAL(clicked()), this, SLOT(reject())); connect(m_deleteLexicon, SIGNAL(clicked()), this, SLOT(deleteLexicon())); - + connect(m_alphabetCombo, SIGNAL(activated(const QString &)), this, SLOT(alphabetChanged(const QString &))); + setWindowTitle(tr("Configure Lexicon - Quackle")); Settings::populateComboFromFilenames(m_alphabetCombo, "alphabets", ""); + alphabetChanged(m_alphabetCombo->currentText()); updateLexiconInformation(); // sync game board with control states and draw board @@ -113,25 
+115,49 @@ LexiconDialog::~LexiconDialog() void LexiconDialog::deleteLexicon() { - + delete m_wordFactory; + m_wordFactory = NULL; + updateLexiconInformation(); } void LexiconDialog::addWordsFromFile() { QFileDialog browser(this, tr("Choose a file containing words to be added to the lexicon...")); + QStringList filters; + filters << "Dictionary files (*.txt *.dawg *.raw)" + << "All files (*.*)"; + browser.setNameFilters(filters); + browser.setFileMode(QFileDialog::ExistingFiles); + browser.exec(); + + QStringList files = browser.selectedFiles(); + for (QList::const_iterator it = files.begin(); it != files.end(); it++) + { + if (it->endsWith(".dawg", Qt::CaseInsensitive)) + addWordsFromDawgFile(*it, m_alphabetCombo->currentText()); + else + addWordsFromTextFile(*it, m_alphabetCombo->currentText()); + } + updateLexiconInformation(); } -void LexiconDialog::addWordsFromDawg(const string &dawgfile, const string &alphabetfile) +void LexiconDialog::alphabetChanged(const QString &alphabet) { delete m_wordFactory; m_wordFactory = NULL; + updateLexiconInformation(); + m_alphabetFileName = QString::fromStdString(AlphabetParameters::findAlphabetFile(QuackleIO::Util::qstringToStdString(alphabet))); +} +void LexiconDialog::addWordsFromDawgFile(const QString &dawgfile, const QString &alphabetfile) +{ + if (!m_wordFactory) + m_wordFactory = new DawgFactory(m_alphabetFileName); LexiconParameters lexParams; - lexParams.loadDawg(dawgfile); + lexParams.loadDawg(QuackleIO::Util::qstringToStdString(dawgfile)); if (!lexParams.hasDawg()) return; - m_wordFactory = new DawgFactory(alphabetfile); Quackle::LetterString word; addWordsFromDawgRecursive(lexParams, word, 1); @@ -159,6 +185,37 @@ void LexiconDialog::addWordsFromDawgRecursive(const LexiconParameters &lexParams } while (!lastchild); } +void LexiconDialog::addWordsFromTextFile(const QString &textFile, const QString &alphabetfile) +{ + if (!m_wordFactory) + m_wordFactory = new DawgFactory(m_alphabetFileName); + + QFile file(textFile); 
+ if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) + return; + + QTextStream stream(&file); + stream.setCodec("UTF-8"); + QString word; + while (!stream.atEnd()) + { + stream >> word; + word = word.trimmed().toUpper(); + if (word.isEmpty()) + continue; + QChar firstChar = word[0]; + if (firstChar < 'A') + continue; // allows the usage of most punctuation characters as comments + int playability = 0; + for (int i = word.size() - 1; i > 0; i--) + { + if (word[i].isDigit()) + playability = playability * 10 + word[i].digitValue(); + } + m_wordFactory->pushWord(QuackleIO::Util::qstringToString(word), true, playability); + } +} + void LexiconDialog::accept() { QDialog::accept(); @@ -166,11 +223,15 @@ void LexiconDialog::accept() void LexiconDialog::updateLexiconInformation() { + int wordCount = m_wordFactory ? m_wordFactory->wordCount() : 0; + QByteArray hash = m_wordFactory ? QByteArray(m_wordFactory->hashBytes(), 16).toHex() : ""; QString text; text.append(tr("File name: ")); text.append(tr("\n\nFile size: ")); text.append(tr("\n\nWord count: ")); + text.append(QString("%L1").arg(wordCount)); text.append(tr("\n\nLexicon hash: ")); + text.append(hash); m_lexiconInformation->setText(text); } diff --git a/quacker/lexicondialog.h b/quacker/lexicondialog.h index 573d48b..4df6138 100644 --- a/quacker/lexicondialog.h +++ b/quacker/lexicondialog.h @@ -49,10 +49,12 @@ public: protected slots: void deleteLexicon(); void addWordsFromFile(); + void alphabetChanged(const QString &); protected: - void addWordsFromDawg(const string &dawgfile, const string &alphabetfile); + void addWordsFromDawgFile(const QString &dawgfile, const QString &alphabetfile); void addWordsFromDawgRecursive(const LexiconParameters &lexParams, Quackle::LetterString &word, int index); + void addWordsFromTextFile(const QString &textFile, const QString &alphabetfile); private: QLineEdit *m_lexiconName; @@ -66,6 +68,7 @@ private: QPushButton *m_deleteLexicon; QString m_originalName; + QString 
m_alphabetFileName; DawgFactory *m_wordFactory; }; diff --git a/quackleio/dawgfactory.cpp b/quackleio/dawgfactory.cpp index 565778a..e7ada85 100644 --- a/quackleio/dawgfactory.cpp +++ b/quackleio/dawgfactory.cpp @@ -25,10 +25,10 @@ #include "util.h" -DawgFactory::DawgFactory(const UVString& alphabetFile) +DawgFactory::DawgFactory(const QString &alphabetFile) { QuackleIO::FlexibleAlphabetParameters *flexure = new QuackleIO::FlexibleAlphabetParameters; - flexure->load(QuackleIO::Util::uvStringToQString(alphabetFile)); + flexure->load(alphabetFile); m_alphas = flexure; m_root.insmallerdict = false; diff --git a/quackleio/dawgfactory.h b/quackleio/dawgfactory.h index 2a55461..1a1aa7d 100644 --- a/quackleio/dawgfactory.h +++ b/quackleio/dawgfactory.h @@ -26,7 +26,7 @@ class DawgFactory { public: - DawgFactory(const UVString& alphabetFile); + DawgFactory(const QString &alphabetFile); ~DawgFactory(); int wordCount() const { return m_root.wordCount(); }; -- cgit v1.2.3 From 69e3dcefb882c743b136df8e5c81b4182b135f6b Mon Sep 17 00:00:00 2001 From: John Fultz Date: Sat, 10 Oct 2015 19:09:36 -0500 Subject: Progress on the lexicon dialog. Now prints better stats. Now loads the dictionary you're editing. Now disables the Delete button at appropriate times. 
--- datamanager.cpp | 6 ++++++ datamanager.h | 3 +++ lexiconparameters.cpp | 5 +++++ lexiconparameters.h | 1 + quacker/lexicondialog.cpp | 36 +++++++++++++++++++++++++++--------- quacker/lexicondialog.h | 4 ++-- quacker/settings.cpp | 30 ++++++++++++++++++++++++++---- quacker/settings.h | 2 ++ quackleio/dawgfactory.cpp | 37 +++++++++++++++++++++++++++++++++++-- quackleio/dawgfactory.h | 8 ++++++-- 10 files changed, 113 insertions(+), 19 deletions(-) (limited to 'quackleio/dawgfactory.cpp') diff --git a/datamanager.cpp b/datamanager.cpp index eb65afd..916610a 100644 --- a/datamanager.cpp +++ b/datamanager.cpp @@ -156,6 +156,12 @@ string DataManager::findDataFile(const string &subDirectory, const string &file) return fname; } +bool DataManager::hasUserDataFile(const string &subDirectory, const string &file) +{ + string fname = makeDataFilename(subDirectory, file, true); + return fileExists(fname); +} + string DataManager::makeDataFilename(const string &subDirectory, const string &lexicon, const string &file, bool user) { return (user ? m_userDataDirectory : m_appDataDirectory) + "/" + subDirectory + "/" + lexicon + "/" + file; diff --git a/datamanager.h b/datamanager.h index 196d525..75bce54 100644 --- a/datamanager.h +++ b/datamanager.h @@ -105,6 +105,9 @@ public: // Returns empty string if the file is not found. string findDataFile(const string &subDirectory, const string &file); + // Returns true if the data file is in user-land. 
+ bool hasUserDataFile(const string &subDirectory, const string &file); + // returns similarly-named file string makeDataFilename(const string &subDirectory, const string &lexicon, const string &file, bool user); string makeDataFilename(const string &subDirectory, const string &file, bool user); diff --git a/lexiconparameters.cpp b/lexiconparameters.cpp index 9da3b70..bc10773 100644 --- a/lexiconparameters.cpp +++ b/lexiconparameters.cpp @@ -224,6 +224,11 @@ string LexiconParameters::findDictionaryFile(const string &lexicon) return QUACKLE_DATAMANAGER->findDataFile("lexica", lexicon); } +bool LexiconParameters::hasUserDictionaryFile(const string &lexicon) +{ + return QUACKLE_DATAMANAGER->hasUserDataFile("lexica", lexicon); +} + UVString LexiconParameters::hashString(bool shortened) const { const char hex[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; diff --git a/lexiconparameters.h b/lexiconparameters.h index 9f34be6..4eda4f3 100644 --- a/lexiconparameters.h +++ b/lexiconparameters.h @@ -68,6 +68,7 @@ public: // finds a file in the lexica data directory static string findDictionaryFile(const string &lexicon); + static bool hasUserDictionaryFile(const string &lexicon); // a convenience field; this is unused by libquackle string lexiconName() const { return m_lexiconName; }; diff --git a/quacker/lexicondialog.cpp b/quacker/lexicondialog.cpp index a7566c6..e11ae41 100644 --- a/quacker/lexicondialog.cpp +++ b/quacker/lexicondialog.cpp @@ -103,9 +103,21 @@ LexiconDialog::LexiconDialog(QWidget *parent, const QString &originalName) : QDi Settings::populateComboFromFilenames(m_alphabetCombo, "alphabets", ""); alphabetChanged(m_alphabetCombo->currentText()); - updateLexiconInformation(); - // sync game board with control states and draw board + string dawgFileName = originalName.toStdString() + ".dawg"; + QString dawgFullFileName; + if (!originalName.isEmpty()) + dawgFullFileName = 
QString::fromStdString(Quackle::LexiconParameters::findDictionaryFile(dawgFileName)); + + if (!dawgFullFileName.isEmpty()) + { + m_deleteLexicon->setEnabled(Quackle::LexiconParameters::hasUserDictionaryFile(dawgFileName)); + addWordsFromDawgFile(dawgFullFileName); + } + else + m_deleteLexicon->setEnabled(false); + + updateLexiconInformation(); } LexiconDialog::~LexiconDialog() @@ -134,9 +146,9 @@ void LexiconDialog::addWordsFromFile() for (QList::const_iterator it = files.begin(); it != files.end(); it++) { if (it->endsWith(".dawg", Qt::CaseInsensitive)) - addWordsFromDawgFile(*it, m_alphabetCombo->currentText()); + addWordsFromDawgFile(*it); else - addWordsFromTextFile(*it, m_alphabetCombo->currentText()); + addWordsFromTextFile(*it); } updateLexiconInformation(); } @@ -149,7 +161,7 @@ void LexiconDialog::alphabetChanged(const QString &alphabet) m_alphabetFileName = QString::fromStdString(AlphabetParameters::findAlphabetFile(QuackleIO::Util::qstringToStdString(alphabet))); } -void LexiconDialog::addWordsFromDawgFile(const QString &dawgfile, const QString &alphabetfile) +void LexiconDialog::addWordsFromDawgFile(const QString &dawgfile) { if (!m_wordFactory) m_wordFactory = new DawgFactory(m_alphabetFileName); @@ -185,7 +197,7 @@ void LexiconDialog::addWordsFromDawgRecursive(const LexiconParameters &lexParams } while (!lastchild); } -void LexiconDialog::addWordsFromTextFile(const QString &textFile, const QString &alphabetfile) +void LexiconDialog::addWordsFromTextFile(const QString &textFile) { if (!m_wordFactory) m_wordFactory = new DawgFactory(m_alphabetFileName); @@ -224,14 +236,20 @@ void LexiconDialog::accept() void LexiconDialog::updateLexiconInformation() { int wordCount = m_wordFactory ? m_wordFactory->wordCount() : 0; - QByteArray hash = m_wordFactory ? QByteArray(m_wordFactory->hashBytes(), 16).toHex() : ""; + QByteArray hash = (m_wordFactory && wordCount) ? 
QByteArray(m_wordFactory->hashBytes(), 16).toHex() : ""; QString text; + QString lengthText; + if (m_wordFactory) + lengthText = QString::fromStdString(m_wordFactory->letterCountString()); + text.append(tr("File name: ")); text.append(tr("\n\nFile size: ")); text.append(tr("\n\nWord count: ")); text.append(QString("%L1").arg(wordCount)); - text.append(tr("\n\nLexicon hash: ")); - text.append(hash); + text.append("\n"); + text.append(lengthText); + text.append(tr("\nLexicon hash: ")); + text.append(hash.left(8)); m_lexiconInformation->setText(text); } diff --git a/quacker/lexicondialog.h b/quacker/lexicondialog.h index 4df6138..39cd546 100644 --- a/quacker/lexicondialog.h +++ b/quacker/lexicondialog.h @@ -52,9 +52,9 @@ protected slots: void alphabetChanged(const QString &); protected: - void addWordsFromDawgFile(const QString &dawgfile, const QString &alphabetfile); + void addWordsFromDawgFile(const QString &dawgfile); void addWordsFromDawgRecursive(const LexiconParameters &lexParams, Quackle::LetterString &word, int index); - void addWordsFromTextFile(const QString &textFile, const QString &alphabetfile); + void addWordsFromTextFile(const QString &textFile); private: QLineEdit *m_lexiconName; diff --git a/quacker/settings.cpp b/quacker/settings.cpp index ce8583f..3319955 100644 --- a/quacker/settings.cpp +++ b/quacker/settings.cpp @@ -172,9 +172,11 @@ void Settings::createGUI() void Settings::load() { m_lexiconNameCombo->setCurrentIndex(m_lexiconNameCombo->findText(QuackleIO::Util::stdStringToQString(QUACKLE_LEXICON_PARAMETERS->lexiconName()))); + m_lastGoodLexiconValue = m_lexiconNameCombo->currentIndex(); m_alphabetNameCombo->setCurrentIndex(m_alphabetNameCombo->findText(QuackleIO::Util::stdStringToQString(QUACKLE_ALPHABET_PARAMETERS->alphabetName()))); m_themeNameCombo->setCurrentIndex(m_themeNameCombo->findText(m_themeName)); m_boardNameCombo->setCurrentIndex(m_boardNameCombo->findText(QuackleIO::Util::uvStringToQString(QUACKLE_BOARD_PARAMETERS->name()))); + 
m_lastGoodBoardValue = m_boardNameCombo->currentIndex(); } void Settings::preInitialize() @@ -343,9 +345,13 @@ void Settings::lexiconChanged(const QString &lexiconName) if (m_lexiconNameCombo->currentIndex() == m_lexiconNameCombo->count() - 1) { editLexicon(); + if (m_lexiconNameCombo->currentIndex() == m_lexiconNameCombo->count() - 1 && + m_lexiconNameCombo->currentIndex() != 0) + m_lexiconNameCombo->setCurrentIndex(m_lastGoodLexiconValue); return; } setQuackleToUseLexiconName(lexiconName); + m_lastGoodLexiconValue = m_lexiconNameCombo->currentIndex(); CustomQSettings settings; settings.setValue("quackle/settings/lexicon-name", lexiconName); @@ -388,6 +394,9 @@ void Settings::boardChanged(const QString &boardName) if (m_boardNameCombo->currentIndex() == m_boardNameCombo->count() - 1) { addBoard(); + if (m_boardNameCombo->currentIndex() == m_boardNameCombo->count() - 1 && + m_boardNameCombo->currentIndex() != 0) + m_boardNameCombo->setCurrentIndex(m_lastGoodBoardValue); return; } CustomQSettings settings; @@ -530,14 +539,14 @@ void Settings::populateComboFromFilenames(QComboBox* combo, const QString &path, if (dir.cd(path)) fileList << dir.entryList(QDir::Files | QDir::Readable, QDir::Name); - QStringListIterator i(fileList); + QStringList::iterator i; QString fileName; QStringList list; int periodPos; - while (i.hasNext()) + for (i = fileList.begin(); i != fileList.end(); ++i) { - fileName = i.next(); + fileName = *i; periodPos = fileName.indexOf('.'); if (periodPos) { @@ -545,7 +554,20 @@ void Settings::populateComboFromFilenames(QComboBox* combo, const QString &path, list << fileName; } } - list.removeDuplicates(); + + for (i = fileList.begin(); i != fileList.end(); ++i) + { + QStringList::iterator j = i; + for (++j; j != fileList.end(); ++j) + { + if (*i == *j) + { + *i = "* " + *i; + list.erase(j); + break; + } + } + } combo->addItems(list); if (label.size() > 0) diff --git a/quacker/settings.h b/quacker/settings.h index fab2f3f..7c5738e 100644 --- 
a/quacker/settings.h +++ b/quacker/settings.h @@ -100,6 +100,8 @@ private: void pushIndex(GaddagFactory &factory, Quackle::LetterString &word, int index); static Settings *m_self; + int m_lastGoodLexiconValue; + int m_lastGoodBoardValue; }; #endif diff --git a/quackleio/dawgfactory.cpp b/quackleio/dawgfactory.cpp index e7ada85..362dfdc 100644 --- a/quackleio/dawgfactory.cpp +++ b/quackleio/dawgfactory.cpp @@ -17,6 +17,8 @@ */ +#include +#include #include #include #include @@ -184,6 +186,34 @@ void DawgFactory::writeIndex(const UVString& filename) } } +int DawgFactory::wordCount() const +{ + m_countsByLength.resize(0); + return m_root.wordCount(0, m_countsByLength); +} + +string DawgFactory::letterCountString() const +{ + ostringstream str; + if (m_countsByLength.size() < 16) + m_countsByLength.resize(16, 0); + str << "2s: " << std::setw(7) << std::right << std::setfill(' ') << m_countsByLength[2]; + str << "\t6s: " << std::setw(7) << std::right << std::setfill(' ') << m_countsByLength[6]; + str << "\t10s: " << std::setw(7) << std::right << std::setfill(' ') << m_countsByLength[10]; + str << "\t14s: " << std::setw(7) << std::right << std::setfill(' ') << m_countsByLength[14]; + str << "\n3s: " << std::setw(7) << std::right << std::setfill(' ') << m_countsByLength[3]; + str << "\t7s: " << std::setw(7) << std::right << std::setfill(' ') << m_countsByLength[7]; + str << "\t11s: " << std::setw(7) << std::right << std::setfill(' ') << m_countsByLength[11]; + str << "\t15s: " << std::setw(7) << std::right << std::setfill(' ') << m_countsByLength[15]; + str << "\n4s: " << std::setw(7) << std::right << std::setfill(' ') << m_countsByLength[4]; + str << "\t8s: " << std::setw(7) << std::right << std::setfill(' ') << m_countsByLength[8]; + str << "\t12s: " << std::setw(7) << std::right << std::setfill(' ') << m_countsByLength[12]; + str << "\n5s: " << std::setw(7) << std::right << std::setfill(' ') << m_countsByLength[5]; + str << "\t9s: " << std::setw(7) << std::right << 
std::setfill(' ') << m_countsByLength[9]; + str << "\t13s: " << std::setw(7) << std::right << std::setfill(' ') << m_countsByLength[13]; + str << "\n"; + return str.str(); +} void DawgFactory::Node::print(vector< Node* >& nodelist) @@ -287,11 +317,14 @@ bool DawgFactory::Node::equals(const Node &n) const return true; } -int DawgFactory::Node::wordCount() const +int DawgFactory::Node::wordCount(unsigned int depth, vector &countsByLength) const { int wordCount = ((playability == 0) ? 0 : 1); + if (countsByLength.size() < depth + 1) + countsByLength.resize(depth + 1, 0); + countsByLength[depth] += wordCount; for (size_t i = 0; i < children.size(); i++) - wordCount += children[i].wordCount(); + wordCount += children[i].wordCount(depth + 1, countsByLength); return wordCount; } diff --git a/quackleio/dawgfactory.h b/quackleio/dawgfactory.h index 1a1aa7d..8dd6e03 100644 --- a/quackleio/dawgfactory.h +++ b/quackleio/dawgfactory.h @@ -19,6 +19,7 @@ #ifndef QUACKLE_DAWGFACTORY_H #define QUACKLE_DAWGFACTORY_H +#include #include #include "flexiblealphabet.h" @@ -29,7 +30,8 @@ public: DawgFactory(const QString &alphabetFile); ~DawgFactory(); - int wordCount() const { return m_root.wordCount(); }; + int wordCount() const; + string letterCountString() const; int nodeCount() const { return m_nodelist.size(); }; int encodableWords() const { return m_encodableWords; }; int unencodableWords() const { return m_unencodableWords; }; @@ -50,7 +52,7 @@ private: void print(vector< Node* >& m_nodelist); int letterSum() const; - int wordCount() const; + int wordCount(unsigned int depth, vector &countsByLength) const; bool equals(const Node &n) const; Quackle::Letter c; @@ -65,6 +67,7 @@ private: mutable bool sumexplored; mutable int sum; + mutable vector counts; bool deleted; Node* cloneof; @@ -75,6 +78,7 @@ private: int m_unencodableWords; int m_duplicateWords; vector< Node* > m_nodelist; + mutable vector m_countsByLength; Quackle::AlphabetParameters *m_alphas; Node m_root; union { -- cgit 
v1.2.3 From 6339dec22e2190fd341500206c80425593324bdc Mon Sep 17 00:00:00 2001 From: John Fultz Date: Sun, 11 Oct 2015 18:19:20 -0500 Subject: Fix up lexicon dialog box checks. Get enables and disables right, efficient computation of word counts, etc. --- data/lexica/csw12.gaddag | Bin 25846356 -> 0 bytes data/lexica/twl06.gaddag | Bin 16811716 -> 0 bytes quacker/lexicondialog.cpp | 46 ++++++++++++++++++++++++++++++++++++++++------ quacker/lexicondialog.h | 7 ++++++- quackleio/dawgfactory.cpp | 5 +++-- quackleio/dawgfactory.h | 4 +++- 6 files changed, 52 insertions(+), 10 deletions(-) delete mode 100644 data/lexica/csw12.gaddag delete mode 100644 data/lexica/twl06.gaddag (limited to 'quackleio/dawgfactory.cpp') diff --git a/data/lexica/csw12.gaddag b/data/lexica/csw12.gaddag deleted file mode 100644 index 511c99f..0000000 Binary files a/data/lexica/csw12.gaddag and /dev/null differ diff --git a/data/lexica/twl06.gaddag b/data/lexica/twl06.gaddag deleted file mode 100644 index db93e2e..0000000 Binary files a/data/lexica/twl06.gaddag and /dev/null differ diff --git a/quacker/lexicondialog.cpp b/quacker/lexicondialog.cpp index e11ae41..f92efb1 100644 --- a/quacker/lexicondialog.cpp +++ b/quacker/lexicondialog.cpp @@ -27,6 +27,18 @@ #include "geometry.h" #include "quackleio/dawgfactory.h" +class FileNameValidator : public QValidator +{ +public: + virtual State validate(QString &input, int &pos) const + { + for (QString::ConstIterator i = input.begin(); i != input.end(); ++i) + if (*i == '/' || *i == '?' 
|| *i == '\\' || *i == '*') + return Invalid; + return Acceptable; + } +}; + LexiconDialog::LexiconDialog(QWidget *parent, const QString &originalName) : QDialog(parent), m_wordFactory(NULL) { @@ -37,6 +49,7 @@ LexiconDialog::LexiconDialog(QWidget *parent, const QString &originalName) : QDi // construct the UI elements m_lexiconName = new QLineEdit(); m_alphabetCombo = new QComboBox(); + m_fileNameValidator = new FileNameValidator(); m_addWordsFromFile = new QPushButton(tr("Add words from &file...")); m_clearAllWords = new QPushButton(tr("Clear &words and start again")); @@ -92,7 +105,7 @@ LexiconDialog::LexiconDialog(QWidget *parent, const QString &originalName) : QDi m_saveChanges->setDefault(true); // hook up signals and slots - // connect(m_lexiconName, SIGNAL(textEdited(const QString &)), this, SLOT(parametersChanged(const QString &))); + connect(m_lexiconName, SIGNAL(textEdited(const QString &)), this, SLOT(parametersChanged(const QString &))); connect(m_addWordsFromFile, SIGNAL(clicked()), this, SLOT(addWordsFromFile())); connect(m_saveChanges, SIGNAL(clicked()), this, SLOT(accept())); connect(m_cancel, SIGNAL(clicked()), this, SLOT(reject())); @@ -109,6 +122,9 @@ LexiconDialog::LexiconDialog(QWidget *parent, const QString &originalName) : QDi if (!originalName.isEmpty()) dawgFullFileName = QString::fromStdString(Quackle::LexiconParameters::findDictionaryFile(dawgFileName)); + m_lexiconName->setValidator(m_fileNameValidator); + m_lexiconName->setText(m_originalName); + if (!dawgFullFileName.isEmpty()) { m_deleteLexicon->setEnabled(Quackle::LexiconParameters::hasUserDictionaryFile(dawgFileName)); @@ -117,12 +133,13 @@ LexiconDialog::LexiconDialog(QWidget *parent, const QString &originalName) : QDi else m_deleteLexicon->setEnabled(false); - updateLexiconInformation(); + updateLexiconInformation(true); } LexiconDialog::~LexiconDialog() { - + delete m_fileNameValidator; + delete m_wordFactory; } void LexiconDialog::deleteLexicon() @@ -233,15 +250,30 @@ void 
LexiconDialog::accept() QDialog::accept(); } -void LexiconDialog::updateLexiconInformation() +void LexiconDialog::updateLexiconInformation(bool firstTime) { - int wordCount = m_wordFactory ? m_wordFactory->wordCount() : 0; - QByteArray hash = (m_wordFactory && wordCount) ? QByteArray(m_wordFactory->hashBytes(), 16).toHex() : ""; + QByteArray hash = m_wordFactory ? QByteArray(m_wordFactory->hashBytes(), 16).toHex() : ""; QString text; QString lengthText; + + // only recompute word count when the dictionary changes + if (m_wordFactory && hash != m_previousHash) + { + m_wordFactory->computeWordCount(); + m_previousHash = hash; + } + int wordCount = m_wordFactory ? m_wordFactory->wordCount() : 0; + if (wordCount == 0) + { + delete m_wordFactory; + m_wordFactory = NULL; + } if (m_wordFactory) lengthText = QString::fromStdString(m_wordFactory->letterCountString()); + if (firstTime) + m_originalHash = hash; + text.append(tr("File name: ")); text.append(tr("\n\nFile size: ")); text.append(tr("\n\nWord count: ")); @@ -252,4 +284,6 @@ void LexiconDialog::updateLexiconInformation() text.append(hash.left(8)); m_lexiconInformation->setText(text); + + m_saveChanges->setEnabled(hash != m_originalHash && !m_lexiconName->text().isEmpty()); } diff --git a/quacker/lexicondialog.h b/quacker/lexicondialog.h index 39cd546..fa80ec1 100644 --- a/quacker/lexicondialog.h +++ b/quacker/lexicondialog.h @@ -34,6 +34,7 @@ class QLabel; class QLineEdit; class QPushButton; class DawgFactory; +class FileNameValidator; class LexiconDialog : public QDialog { @@ -44,9 +45,10 @@ public: ~LexiconDialog(); virtual void accept(); - void updateLexiconInformation(); + void updateLexiconInformation(bool firstTime = false); protected slots: + void parametersChanged(const QString &) { updateLexiconInformation(); }; void deleteLexicon(); void addWordsFromFile(); void alphabetChanged(const QString &); @@ -62,6 +64,7 @@ private: QPushButton *m_addWordsFromFile; QPushButton *m_clearAllWords; QLabel 
*m_lexiconInformation; + FileNameValidator * m_fileNameValidator; QPushButton *m_saveChanges; QPushButton *m_cancel; @@ -69,6 +72,8 @@ private: QString m_originalName; QString m_alphabetFileName; + QByteArray m_originalHash; + QByteArray m_previousHash; DawgFactory *m_wordFactory; }; diff --git a/quackleio/dawgfactory.cpp b/quackleio/dawgfactory.cpp index 362dfdc..869ef8e 100644 --- a/quackleio/dawgfactory.cpp +++ b/quackleio/dawgfactory.cpp @@ -40,6 +40,7 @@ DawgFactory::DawgFactory(const QString &alphabetFile) m_root.lastchild = true; m_hash.int32ptr[0] = m_hash.int32ptr[1] = m_hash.int32ptr[2] = m_hash.int32ptr[3] = 0; + m_encodableWords = m_unencodableWords = m_duplicateWords = m_wordCount = 0; } DawgFactory::~DawgFactory() @@ -186,10 +187,10 @@ void DawgFactory::writeIndex(const UVString& filename) } } -int DawgFactory::wordCount() const +void DawgFactory::computeWordCount() const { m_countsByLength.resize(0); - return m_root.wordCount(0, m_countsByLength); + m_wordCount = m_root.wordCount(0, m_countsByLength); } string DawgFactory::letterCountString() const diff --git a/quackleio/dawgfactory.h b/quackleio/dawgfactory.h index 8dd6e03..5872dc3 100644 --- a/quackleio/dawgfactory.h +++ b/quackleio/dawgfactory.h @@ -30,7 +30,8 @@ public: DawgFactory(const QString &alphabetFile); ~DawgFactory(); - int wordCount() const; + void computeWordCount() const; + int wordCount() const { return m_wordCount; }; string letterCountString() const; int nodeCount() const { return m_nodelist.size(); }; int encodableWords() const { return m_encodableWords; }; @@ -78,6 +79,7 @@ private: int m_unencodableWords; int m_duplicateWords; vector< Node* > m_nodelist; + mutable int m_wordCount; mutable vector m_countsByLength; Quackle::AlphabetParameters *m_alphas; Node m_root; -- cgit v1.2.3 From ef4273ba47a2da9cea0aed59235e2d0a86bb8d7e Mon Sep 17 00:00:00 2001 From: John Fultz Date: Tue, 13 Oct 2015 12:00:13 -0500 Subject: Saving custom dictionaries now really works. 
* Fix a number of remaining bugs in the lexicon dialog. * Fix an error reading the v1 DAWG. * Improve the word counting mechanism. * Make sure the lexicon dialog properly selects and loads its dictionary after it's done. * Implement deleting of user dictionaries. * Clean up dictionary info text in lexicon dialog. * Disable gaddag generation...still have to fix that up to happen at sensible times and with user notification. --- lexiconparameters.cpp | 7 ++--- lexiconparameters.h | 2 +- quacker/lexicondialog.cpp | 72 +++++++++++++++++++++++++++++------------------ quacker/lexicondialog.h | 7 ++++- quacker/settings.cpp | 68 +++++++++++++++++++++++++++++--------------- quacker/settings.h | 2 +- quackleio/dawgfactory.cpp | 60 +++++++++++++-------------------------- quackleio/dawgfactory.h | 17 +++++------ 8 files changed, 128 insertions(+), 107 deletions(-) (limited to 'quackleio/dawgfactory.cpp') diff --git a/lexiconparameters.cpp b/lexiconparameters.cpp index bc10773..74de78f 100644 --- a/lexiconparameters.cpp +++ b/lexiconparameters.cpp @@ -82,8 +82,6 @@ class Quackle::V1LexiconInterpreter : public LexiconInterpreter file >> lexparams.m_utf8Alphabet[i]; file.get(); // separator space } - file.get(); // whitespace separator - lexparams.m_wordcount = (bytes[0] << 16) | (bytes[1] << 8) | bytes[2]; while (!file.eof()) { file.read((char*)(lexparams.m_dawg) + i, 7); @@ -123,18 +121,18 @@ class Quackle::V1LexiconInterpreter : public LexiconInterpreter p = (dawg[index] << 16) + (dawg[index + 1] << 8) + (dawg[index + 2]); letter = dawg[index + 3]; - t = (p != 0); lastchild = ((letter & 64) != 0); british = !(letter & 128); letter = (letter & 63) + QUACKLE_FIRST_LETTER; playability = (dawg[index + 4] << 16) + (dawg[index + 5] << 8) + (dawg[index + 6]); + t = (playability != 0); } virtual int versionNumber() const { return 1; } }; LexiconParameters::LexiconParameters() - : m_dawg(NULL), m_gaddag(NULL), m_interpreter(NULL), m_wordcount(0) + : m_dawg(NULL), m_gaddag(NULL), 
m_interpreter(NULL), m_wordCount(0) { memset(m_hash, 0, sizeof(m_hash)); } @@ -155,6 +153,7 @@ void LexiconParameters::unloadDawg() delete[] m_dawg; m_dawg = NULL; delete m_interpreter; + m_interpreter = NULL; } void LexiconParameters::unloadGaddag() diff --git a/lexiconparameters.h b/lexiconparameters.h index 4eda4f3..f29a589 100644 --- a/lexiconparameters.h +++ b/lexiconparameters.h @@ -88,7 +88,7 @@ protected: string m_lexiconName; LexiconInterpreter *m_interpreter; char m_hash[16]; - int m_wordcount; + int m_wordCount; vector m_utf8Alphabet; LexiconInterpreter* createInterpreter(char version) const; diff --git a/quacker/lexicondialog.cpp b/quacker/lexicondialog.cpp index f92efb1..6630e1f 100644 --- a/quacker/lexicondialog.cpp +++ b/quacker/lexicondialog.cpp @@ -40,7 +40,7 @@ public: }; LexiconDialog::LexiconDialog(QWidget *parent, const QString &originalName) : QDialog(parent), - m_wordFactory(NULL) + m_deleted(false), m_wordFactory(NULL) { m_originalName = originalName; @@ -57,6 +57,7 @@ LexiconDialog::LexiconDialog(QWidget *parent, const QString &originalName) : QDi m_lexiconInformation = new QLabel(""); m_lexiconInformation->setWordWrap(true); + m_lexiconInformation->setTextInteractionFlags(Qt::TextBrowserInteraction); m_saveChanges = new QPushButton(tr("&Save Changes")); m_cancel = new QPushButton(tr("&Cancel")); @@ -107,6 +108,7 @@ LexiconDialog::LexiconDialog(QWidget *parent, const QString &originalName) : QDi // hook up signals and slots connect(m_lexiconName, SIGNAL(textEdited(const QString &)), this, SLOT(parametersChanged(const QString &))); connect(m_addWordsFromFile, SIGNAL(clicked()), this, SLOT(addWordsFromFile())); + connect(m_clearAllWords, SIGNAL(clicked()), this, SLOT(loadOriginalDictionary())); connect(m_saveChanges, SIGNAL(clicked()), this, SLOT(accept())); connect(m_cancel, SIGNAL(clicked()), this, SLOT(reject())); connect(m_deleteLexicon, SIGNAL(clicked()), this, SLOT(deleteLexicon())); @@ -114,26 +116,13 @@ 
LexiconDialog::LexiconDialog(QWidget *parent, const QString &originalName) : QDi setWindowTitle(tr("Configure Lexicon - Quackle")); - Settings::populateComboFromFilenames(m_alphabetCombo, "alphabets", ""); + Settings::populateComboFromFilenames(m_alphabetCombo, "alphabets", ".quackle_alphabet", ""); alphabetChanged(m_alphabetCombo->currentText()); - string dawgFileName = originalName.toStdString() + ".dawg"; - QString dawgFullFileName; - if (!originalName.isEmpty()) - dawgFullFileName = QString::fromStdString(Quackle::LexiconParameters::findDictionaryFile(dawgFileName)); - m_lexiconName->setValidator(m_fileNameValidator); m_lexiconName->setText(m_originalName); - if (!dawgFullFileName.isEmpty()) - { - m_deleteLexicon->setEnabled(Quackle::LexiconParameters::hasUserDictionaryFile(dawgFileName)); - addWordsFromDawgFile(dawgFullFileName); - } - else - m_deleteLexicon->setEnabled(false); - - updateLexiconInformation(true); + loadOriginalDictionary(); } LexiconDialog::~LexiconDialog() @@ -144,9 +133,11 @@ LexiconDialog::~LexiconDialog() void LexiconDialog::deleteLexicon() { - delete m_wordFactory; - m_wordFactory = NULL; - updateLexiconInformation(); + string lexiconNameStr = m_originalName.toStdString(); + string filename = QUACKLE_DATAMANAGER->makeDataFilename("lexica", lexiconNameStr + ".dawg", true); + QFile(QString::fromStdString(filename)).remove(); + m_deleted = true; + QDialog::accept(); } void LexiconDialog::addWordsFromFile() @@ -245,8 +236,40 @@ void LexiconDialog::addWordsFromTextFile(const QString &textFile) } } +void LexiconDialog::loadOriginalDictionary() +{ + delete m_wordFactory; + m_wordFactory = NULL; + string dawgFileName = m_originalName.toStdString() + ".dawg"; + QString dawgFullFileName; + if (!m_originalName.isEmpty()) + dawgFullFileName = QString::fromStdString(Quackle::LexiconParameters::findDictionaryFile(dawgFileName)); + + if (!dawgFullFileName.isEmpty()) + { + 
m_deleteLexicon->setEnabled(Quackle::LexiconParameters::hasUserDictionaryFile(dawgFileName)); + m_lexiconInformation->setText(tr("Loading dictionary...")); + show(); + qApp->processEvents(); + addWordsFromDawgFile(dawgFullFileName); + } + else + m_deleteLexicon->setEnabled(false); + + updateLexiconInformation(true); +} + void LexiconDialog::accept() { + string lexiconNameStr = m_lexiconName->text().toStdString(); + string filename = QUACKLE_DATAMANAGER->makeDataFilename("lexica", lexiconNameStr + ".dawg", true); + m_lexiconInformation->setText(tr("Compressing and writing dictionary file...\nThis may take a few minutes.")); + qApp->processEvents(); + m_wordFactory->generate(); + m_lexiconInformation->setText(tr("Writing dictionary file...")); + qApp->processEvents(); + m_wordFactory->writeIndex(filename); + m_finalLexiconName = m_lexiconName->text(); QDialog::accept(); } @@ -256,12 +279,6 @@ void LexiconDialog::updateLexiconInformation(bool firstTime) QString text; QString lengthText; - // only recompute word count when the dictionary changes - if (m_wordFactory && hash != m_previousHash) - { - m_wordFactory->computeWordCount(); - m_previousHash = hash; - } int wordCount = m_wordFactory ? 
m_wordFactory->wordCount() : 0; if (wordCount == 0) { @@ -274,9 +291,7 @@ void LexiconDialog::updateLexiconInformation(bool firstTime) if (firstTime) m_originalHash = hash; - text.append(tr("File name: ")); - text.append(tr("\n\nFile size: ")); - text.append(tr("\n\nWord count: ")); + text.append(tr("Word count: ")); text.append(QString("%L1").arg(wordCount)); text.append("\n"); text.append(lengthText); @@ -286,4 +301,5 @@ void LexiconDialog::updateLexiconInformation(bool firstTime) m_lexiconInformation->setText(text); m_saveChanges->setEnabled(hash != m_originalHash && !m_lexiconName->text().isEmpty()); + m_clearAllWords->setEnabled(hash != m_originalHash); } diff --git a/quacker/lexicondialog.h b/quacker/lexicondialog.h index fa80ec1..1f1605b 100644 --- a/quacker/lexicondialog.h +++ b/quacker/lexicondialog.h @@ -45,6 +45,9 @@ public: ~LexiconDialog(); virtual void accept(); + bool itemWasDeleted() { return m_deleted; }; + const QString &lexiconName() { return m_finalLexiconName; }; + void updateLexiconInformation(bool firstTime = false); protected slots: @@ -52,6 +55,7 @@ protected slots: void deleteLexicon(); void addWordsFromFile(); void alphabetChanged(const QString &); + void loadOriginalDictionary(); protected: void addWordsFromDawgFile(const QString &dawgfile); @@ -73,7 +77,8 @@ private: QString m_originalName; QString m_alphabetFileName; QByteArray m_originalHash; - QByteArray m_previousHash; + QString m_finalLexiconName; + bool m_deleted; DawgFactory *m_wordFactory; }; diff --git a/quacker/settings.cpp b/quacker/settings.cpp index 3319955..6435605 100644 --- a/quacker/settings.cpp +++ b/quacker/settings.cpp @@ -107,7 +107,7 @@ void Settings::createGUI() m_lexiconNameCombo = new QComboBox; connect(m_lexiconNameCombo, SIGNAL(activated(const QString &)), this, SLOT(lexiconChanged(const QString &))); - populateComboFromFilenames(m_lexiconNameCombo, "lexica", "lexicon"); + populateComboFromFilenames(m_lexiconNameCombo, "lexica", ".dawg", "lexicon"); QLabel 
*lexiconNameLabel = new QLabel(tr("&Lexicon:")); lexiconNameLabel->setBuddy(m_lexiconNameCombo); @@ -118,7 +118,7 @@ void Settings::createGUI() m_alphabetNameCombo = new QComboBox; connect(m_alphabetNameCombo, SIGNAL(activated(const QString &)), this, SLOT(alphabetChanged(const QString &))); - populateComboFromFilenames(m_alphabetNameCombo, "alphabets", ""); + populateComboFromFilenames(m_alphabetNameCombo, "alphabets", ".quackle_alphabet", ""); QLabel *alphabetNameLabel = new QLabel(tr("&Alphabet:")); alphabetNameLabel->setBuddy(m_alphabetNameCombo); @@ -129,7 +129,7 @@ void Settings::createGUI() m_themeNameCombo = new QComboBox; connect(m_themeNameCombo, SIGNAL(activated(const QString &)), this, SLOT(themeChanged(const QString &))); - populateComboFromFilenames(m_themeNameCombo, "themes", ""); + populateComboFromFilenames(m_themeNameCombo, "themes", ".ini", ""); QLabel *themeNameLabel = new QLabel(tr("&Theme:")); themeNameLabel->setBuddy(m_themeNameCombo); @@ -140,7 +140,7 @@ void Settings::createGUI() m_boardNameCombo = new QComboBox; connect(m_boardNameCombo, SIGNAL(activated(const QString &)), this, SLOT(boardChanged(const QString &))); - populateComboFromFilenames(m_boardNameCombo, "boards", "board"); + populateComboFromFilenames(m_boardNameCombo, "boards", "", "board"); QLabel *boardNameLabel = new QLabel(tr("&Board:")); boardNameLabel->setBuddy(m_boardNameCombo); @@ -172,6 +172,8 @@ void Settings::createGUI() void Settings::load() { m_lexiconNameCombo->setCurrentIndex(m_lexiconNameCombo->findText(QuackleIO::Util::stdStringToQString(QUACKLE_LEXICON_PARAMETERS->lexiconName()))); + if (m_lexiconNameCombo->currentIndex() == -1) + m_lexiconNameCombo->setCurrentIndex(m_lexiconNameCombo->findText(QuackleIO::Util::stdStringToQString(QUACKLE_LEXICON_PARAMETERS->lexiconName()) + "*")); m_lastGoodLexiconValue = m_lexiconNameCombo->currentIndex(); 
m_alphabetNameCombo->setCurrentIndex(m_alphabetNameCombo->findText(QuackleIO::Util::stdStringToQString(QUACKLE_ALPHABET_PARAMETERS->alphabetName()))); m_themeNameCombo->setCurrentIndex(m_themeNameCombo->findText(m_themeName)); @@ -269,12 +271,12 @@ void Settings::setQuackleToUseLexiconName(const QString &lexiconName) else QUACKLE_LEXICON_PARAMETERS->loadGaddag(gaddagFile); - if (!QUACKLE_LEXICON_PARAMETERS->hasGaddag()) - { - gaddagFile = QUACKLE_DATAMANAGER->makeDataFilename("lexica", lexiconNameStr + ".gaddag", true); - buildGaddag(gaddagFile); - QUACKLE_LEXICON_PARAMETERS->loadGaddag(gaddagFile); - } + // if (!QUACKLE_LEXICON_PARAMETERS->hasGaddag()) + // { + // gaddagFile = QUACKLE_DATAMANAGER->makeDataFilename("lexica", lexiconNameStr + ".gaddag", true); + // buildGaddag(gaddagFile); + // QUACKLE_LEXICON_PARAMETERS->loadGaddag(gaddagFile); + // } QUACKLE_STRATEGY_PARAMETERS->initialize(lexiconNameStr); } @@ -342,6 +344,9 @@ void Settings::setQuackleToUseBoardName(const QString &boardName) void Settings::lexiconChanged(const QString &lexiconName) { + QString lexicon = lexiconName; + if (lexicon.endsWith("*")) + lexicon.truncate(lexicon.size() - 1); if (m_lexiconNameCombo->currentIndex() == m_lexiconNameCombo->count() - 1) { editLexicon(); @@ -350,11 +355,11 @@ void Settings::lexiconChanged(const QString &lexiconName) m_lexiconNameCombo->setCurrentIndex(m_lastGoodLexiconValue); return; } - setQuackleToUseLexiconName(lexiconName); + setQuackleToUseLexiconName(lexicon); m_lastGoodLexiconValue = m_lexiconNameCombo->currentIndex(); CustomQSettings settings; - settings.setValue("quackle/settings/lexicon-name", lexiconName); + settings.setValue("quackle/settings/lexicon-name", lexicon); emit refreshViews(); } @@ -465,9 +470,6 @@ void Settings::editBoard() void Settings::loadBoardNameCombo() { - if (m_lexiconNameCombo == 0) - return; - while (m_boardNameCombo->count() > 0) m_boardNameCombo->removeItem(0); @@ -489,12 +491,30 @@ void Settings::loadBoardNameCombo() void 
Settings::editLexicon() { QString name = m_lexiconNameCombo->currentText(); + if (name.endsWith("*")) + name.truncate(name.size() - 1); if (m_lexiconNameCombo->currentIndex() == m_lexiconNameCombo->count() - 1) name = ""; LexiconDialog dialog(this, name); if (dialog.exec()) { - populateComboFromFilenames(m_lexiconNameCombo, "lexica", "lexicon"); + populateComboFromFilenames(m_lexiconNameCombo, "lexica", ".dawg", "lexicon"); + qApp->processEvents(); + if (dialog.itemWasDeleted()) + { + m_lexiconNameCombo->setCurrentIndex(m_lexiconNameCombo->findText(name)); + QUACKLE_LEXICON_PARAMETERS->setLexiconName(""); // force lexicon to reload + QUACKLE_LEXICON_PARAMETERS->unloadAll(); + if (m_lexiconNameCombo->currentIndex() != -1) + setQuackleToUseLexiconName(name); + } + else if (!dialog.lexiconName().isEmpty()) + { + QUACKLE_LEXICON_PARAMETERS->setLexiconName(""); // force lexicon to reload + QUACKLE_LEXICON_PARAMETERS->unloadAll(); + setQuackleToUseLexiconName(dialog.lexiconName()); + m_lexiconNameCombo->setCurrentIndex(m_lexiconNameCombo->findText(name + "*")); + } load(); } } @@ -508,7 +528,7 @@ void Settings::editAlphabet() AlphabetDialog dialog(this); if (dialog.exec()) { - populateComboFromFilenames(m_alphabetNameCombo, "alphabets", "alphabet"); + populateComboFromFilenames(m_alphabetNameCombo, "alphabets", ".quackle_alphabet", "alphabet"); load(); } #endif // 0 @@ -529,8 +549,11 @@ void Settings::editTheme() #endif // 0 } -void Settings::populateComboFromFilenames(QComboBox* combo, const QString &path, const QString &label) +void Settings::populateComboFromFilenames(QComboBox* combo, const QString &path, const QString &extension, const QString &label) { + while (combo->count() > 0) + combo->removeItem(0); + QStringList fileList; QDir dir(self()->m_appDataDir); if (dir.cd(path)) @@ -547,6 +570,8 @@ void Settings::populateComboFromFilenames(QComboBox* combo, const QString &path, for (i = fileList.begin(); i != fileList.end(); ++i) { fileName = *i; + if 
(!fileName.endsWith(extension)) + continue; periodPos = fileName.indexOf('.'); if (periodPos) { @@ -555,14 +580,13 @@ void Settings::populateComboFromFilenames(QComboBox* combo, const QString &path, } } - for (i = fileList.begin(); i != fileList.end(); ++i) + for (i = list.begin(); i != list.end(); ++i) { - QStringList::iterator j = i; - for (++j; j != fileList.end(); ++j) + for (QStringList::iterator j = i + 1; j != list.end(); ++j) { if (*i == *j) { - *i = "* " + *i; + *i = *i + "*"; list.erase(j); break; } diff --git a/quacker/settings.h b/quacker/settings.h index 7c5738e..babea3c 100644 --- a/quacker/settings.h +++ b/quacker/settings.h @@ -42,7 +42,7 @@ public: static Settings *self(); // load up an item list based on a list of filenames - static void populateComboFromFilenames(QComboBox* combo, const QString &path, const QString &label); + static void populateComboFromFilenames(QComboBox* combo, const QString &path, const QString &extension, const QString &label); signals: void refreshViews(); diff --git a/quackleio/dawgfactory.cpp b/quackleio/dawgfactory.cpp index 869ef8e..4dd4ec3 100644 --- a/quackleio/dawgfactory.cpp +++ b/quackleio/dawgfactory.cpp @@ -28,6 +28,8 @@ DawgFactory::DawgFactory(const QString &alphabetFile) + : m_encodableWords(0), m_unencodableWords(0), m_duplicateWords(0), + m_countsByLength(Quackle::FixedLengthString::maxSize, 0) { QuackleIO::FlexibleAlphabetParameters *flexure = new QuackleIO::FlexibleAlphabetParameters; flexure->load(alphabetFile); @@ -40,7 +42,6 @@ DawgFactory::DawgFactory(const QString &alphabetFile) m_root.lastchild = true; m_hash.int32ptr[0] = m_hash.int32ptr[1] = m_hash.int32ptr[2] = m_hash.int32ptr[3] = 0; - m_encodableWords = m_unencodableWords = m_duplicateWords = m_wordCount = 0; } DawgFactory::~DawgFactory() @@ -48,7 +49,7 @@ DawgFactory::~DawgFactory() delete m_alphas; } -bool DawgFactory::pushWord(const UVString& word, bool inSmaller, int playability) +bool DawgFactory::pushWord(const UVString &word, bool 
inSmaller, int playability) { UVString leftover; Quackle::LetterString encodedWord = m_alphas->encode(word, &leftover); @@ -59,11 +60,12 @@ bool DawgFactory::pushWord(const UVString& word, bool inSmaller, int playability return false; } -bool DawgFactory::pushWord(const Quackle::LetterString& word, bool inSmaller, int playability) +bool DawgFactory::pushWord(const Quackle::LetterString &word, bool inSmaller, int playability) { if (m_root.pushWord(word, inSmaller, playability)) { ++m_encodableWords; + ++m_countsByLength[word.length()]; hashWord(word); return true; } @@ -133,7 +135,7 @@ void DawgFactory::generate() m_root.print(m_nodelist); } -void DawgFactory::writeIndex(const UVString& filename) +void DawgFactory::writeIndex(const string &filename) { ofstream out(filename.c_str(), ios::out | ios::binary); unsigned char bytes[7]; @@ -187,37 +189,26 @@ void DawgFactory::writeIndex(const UVString& filename) } } -void DawgFactory::computeWordCount() const -{ - m_countsByLength.resize(0); - m_wordCount = m_root.wordCount(0, m_countsByLength); -} - string DawgFactory::letterCountString() const { ostringstream str; - if (m_countsByLength.size() < 16) - m_countsByLength.resize(16, 0); - str << "2s: " << std::setw(7) << std::right << std::setfill(' ') << m_countsByLength[2]; - str << "\t6s: " << std::setw(7) << std::right << std::setfill(' ') << m_countsByLength[6]; - str << "\t10s: " << std::setw(7) << std::right << std::setfill(' ') << m_countsByLength[10]; - str << "\t14s: " << std::setw(7) << std::right << std::setfill(' ') << m_countsByLength[14]; - str << "\n3s: " << std::setw(7) << std::right << std::setfill(' ') << m_countsByLength[3]; - str << "\t7s: " << std::setw(7) << std::right << std::setfill(' ') << m_countsByLength[7]; - str << "\t11s: " << std::setw(7) << std::right << std::setfill(' ') << m_countsByLength[11]; - str << "\t15s: " << std::setw(7) << std::right << std::setfill(' ') << m_countsByLength[15]; - str << "\n4s: " << std::setw(7) << std::right << 
std::setfill(' ') << m_countsByLength[4]; - str << "\t8s: " << std::setw(7) << std::right << std::setfill(' ') << m_countsByLength[8]; - str << "\t12s: " << std::setw(7) << std::right << std::setfill(' ') << m_countsByLength[12]; - str << "\n5s: " << std::setw(7) << std::right << std::setfill(' ') << m_countsByLength[5]; - str << "\t9s: " << std::setw(7) << std::right << std::setfill(' ') << m_countsByLength[9]; - str << "\t13s: " << std::setw(7) << std::right << std::setfill(' ') << m_countsByLength[13]; - str << "\n"; + for (int i = 0; i < 4; i++) + { + for (int j = 0; j < 4; j++) + { + const int letterCount = j * 4 + i + 2; + if (j != 0) + str << "\t"; + if (m_countsByLength[letterCount] > 0) + str << letterCount << "s: " << std::setw(7) << std::right << std::setfill(' ') << m_countsByLength[letterCount]; + } + str << "\n"; + } return str.str(); } -void DawgFactory::Node::print(vector< Node* >& nodelist) +void DawgFactory::Node::print(vector< Node* > &nodelist) { written = true; @@ -255,7 +246,7 @@ void DawgFactory::Node::print(vector< Node* >& nodelist) // returns true if the word was actually added...false if it's a duplicate. -bool DawgFactory::Node::pushWord(const Quackle::LetterString& word, bool inSmaller, int pb) +bool DawgFactory::Node::pushWord(const Quackle::LetterString &word, bool inSmaller, int pb) { bool added; if (word.length() == 0) { @@ -318,17 +309,6 @@ bool DawgFactory::Node::equals(const Node &n) const return true; } -int DawgFactory::Node::wordCount(unsigned int depth, vector &countsByLength) const -{ - int wordCount = ((playability == 0) ? 
0 : 1); - if (countsByLength.size() < depth + 1) - countsByLength.resize(depth + 1, 0); - countsByLength[depth] += wordCount; - for (size_t i = 0; i < children.size(); i++) - wordCount += children[i].wordCount(depth + 1, countsByLength); - return wordCount; -} - int DawgFactory::Node::letterSum() const { if (sumexplored) diff --git a/quackleio/dawgfactory.h b/quackleio/dawgfactory.h index 5872dc3..efcc455 100644 --- a/quackleio/dawgfactory.h +++ b/quackleio/dawgfactory.h @@ -30,19 +30,18 @@ public: DawgFactory(const QString &alphabetFile); ~DawgFactory(); - void computeWordCount() const; - int wordCount() const { return m_wordCount; }; + int wordCount() const { return m_encodableWords; }; string letterCountString() const; int nodeCount() const { return m_nodelist.size(); }; int encodableWords() const { return m_encodableWords; }; int unencodableWords() const { return m_unencodableWords; }; int duplicateWords() const { return m_duplicateWords; }; - bool pushWord(const UVString& word, bool inSmaller, int playability); - bool pushWord(const Quackle::LetterString& word, bool inSmaller, int playability); + bool pushWord(const UVString &word, bool inSmaller, int playability); + bool pushWord(const Quackle::LetterString &word, bool inSmaller, int playability); void hashWord(const Quackle::LetterString &word); void generate(); - void writeIndex(const UVString& filename); + void writeIndex(const string &filename); const char* hashBytes() { return m_hash.charptr; }; @@ -50,10 +49,9 @@ private: class Node { public: bool pushWord(const Quackle::LetterString& word, bool inSmaller, int pb); - void print(vector< Node* >& m_nodelist); + void print(vector< Node* > &m_nodelist); int letterSum() const; - int wordCount(unsigned int depth, vector &countsByLength) const; bool equals(const Node &n) const; Quackle::Letter c; @@ -67,7 +65,7 @@ private: bool lastchild; mutable bool sumexplored; - mutable int sum; + mutable unsigned int sum; mutable vector counts; bool deleted; @@ -79,8 
+77,7 @@ private: int m_unencodableWords; int m_duplicateWords; vector< Node* > m_nodelist; - mutable int m_wordCount; - mutable vector m_countsByLength; + vector m_countsByLength; Quackle::AlphabetParameters *m_alphas; Node m_root; union { -- cgit v1.2.3