diff options
-rw-r--r-- | makeminidawg/makeminidawg.pro | 4 | ||||
-rw-r--r-- | makeminidawg/makeminidawgmain.cpp | 124 | ||||
-rw-r--r-- | makeminidawg/minidawgmaker.cpp | 516 | ||||
-rw-r--r-- | makeminidawg/minidawgmaker.h | 37 | ||||
-rw-r--r-- | quackleio/dawgfactory.cpp | 282 | ||||
-rw-r--r-- | quackleio/dawgfactory.h | 79 |
6 files changed, 482 insertions, 560 deletions
diff --git a/makeminidawg/makeminidawg.pro b/makeminidawg/makeminidawg.pro index 8af3bb2..729e6b4 100644 --- a/makeminidawg/makeminidawg.pro +++ b/makeminidawg/makeminidawg.pro @@ -24,8 +24,8 @@ QMAKE_LFLAGS_RELEASE += -L../lib/release -L../quackleio/lib/release QMAKE_LFLAGS_DEBUG += -L../lib/debug -L../quackleio/lib/debug # Input -HEADERS += minidawgmaker.h -SOURCES += minidawgmaker.cpp makeminidawgmain.cpp +HEADERS += +SOURCES += makeminidawgmain.cpp macx-g++ { diff --git a/makeminidawg/makeminidawgmain.cpp b/makeminidawg/makeminidawgmain.cpp index 82871be..89afb68 100644 --- a/makeminidawg/makeminidawgmain.cpp +++ b/makeminidawg/makeminidawgmain.cpp @@ -16,16 +16,130 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#include <QCoreApplication> -#include <QStringList> +#include <map> +#include <QtCore> -#include "minidawgmaker.h" +#include "quackleio/dawgfactory.h" +#include "quackleio/froggetopt.h" +#include "quackleio/util.h" + +std::map< QString, bool> smallerMap; +std::map< QString, int> playabilityMap; int main(int argc, char **argv) { QCoreApplication a(argc, argv); - MiniDawgMaker maker; - return maker.executeFromArguments(); + GetOpt opts; + QString alphabet; + opts.addOption('a', "alphabet", &alphabet); + if (!opts.parse()) + return 1; + + if (alphabet.isNull()) + alphabet = "english"; + + QString alphabetFile = QString("../data/alphabets/%1.quackle_alphabet").arg(alphabet); + UVcout << "Using alphabet file: " << QuackleIO::Util::qstringToString(alphabetFile) << endl; + + DawgFactory factory(alphabetFile); + + + QString smallerDictFilename = "smaller.raw"; + QFile smallerDict(smallerDictFilename); + if (!smallerDict.exists()) + { + UVcout << "smaller dictionary does not exist: " << QuackleIO::Util::qstringToString(smallerDictFilename) << endl; + return false; + } + + if (!smallerDict.open(QIODevice::ReadOnly | QIODevice::Text)) + { + UVcout << "Could not open " << QuackleIO::Util::qstringToString(smallerDictFilename) << endl; + return false; + } + + QTextStream smallerStream(&smallerDict); + smallerStream.setCodec(QTextCodec::codecForName("UTF-8")); + + while (!smallerStream.atEnd()) + { + QString originalQString; + smallerStream >> originalQString; + //UVcout << "this word is in the smaller dictionary: " << QuackleIO::Util::qstringToString(originalQString) << endl; + smallerMap[originalQString] = true; + } + + QString playabilityFilename = "playabilities.raw"; + QFile playability(playabilityFilename); + if (!playability.exists()) + { + UVcout << "playability does not exist: " << QuackleIO::Util::qstringToString(playabilityFilename) << endl; + return false; + } + + if (!playability.open(QIODevice::ReadOnly | QIODevice::Text)) + { + UVcout << "Could not open " << QuackleIO::Util::qstringToString(playabilityFilename) << endl; + return false; + } + + QTextStream playabilityStream(&playability); + playabilityStream.setCodec(QTextCodec::codecForName("UTF-8")); + + while (!playabilityStream.atEnd()) + { + int pb; + playabilityStream >> pb; + QString originalQString; + playabilityStream >> originalQString; + //UVcout << "playability: " << QuackleIO::Util::qstringToString(originalQString) << " " << pb << endl; + playabilityMap[originalQString] = pb; + } + + QString dawgFilename = "dawginput.raw"; + QFile file(dawgFilename); + if (!file.exists()) + { + UVcout << "dawg does not exist: " << QuackleIO::Util::qstringToString(dawgFilename) << endl; + return false; + } + + if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) + { + UVcout << "Could not open " << QuackleIO::Util::qstringToString(dawgFilename) << endl; + return false; + } + + QTextStream stream(&file); + stream.setCodec(QTextCodec::codecForName("UTF-8")); + + while (!stream.atEnd()) + { + QString word; + stream >> word; + + bool inSmaller = smallerMap[word]; + int pb = playabilityMap[word]; + + if (stream.atEnd()) + break; + + if (!factory.pushWord(word, inSmaller, pb)) + UVcout << "not encodable without leftover: " << QuackleIO::Util::qstringToString(word) << endl; + } + + file.close(); + + UVcout << "encodable words: " << factory.encodableWords() << ", unencodable words: " << factory.unencodableWords() << endl; + + UVcout << "nodelist.size(): " << factory.nodeCount() << endl; + + factory.generate(); + UVcout << "Compressed nodelist.size(): " << factory.nodeCount() << endl; + + factory.writeIndex("output.dawg"); + + return 0; } diff --git a/makeminidawg/minidawgmaker.cpp b/makeminidawg/minidawgmaker.cpp deleted file mode 100644 index fa4df2f..0000000 --- a/makeminidawg/minidawgmaker.cpp +++ /dev/null @@ -1,516 +0,0 @@ -/* - * Quackle -- Crossword game artificial intelligence and analysis tool - * Copyright (C) 2005-2014 Jason Katz-Brown and John O'Laughlin. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <string> -#include <iostream> -#include <iomanip> -#include <vector> -#include <map> - -#include <QtCore> - -#include <quackleio/flexiblealphabet.h> -#include <quackleio/froggetopt.h> -#include <quackleio/util.h> - -#include "minidawgmaker.h" - -using namespace std; - -class Node { -public: - void pushword(Quackle::LetterString word, bool inSmaller, int pb); - void print(Quackle::LetterString prefix); - - int depth(); - int subtreesize(); - int lettersum(); - bool equals(Node &n); - - Quackle::Letter c; - bool t; - bool insmallerdict; - int playability; - - vector<Node> children; - Node* parent; - int pointer; - int location; - int oldpointer; - - bool lastchild; - - bool dexplored; - bool sexplored; - bool lexplored; - - int d; - int s; - int l; - - bool deleted; - Node* cloneof; - bool written; -}; - - -vector< Node* > nodelist; -Node root; - -vector< int > ofdepth[20][100]; - -map< QString, bool> smallerMap; -map< QString, int> playabilityMap; - -bool Node::equals(Node &n) -{ - if (playability != n.playability) - return false; - if (c != n.c) - return false; - if (children.size() != n.children.size()) - return false; - if (t != n.t) - return false; - if (insmallerdict != n.insmallerdict) - return false; - if (l != n.l) - return false; - if (s != n.s) - return false; - if (d != n.d) - return false; - - for (unsigned int i = 0; i < children.size(); i++) - if (!children[i].equals(n.children[i])) - return false; - - return true; -} - -int Node::depth() -{ - if (dexplored) - return d; - - dexplored = true; - - if (children.size() == 0) - { - d = 1; - return d; - } - - int childmax = 0; - - for (unsigned int i = 0; i < children.size(); i++) - { - int d = children[i].depth(); - if (d > childmax) - childmax = d; - } - - d = 1 + childmax; - return d; -} - - -int Node::subtreesize() -{ - if (sexplored) - return s; - - sexplored = true; - - if (children.size() == 0) - { - s = 1; - return s; - } - - int childsum = 0; - - for (unsigned int i = 0; i < children.size(); i++) - { - int s = children[i].subtreesize(); - childsum += s; - } - - s = 1 + childsum; - return s; -} - - -int Node::lettersum() -{ - if (lexplored) - return l; - - lexplored = true; - - int thisletter = c; - - if (children.size() == 0) - { - l = thisletter; - return l; - } - - int childsum = 0; - - for (unsigned int i = 0; i < children.size(); i++) - { - int s = children[i].lettersum(); - childsum += s; - } - - l = thisletter + childsum; - return l; -} - - -void Node::print(Quackle::LetterString prefix) { - - written = true; - - if (t) { - //cout << prefix << endl; - } - - //cout << "prefix: " << prefix << ", children: " << children.size() << ", deleted: " << deleted << ", oldpointer: " << oldpointer << ", nthChild: " << nthChild << endl; - - if (children.size() == 0) - { - //cout << " no children. nothing to do." << endl; - return; - } - - if (!deleted) - { - //cout << " Setting pointer to " << nodelist.size() << " before I push_back the children." << endl; - pointer = nodelist.size(); - } - else - { - pointer = cloneof->pointer; - //cout << " Setting pointer to clone's (" << pointer << ") and not pushing anything." << endl; - } - - if (!deleted) - { - for (unsigned int i = 0; i < children.size(); i++) { - children[i].parent = this; - nodelist.push_back(&children[i]); - } - - for (unsigned int i = 0; i < children.size(); i++) { - if (!children[i].deleted) - children[i].print(prefix + children[i].c); - else if (!children[i].cloneof->written) - children[i].cloneof->print(prefix + children[i].c); - } - } - - if (children.size() > 0) - children[children.size() - 1].lastchild = true; -} - - -void Node::pushword(Quackle::LetterString word, bool inSmaller, int pb) { - if (word.length() == 0) { - t = true; - playability = pb; - insmallerdict = inSmaller; - } - else { - char first = word[0]; - Quackle::LetterString rest = word.substr(1, word.length() - 1); - int index = -1; - - // cout << "first: " << first << ", rest: " << rest << endl; - - for (unsigned int i = 0; i < children.size(); i++) { - if (children[i].c == first) { - index = i; - i = children.size(); - } - } - - if (index == -1) { - Node n; - n.c = first; - n.t = false; - n.playability = 0; - n.insmallerdict = false; - n.pointer = 0; - n.oldpointer = 0; - n.lastchild = false; - children.push_back(n); - index = children.size() - 1; - } - - children[index].pushword(rest, inSmaller, pb); - } - - dexplored = false; - sexplored = false; - lexplored = false; - deleted = false; - written = false; -} - - -void minimize() -{ - nodelist[0]->depth(); - nodelist[0]->subtreesize(); - nodelist[0]->lettersum(); - - for (unsigned int i = 0; i < nodelist.size(); i++) - { - int d = nodelist[i]->d; - if ((d >= 1) & (d <= 20)) - ofdepth[d - 1][nodelist[i]->l%100].push_back(i); - } - - for (int d = 0; d < 20; d++) - { - for (int l = 0; l < 100; l++) - { - //cout << "l: " << l << endl; - if (ofdepth[d][l].size() > 0) - for (vector<int>::iterator it = ofdepth[d][l].begin(); it != ofdepth[d][l].end() - 1; it++) - { - if (!nodelist[(*it)]->deleted) - { - for (vector<int>::iterator jt = it + 1; jt != ofdepth[d][l].end(); jt++) - { - if (!nodelist[(*jt)]->deleted) - { - // cout << "Comparing " << (*it) << " and " << (*jt) << endl; - if (nodelist[(*it)]->equals(nodelist[(*jt)][0])) - { - //cout << "Hey! " << (*it) << " == " << (*jt) << endl; - // ones[l].erase(jt); - nodelist[(*jt)]->deleted = true; - nodelist[(*jt)]->cloneof = nodelist[(*it)]; - } - } - } - } - } - } - - for (unsigned int i = 0; i < nodelist.size(); i++) - { - nodelist[i]->oldpointer = nodelist[i]->pointer; - nodelist[i]->pointer = 0; - nodelist[i]->written = false; - } - - } - - nodelist.clear(); - nodelist.push_back(&root); - root.print(""); - UVcout << "nodelist.size(): " << nodelist.size() << endl; -} - -int MiniDawgMaker::executeFromArguments() -{ - GetOpt opts; - QString alphabet; - opts.addOption('a', "alphabet", &alphabet); - if (!opts.parse()) - return 1; - - if (alphabet.isNull()) - alphabet = "english"; - - Quackle::AlphabetParameters *alphas = 0; - QString alphabetFile = QString("../data/alphabets/%1.quackle_alphabet").arg(alphabet); - UVcout << "Using alphabet file: " << QuackleIO::Util::qstringToString(alphabetFile) << endl; - QuackleIO::FlexibleAlphabetParameters *flexure = new QuackleIO::FlexibleAlphabetParameters; - flexure->load(alphabetFile); - alphas = flexure; - - root.t = false; - root.insmallerdict = false; - root.playability = 0; - root.c = QUACKLE_BLANK_MARK; - root.pointer = 0; - root.lastchild = true; - - QString smallerDictFilename = "smaller.raw"; - QFile smallerDict(smallerDictFilename); - if (!smallerDict.exists()) - { - UVcout << "smaller dictionary does not exist: " << QuackleIO::Util::qstringToString(smallerDictFilename) << endl; - return false; - } - - if (!smallerDict.open(QIODevice::ReadOnly | QIODevice::Text)) - { - UVcout << "Could not open " << QuackleIO::Util::qstringToString(smallerDictFilename) << endl; - return false; - } - - QTextStream smallerStream(&smallerDict); - smallerStream.setCodec(QTextCodec::codecForName("UTF-8")); - - while (!smallerStream.atEnd()) - { - QString originalQString; - smallerStream >> originalQString; - //UVcout << "this word is in the smaller dictionary: " << QuackleIO::Util::qstringToString(originalQString) << endl; - smallerMap[originalQString] = true; - } - - QString playabilityFilename = "playabilities.raw"; - QFile playability(playabilityFilename); - if (!playability.exists()) - { - UVcout << "playability does not exist: " << QuackleIO::Util::qstringToString(playabilityFilename) << endl; - return false; - } - - if (!playability.open(QIODevice::ReadOnly | QIODevice::Text)) - { - UVcout << "Could not open " << QuackleIO::Util::qstringToString(playabilityFilename) << endl; - return false; - } - - QTextStream playabilityStream(&playability); - playabilityStream.setCodec(QTextCodec::codecForName("UTF-8")); - - while (!playabilityStream.atEnd()) - { - int pb; - playabilityStream >> pb; - QString originalQString; - playabilityStream >> originalQString; - //UVcout << "playability: " << QuackleIO::Util::qstringToString(originalQString) << " " << pb << endl; - playabilityMap[originalQString] = pb; - } - - QString dawgFilename = "dawginput.raw"; - QFile file(dawgFilename); - if (!file.exists()) - { - UVcout << "dawg does not exist: " << QuackleIO::Util::qstringToString(dawgFilename) << endl; - return false; - } - - if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) - { - UVcout << "Could not open " << QuackleIO::Util::qstringToString(dawgFilename) << endl; - return false; - } - - QTextStream stream(&file); - stream.setCodec(QTextCodec::codecForName("UTF-8")); - - int encodableWords = 0; - int unencodableWords = 0; - - while (!stream.atEnd()) - { - QString originalQString; - stream >> originalQString; - - bool inSmaller = smallerMap[originalQString]; - int pb = playabilityMap[originalQString]; - - if (stream.atEnd()) - break; - - UVString originalString = QuackleIO::Util::qstringToString(originalQString); - - //UVcout << "read original string: " << originalString; - //if (!inSmaller) UVcout << "#"; - //UVcout << endl; - - UVString leftover; - Quackle::LetterString encodedWord = alphas->encode(originalString, &leftover); - if (leftover.empty()) - { - //for (Quackle::LetterString::iterator it = encodedWord.begin(); it != encodedWord.end(); ++it) - //UVcout << "got encoded letter: " << (int)(*it) << endl; - - root.pushword(encodedWord, inSmaller, pb); - ++encodableWords; - } - else - { - UVcout << "not encodable without leftover: " << originalString << endl; - ++unencodableWords; - } - } - - file.close(); - delete alphas; - - UVcout << "encodable words: " << encodableWords << ", unencodable words: " << unencodableWords << endl; - - nodelist.push_back(&root); - root.print(""); - UVcout << "nodelist.size(): " << nodelist.size() << endl; - - minimize(); - - ofstream out("output.dawg", ios::out | ios::binary); - - for (unsigned int i = 0; i < nodelist.size(); i++) { - //cout << nodelist[i]->c << " " << nodelist[i]->pointer << " " << nodelist[i]->t << " " << nodelist[i]->lastchild << endl; - Node* n = nodelist[i]; - unsigned int p; - if (nodelist[i]->deleted) - { - p = (unsigned int)(nodelist[i]->cloneof->pointer); - // n = nodelist[i]->cloneof; - } - else - p = (unsigned int)(nodelist[i]->pointer); - - char bytes[7]; - unsigned char n1 = (p & 0x00FF0000) >> 16; - unsigned char n2 = (p & 0x0000FF00) >> 8; - unsigned char n3 = (p & 0x000000FF); - unsigned char n4 = n->c - QUACKLE_FIRST_LETTER; - - unsigned int pb = n->playability; - unsigned char n5 = (pb & 0x00FF0000) >> 16; - unsigned char n6 = (pb & 0x0000FF00) >> 8; - unsigned char n7 = (pb & 0x000000FF); - - if (n->t) { - n4 |= 32; - } - if (n->lastchild) { - n4 |= 64; - } - if (n->insmallerdict) { - n4 |= 128; - } - - bytes[0] = n1; bytes[1] = n2; bytes[2] = n3; bytes[3] = n4; - bytes[4] = n5; bytes[5] = n6; bytes[6] = n7; - out.write(bytes, 7); - } - return 0; -} diff --git a/makeminidawg/minidawgmaker.h b/makeminidawg/minidawgmaker.h deleted file mode 100644 index 7c8deee..0000000 --- a/makeminidawg/minidawgmaker.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Quackle -- Crossword game artificial intelligence and analysis tool - * Copyright (C) 2005-2014 Jason Katz-Brown and John O'Laughlin. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#ifndef QUACKLE_MINIDAWGMAKER_H -#define QUACKLE_MINIDAWGMAKER_H - -#include <QStringList> - -class MiniDawgMaker -{ -public: - MiniDawgMaker() {} - ~MiniDawgMaker() {} - - // parse and execute commands specified on command line - int executeFromArguments(); - - // TOOD further classify this code -}; - -#endif - diff --git a/quackleio/dawgfactory.cpp b/quackleio/dawgfactory.cpp new file mode 100644 index 0000000..8a37766 --- /dev/null +++ b/quackleio/dawgfactory.cpp @@ -0,0 +1,282 @@ +/* + * Quackle -- Crossword game artificial intelligence and analysis tool + * Copyright (C) 2005-2014 Jason Katz-Brown and John O'Laughlin. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + + +#include <iostream> +#include <QtCore> + +#include "dawgfactory.h" +#include "util.h" + + +DawgFactory::DawgFactory(const QString& alphabetFile) +{ + QuackleIO::FlexibleAlphabetParameters *flexure = new QuackleIO::FlexibleAlphabetParameters; + flexure->load(alphabetFile); + alphas = flexure; + + root.t = false; + root.insmallerdict = false; + root.playability = 0; + root.c = QUACKLE_BLANK_MARK; + root.pointer = 0; + root.lastchild = true; +} + +DawgFactory::~DawgFactory() +{ + delete alphas; +} + +bool DawgFactory::pushWord(const QString& word, bool inSmaller, int playability) +{ + UVString originalString = QuackleIO::Util::qstringToString(word); + + UVString leftover; + Quackle::LetterString encodedWord = alphas->encode(originalString, &leftover); + if (leftover.empty()) + { + ++m_encodableWords; + root.pushWord(encodedWord, inSmaller, playability); + return true; + } + + ++m_unencodableWords; + return false; +} + +void DawgFactory::generate() +{ + const int bucketcount = 2000; + vector< int > bucket[bucketcount]; + + nodelist.clear(); + nodelist.push_back(&root); + root.print(nodelist); + + nodelist[0]->letterSum(); + + for (unsigned int i = 0; i < nodelist.size(); i++) + { + bucket[nodelist[i]->sum % bucketcount].push_back(i); + nodelist[i]->pointer = 0; + nodelist[i]->written = false; + nodelist[i]->deleted = false; + nodelist[i]->cloneof = NULL; + } + + for (int b = 0; b < bucketcount; b++) + { + if (bucket[b].size() == 0) + continue; + for (vector<int>::iterator it = bucket[b].begin(); it != bucket[b].end() - 1; it++) + { + if (!nodelist[(*it)]->deleted) + { + for (vector<int>::iterator jt = it + 1; jt != bucket[b].end(); jt++) + { + if (!nodelist[(*jt)]->deleted) + { + // cout << "Comparing " << (*it) << " and " << (*jt) << endl; + if (nodelist[(*it)]->equals(nodelist[(*jt)][0])) + { + //cout << "Hey! " << (*it) << " == " << (*jt) << endl; + // ones[l].erase(jt); + nodelist[(*jt)]->deleted = true; + nodelist[(*jt)]->cloneof = nodelist[(*it)]; + } + } + } + } + } + } + + nodelist.clear(); + nodelist.push_back(&root); + root.print(nodelist); +} + +void DawgFactory::writeIndex(const QString& filename) +{ + ofstream out(QuackleIO::Util::qstringToStdString(filename).c_str(), ios::out | ios::binary); + + for (unsigned int i = 0; i < nodelist.size(); i++) { + //cout << nodelist[i]->c << " " << nodelist[i]->pointer << " " << nodelist[i]->t << " " << nodelist[i]->lastchild << endl; + Node* n = nodelist[i]; + unsigned int p; + if (nodelist[i]->deleted) + { + p = (unsigned int)(nodelist[i]->cloneof->pointer); + // n = nodelist[i]->cloneof; + } + else + p = (unsigned int)(nodelist[i]->pointer); + + char bytes[7]; + unsigned char n1 = (p & 0x00FF0000) >> 16; + unsigned char n2 = (p & 0x0000FF00) >> 8; + unsigned char n3 = (p & 0x000000FF); + unsigned char n4 = n->c - QUACKLE_FIRST_LETTER; + + unsigned int pb = n->playability; + unsigned char n5 = (pb & 0x00FF0000) >> 16; + unsigned char n6 = (pb & 0x0000FF00) >> 8; + unsigned char n7 = (pb & 0x000000FF); + + if (n->t) { + n4 |= 32; + } + if (n->lastchild) { + n4 |= 64; + } + if (n->insmallerdict) { + n4 |= 128; + } + + bytes[0] = n1; bytes[1] = n2; bytes[2] = n3; bytes[3] = n4; + bytes[4] = n5; bytes[5] = n6; bytes[6] = n7; + out.write(bytes, 7); + } +} + + + +void DawgFactory::Node::print(vector< Node* >& nodelist) +{ + written = true; + + if (children.size() == 0) + return; + + if (!deleted) + { + //cout << " Setting pointer to " << nodelist.size() << " before I push_back the children." << endl; + pointer = nodelist.size(); + } + else + { + pointer = cloneof->pointer; + //cout << " Setting pointer to clone's (" << pointer << ") and not pushing anything." << endl; + } + + if (!deleted) + { + for (unsigned int i = 0; i < children.size(); i++) { + nodelist.push_back(&children[i]); + } + + for (unsigned int i = 0; i < children.size(); i++) { + if (!children[i].deleted) + children[i].print(nodelist); + else if (!children[i].cloneof->written) + children[i].cloneof->print(nodelist); + } + } + + if (children.size() > 0) + children[children.size() - 1].lastchild = true; +} + + +void DawgFactory::Node::pushWord(const Quackle::LetterString& word, bool inSmaller, int pb) +{ + if (word.length() == 0) { + t = true; + playability = pb; + insmallerdict = inSmaller; + } + else { + char first = word[0]; + Quackle::LetterString rest = word.substr(1, word.length() - 1); + int index = -1; + + // cout << "first: " << first << ", rest: " << rest << endl; + + for (unsigned int i = 0; i < children.size(); i++) { + if (children[i].c == first) { + index = i; + break; + } + } + + if (index == -1) { + Node n; + n.c = first; + n.t = false; + n.playability = 0; + n.insmallerdict = false; + n.pointer = 0; + n.lastchild = false; + children.push_back(n); + index = children.size() - 1; + } + + children[index].pushWord(rest, inSmaller, pb); + } + + sumexplored = false; + deleted = false; + written = false; +} + + +bool DawgFactory::Node::equals(const Node &n) const +{ + if (playability != n.playability) + return false; + if (c != n.c) + return false; + if (children.size() != n.children.size()) + return false; + if (t != n.t) + return false; + if (insmallerdict != n.insmallerdict) + return false; + if (sum != n.sum) + return false; + + for (unsigned int i = 0; i < children.size(); i++) + if (!children[i].equals(n.children[i])) + return false; + + return true; +} + +int DawgFactory::Node::wordCount() const +{ + int wordCount = (t ? 0 : 1); + for (size_t i = 0; i < children.size(); i++) + wordCount += children[i].wordCount(); + return wordCount; +} + +int DawgFactory::Node::letterSum() const +{ + if (sumexplored) + return sum; + + sumexplored = true; + + // djb2 checksum + sum = 5381 * 33 + (int) c; + + for (unsigned int i = 0; i < children.size(); i++) + sum = (sum << 5) + sum + children[i].letterSum(); + + return sum; +} diff --git a/quackleio/dawgfactory.h b/quackleio/dawgfactory.h new file mode 100644 index 0000000..b2cfb76 --- /dev/null +++ b/quackleio/dawgfactory.h @@ -0,0 +1,79 @@ +/* + * Quackle -- Crossword game artificial intelligence and analysis tool + * Copyright (C) 2005-2014 Jason Katz-Brown and John O'Laughlin. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef QUACKLE_DAWGFACTORY_H +#define QUACKLE_DAWGFACTORY_H + +#include <vector> +#include "flexiblealphabet.h" + + +class DawgFactory { +public: + + DawgFactory(const QString& alphabetFile); + ~DawgFactory(); + + int wordCount() const { return root.wordCount(); }; + int nodeCount() const { return nodelist.size(); }; + int encodableWords() const { return m_encodableWords; }; + int unencodableWords() const { return m_unencodableWords; }; + + bool pushWord(const QString& word, bool inSmaller, int playability); + void generate(); + void writeIndex(const QString& fname); + +private: + class Node { + public: + void pushWord(const Quackle::LetterString& word, bool inSmaller, int pb); + void print(vector< Node* >& nodelist); + + int letterSum() const; + int wordCount() const; + bool equals(const Node &n) const; + + Quackle::Letter c; + bool t; + bool insmallerdict; + int playability; + + vector<Node> children; + int pointer; + int location; + + bool lastchild; + + mutable bool sumexplored; + mutable int sum; + + bool deleted; + Node* cloneof; + bool written; + }; + + int m_encodableWords; + int m_unencodableWords; + vector< Node* > nodelist; + Quackle::AlphabetParameters *alphas; + Node root; +}; + +#endif + + |