summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--makeminidawg/makeminidawg.pro4
-rw-r--r--makeminidawg/makeminidawgmain.cpp124
-rw-r--r--makeminidawg/minidawgmaker.cpp516
-rw-r--r--makeminidawg/minidawgmaker.h37
-rw-r--r--quackleio/dawgfactory.cpp282
-rw-r--r--quackleio/dawgfactory.h79
6 files changed, 482 insertions, 560 deletions
diff --git a/makeminidawg/makeminidawg.pro b/makeminidawg/makeminidawg.pro
index 8af3bb2..729e6b4 100644
--- a/makeminidawg/makeminidawg.pro
+++ b/makeminidawg/makeminidawg.pro
@@ -24,8 +24,8 @@ QMAKE_LFLAGS_RELEASE += -L../lib/release -L../quackleio/lib/release
QMAKE_LFLAGS_DEBUG += -L../lib/debug -L../quackleio/lib/debug
# Input
-HEADERS += minidawgmaker.h
-SOURCES += minidawgmaker.cpp makeminidawgmain.cpp
+HEADERS +=
+SOURCES += makeminidawgmain.cpp
macx-g++ {
diff --git a/makeminidawg/makeminidawgmain.cpp b/makeminidawg/makeminidawgmain.cpp
index 82871be..89afb68 100644
--- a/makeminidawg/makeminidawgmain.cpp
+++ b/makeminidawg/makeminidawgmain.cpp
@@ -16,16 +16,130 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#include <QCoreApplication>
-#include <QStringList>
+#include <map>
+#include <QtCore>
-#include "minidawgmaker.h"
+#include "quackleio/dawgfactory.h"
+#include "quackleio/froggetopt.h"
+#include "quackleio/util.h"
+
+std::map< QString, bool> smallerMap;
+std::map< QString, int> playabilityMap;
int main(int argc, char **argv)
{
QCoreApplication a(argc, argv);
- MiniDawgMaker maker;
- return maker.executeFromArguments();
+ GetOpt opts;
+ QString alphabet;
+ opts.addOption('a', "alphabet", &alphabet);
+ if (!opts.parse())
+ return 1;
+
+ if (alphabet.isNull())
+ alphabet = "english";
+
+ QString alphabetFile = QString("../data/alphabets/%1.quackle_alphabet").arg(alphabet);
+ UVcout << "Using alphabet file: " << QuackleIO::Util::qstringToString(alphabetFile) << endl;
+
+ DawgFactory factory(alphabetFile);
+
+
+ QString smallerDictFilename = "smaller.raw";
+ QFile smallerDict(smallerDictFilename);
+ if (!smallerDict.exists())
+ {
+ UVcout << "smaller dictionary does not exist: " << QuackleIO::Util::qstringToString(smallerDictFilename) << endl;
+ return false;
+ }
+
+ if (!smallerDict.open(QIODevice::ReadOnly | QIODevice::Text))
+ {
+ UVcout << "Could not open " << QuackleIO::Util::qstringToString(smallerDictFilename) << endl;
+ return false;
+ }
+
+ QTextStream smallerStream(&smallerDict);
+ smallerStream.setCodec(QTextCodec::codecForName("UTF-8"));
+
+ while (!smallerStream.atEnd())
+ {
+ QString originalQString;
+ smallerStream >> originalQString;
+ //UVcout << "this word is in the smaller dictionary: " << QuackleIO::Util::qstringToString(originalQString) << endl;
+ smallerMap[originalQString] = true;
+ }
+
+ QString playabilityFilename = "playabilities.raw";
+ QFile playability(playabilityFilename);
+ if (!playability.exists())
+ {
+ UVcout << "playability does not exist: " << QuackleIO::Util::qstringToString(playabilityFilename) << endl;
+ return false;
+ }
+
+ if (!playability.open(QIODevice::ReadOnly | QIODevice::Text))
+ {
+ UVcout << "Could not open " << QuackleIO::Util::qstringToString(playabilityFilename) << endl;
+ return false;
+ }
+
+ QTextStream playabilityStream(&playability);
+ playabilityStream.setCodec(QTextCodec::codecForName("UTF-8"));
+
+ while (!playabilityStream.atEnd())
+ {
+ int pb;
+ playabilityStream >> pb;
+ QString originalQString;
+ playabilityStream >> originalQString;
+ //UVcout << "playability: " << QuackleIO::Util::qstringToString(originalQString) << " " << pb << endl;
+ playabilityMap[originalQString] = pb;
+ }
+
+ QString dawgFilename = "dawginput.raw";
+ QFile file(dawgFilename);
+ if (!file.exists())
+ {
+ UVcout << "dawg does not exist: " << QuackleIO::Util::qstringToString(dawgFilename) << endl;
+ return false;
+ }
+
+ if (!file.open(QIODevice::ReadOnly | QIODevice::Text))
+ {
+ UVcout << "Could not open " << QuackleIO::Util::qstringToString(dawgFilename) << endl;
+ return false;
+ }
+
+ QTextStream stream(&file);
+ stream.setCodec(QTextCodec::codecForName("UTF-8"));
+
+ while (!stream.atEnd())
+ {
+ QString word;
+ stream >> word;
+
+ bool inSmaller = smallerMap[word];
+ int pb = playabilityMap[word];
+
+ if (stream.atEnd())
+ break;
+
+ if (!factory.pushWord(word, inSmaller, pb))
+ UVcout << "not encodable without leftover: " << QuackleIO::Util::qstringToString(word) << endl;
+ }
+
+ file.close();
+
+ UVcout << "encodable words: " << factory.encodableWords() << ", unencodable words: " << factory.unencodableWords() << endl;
+
+ UVcout << "nodelist.size(): " << factory.nodeCount() << endl;
+
+ factory.generate();
+ UVcout << "Compressed nodelist.size(): " << factory.nodeCount() << endl;
+
+ factory.writeIndex("output.dawg");
+
+ return 0;
}
diff --git a/makeminidawg/minidawgmaker.cpp b/makeminidawg/minidawgmaker.cpp
deleted file mode 100644
index fa4df2f..0000000
--- a/makeminidawg/minidawgmaker.cpp
+++ /dev/null
@@ -1,516 +0,0 @@
-/*
- * Quackle -- Crossword game artificial intelligence and analysis tool
- * Copyright (C) 2005-2014 Jason Katz-Brown and John O'Laughlin.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <string>
-#include <iostream>
-#include <iomanip>
-#include <vector>
-#include <map>
-
-#include <QtCore>
-
-#include <quackleio/flexiblealphabet.h>
-#include <quackleio/froggetopt.h>
-#include <quackleio/util.h>
-
-#include "minidawgmaker.h"
-
-using namespace std;
-
-class Node {
-public:
- void pushword(Quackle::LetterString word, bool inSmaller, int pb);
- void print(Quackle::LetterString prefix);
-
- int depth();
- int subtreesize();
- int lettersum();
- bool equals(Node &n);
-
- Quackle::Letter c;
- bool t;
- bool insmallerdict;
- int playability;
-
- vector<Node> children;
- Node* parent;
- int pointer;
- int location;
- int oldpointer;
-
- bool lastchild;
-
- bool dexplored;
- bool sexplored;
- bool lexplored;
-
- int d;
- int s;
- int l;
-
- bool deleted;
- Node* cloneof;
- bool written;
-};
-
-
-vector< Node* > nodelist;
-Node root;
-
-vector< int > ofdepth[20][100];
-
-map< QString, bool> smallerMap;
-map< QString, int> playabilityMap;
-
-bool Node::equals(Node &n)
-{
- if (playability != n.playability)
- return false;
- if (c != n.c)
- return false;
- if (children.size() != n.children.size())
- return false;
- if (t != n.t)
- return false;
- if (insmallerdict != n.insmallerdict)
- return false;
- if (l != n.l)
- return false;
- if (s != n.s)
- return false;
- if (d != n.d)
- return false;
-
- for (unsigned int i = 0; i < children.size(); i++)
- if (!children[i].equals(n.children[i]))
- return false;
-
- return true;
-}
-
-int Node::depth()
-{
- if (dexplored)
- return d;
-
- dexplored = true;
-
- if (children.size() == 0)
- {
- d = 1;
- return d;
- }
-
- int childmax = 0;
-
- for (unsigned int i = 0; i < children.size(); i++)
- {
- int d = children[i].depth();
- if (d > childmax)
- childmax = d;
- }
-
- d = 1 + childmax;
- return d;
-}
-
-
-int Node::subtreesize()
-{
- if (sexplored)
- return s;
-
- sexplored = true;
-
- if (children.size() == 0)
- {
- s = 1;
- return s;
- }
-
- int childsum = 0;
-
- for (unsigned int i = 0; i < children.size(); i++)
- {
- int s = children[i].subtreesize();
- childsum += s;
- }
-
- s = 1 + childsum;
- return s;
-}
-
-
-int Node::lettersum()
-{
- if (lexplored)
- return l;
-
- lexplored = true;
-
- int thisletter = c;
-
- if (children.size() == 0)
- {
- l = thisletter;
- return l;
- }
-
- int childsum = 0;
-
- for (unsigned int i = 0; i < children.size(); i++)
- {
- int s = children[i].lettersum();
- childsum += s;
- }
-
- l = thisletter + childsum;
- return l;
-}
-
-
-void Node::print(Quackle::LetterString prefix) {
-
- written = true;
-
- if (t) {
- //cout << prefix << endl;
- }
-
- //cout << "prefix: " << prefix << ", children: " << children.size() << ", deleted: " << deleted << ", oldpointer: " << oldpointer << ", nthChild: " << nthChild << endl;
-
- if (children.size() == 0)
- {
- //cout << " no children. nothing to do." << endl;
- return;
- }
-
- if (!deleted)
- {
- //cout << " Setting pointer to " << nodelist.size() << " before I push_back the children." << endl;
- pointer = nodelist.size();
- }
- else
- {
- pointer = cloneof->pointer;
- //cout << " Setting pointer to clone's (" << pointer << ") and not pushing anything." << endl;
- }
-
- if (!deleted)
- {
- for (unsigned int i = 0; i < children.size(); i++) {
- children[i].parent = this;
- nodelist.push_back(&children[i]);
- }
-
- for (unsigned int i = 0; i < children.size(); i++) {
- if (!children[i].deleted)
- children[i].print(prefix + children[i].c);
- else if (!children[i].cloneof->written)
- children[i].cloneof->print(prefix + children[i].c);
- }
- }
-
- if (children.size() > 0)
- children[children.size() - 1].lastchild = true;
-}
-
-
-void Node::pushword(Quackle::LetterString word, bool inSmaller, int pb) {
- if (word.length() == 0) {
- t = true;
- playability = pb;
- insmallerdict = inSmaller;
- }
- else {
- char first = word[0];
- Quackle::LetterString rest = word.substr(1, word.length() - 1);
- int index = -1;
-
- // cout << "first: " << first << ", rest: " << rest << endl;
-
- for (unsigned int i = 0; i < children.size(); i++) {
- if (children[i].c == first) {
- index = i;
- i = children.size();
- }
- }
-
- if (index == -1) {
- Node n;
- n.c = first;
- n.t = false;
- n.playability = 0;
- n.insmallerdict = false;
- n.pointer = 0;
- n.oldpointer = 0;
- n.lastchild = false;
- children.push_back(n);
- index = children.size() - 1;
- }
-
- children[index].pushword(rest, inSmaller, pb);
- }
-
- dexplored = false;
- sexplored = false;
- lexplored = false;
- deleted = false;
- written = false;
-}
-
-
-void minimize()
-{
- nodelist[0]->depth();
- nodelist[0]->subtreesize();
- nodelist[0]->lettersum();
-
- for (unsigned int i = 0; i < nodelist.size(); i++)
- {
- int d = nodelist[i]->d;
- if ((d >= 1) & (d <= 20))
- ofdepth[d - 1][nodelist[i]->l%100].push_back(i);
- }
-
- for (int d = 0; d < 20; d++)
- {
- for (int l = 0; l < 100; l++)
- {
- //cout << "l: " << l << endl;
- if (ofdepth[d][l].size() > 0)
- for (vector<int>::iterator it = ofdepth[d][l].begin(); it != ofdepth[d][l].end() - 1; it++)
- {
- if (!nodelist[(*it)]->deleted)
- {
- for (vector<int>::iterator jt = it + 1; jt != ofdepth[d][l].end(); jt++)
- {
- if (!nodelist[(*jt)]->deleted)
- {
- // cout << "Comparing " << (*it) << " and " << (*jt) << endl;
- if (nodelist[(*it)]->equals(nodelist[(*jt)][0]))
- {
- //cout << "Hey! " << (*it) << " == " << (*jt) << endl;
- // ones[l].erase(jt);
- nodelist[(*jt)]->deleted = true;
- nodelist[(*jt)]->cloneof = nodelist[(*it)];
- }
- }
- }
- }
- }
- }
-
- for (unsigned int i = 0; i < nodelist.size(); i++)
- {
- nodelist[i]->oldpointer = nodelist[i]->pointer;
- nodelist[i]->pointer = 0;
- nodelist[i]->written = false;
- }
-
- }
-
- nodelist.clear();
- nodelist.push_back(&root);
- root.print("");
- UVcout << "nodelist.size(): " << nodelist.size() << endl;
-}
-
-int MiniDawgMaker::executeFromArguments()
-{
- GetOpt opts;
- QString alphabet;
- opts.addOption('a', "alphabet", &alphabet);
- if (!opts.parse())
- return 1;
-
- if (alphabet.isNull())
- alphabet = "english";
-
- Quackle::AlphabetParameters *alphas = 0;
- QString alphabetFile = QString("../data/alphabets/%1.quackle_alphabet").arg(alphabet);
- UVcout << "Using alphabet file: " << QuackleIO::Util::qstringToString(alphabetFile) << endl;
- QuackleIO::FlexibleAlphabetParameters *flexure = new QuackleIO::FlexibleAlphabetParameters;
- flexure->load(alphabetFile);
- alphas = flexure;
-
- root.t = false;
- root.insmallerdict = false;
- root.playability = 0;
- root.c = QUACKLE_BLANK_MARK;
- root.pointer = 0;
- root.lastchild = true;
-
- QString smallerDictFilename = "smaller.raw";
- QFile smallerDict(smallerDictFilename);
- if (!smallerDict.exists())
- {
- UVcout << "smaller dictionary does not exist: " << QuackleIO::Util::qstringToString(smallerDictFilename) << endl;
- return false;
- }
-
- if (!smallerDict.open(QIODevice::ReadOnly | QIODevice::Text))
- {
- UVcout << "Could not open " << QuackleIO::Util::qstringToString(smallerDictFilename) << endl;
- return false;
- }
-
- QTextStream smallerStream(&smallerDict);
- smallerStream.setCodec(QTextCodec::codecForName("UTF-8"));
-
- while (!smallerStream.atEnd())
- {
- QString originalQString;
- smallerStream >> originalQString;
- //UVcout << "this word is in the smaller dictionary: " << QuackleIO::Util::qstringToString(originalQString) << endl;
- smallerMap[originalQString] = true;
- }
-
- QString playabilityFilename = "playabilities.raw";
- QFile playability(playabilityFilename);
- if (!playability.exists())
- {
- UVcout << "playability does not exist: " << QuackleIO::Util::qstringToString(playabilityFilename) << endl;
- return false;
- }
-
- if (!playability.open(QIODevice::ReadOnly | QIODevice::Text))
- {
- UVcout << "Could not open " << QuackleIO::Util::qstringToString(playabilityFilename) << endl;
- return false;
- }
-
- QTextStream playabilityStream(&playability);
- playabilityStream.setCodec(QTextCodec::codecForName("UTF-8"));
-
- while (!playabilityStream.atEnd())
- {
- int pb;
- playabilityStream >> pb;
- QString originalQString;
- playabilityStream >> originalQString;
- //UVcout << "playability: " << QuackleIO::Util::qstringToString(originalQString) << " " << pb << endl;
- playabilityMap[originalQString] = pb;
- }
-
- QString dawgFilename = "dawginput.raw";
- QFile file(dawgFilename);
- if (!file.exists())
- {
- UVcout << "dawg does not exist: " << QuackleIO::Util::qstringToString(dawgFilename) << endl;
- return false;
- }
-
- if (!file.open(QIODevice::ReadOnly | QIODevice::Text))
- {
- UVcout << "Could not open " << QuackleIO::Util::qstringToString(dawgFilename) << endl;
- return false;
- }
-
- QTextStream stream(&file);
- stream.setCodec(QTextCodec::codecForName("UTF-8"));
-
- int encodableWords = 0;
- int unencodableWords = 0;
-
- while (!stream.atEnd())
- {
- QString originalQString;
- stream >> originalQString;
-
- bool inSmaller = smallerMap[originalQString];
- int pb = playabilityMap[originalQString];
-
- if (stream.atEnd())
- break;
-
- UVString originalString = QuackleIO::Util::qstringToString(originalQString);
-
- //UVcout << "read original string: " << originalString;
- //if (!inSmaller) UVcout << "#";
- //UVcout << endl;
-
- UVString leftover;
- Quackle::LetterString encodedWord = alphas->encode(originalString, &leftover);
- if (leftover.empty())
- {
- //for (Quackle::LetterString::iterator it = encodedWord.begin(); it != encodedWord.end(); ++it)
- //UVcout << "got encoded letter: " << (int)(*it) << endl;
-
- root.pushword(encodedWord, inSmaller, pb);
- ++encodableWords;
- }
- else
- {
- UVcout << "not encodable without leftover: " << originalString << endl;
- ++unencodableWords;
- }
- }
-
- file.close();
- delete alphas;
-
- UVcout << "encodable words: " << encodableWords << ", unencodable words: " << unencodableWords << endl;
-
- nodelist.push_back(&root);
- root.print("");
- UVcout << "nodelist.size(): " << nodelist.size() << endl;
-
- minimize();
-
- ofstream out("output.dawg", ios::out | ios::binary);
-
- for (unsigned int i = 0; i < nodelist.size(); i++) {
- //cout << nodelist[i]->c << " " << nodelist[i]->pointer << " " << nodelist[i]->t << " " << nodelist[i]->lastchild << endl;
- Node* n = nodelist[i];
- unsigned int p;
- if (nodelist[i]->deleted)
- {
- p = (unsigned int)(nodelist[i]->cloneof->pointer);
- // n = nodelist[i]->cloneof;
- }
- else
- p = (unsigned int)(nodelist[i]->pointer);
-
- char bytes[7];
- unsigned char n1 = (p & 0x00FF0000) >> 16;
- unsigned char n2 = (p & 0x0000FF00) >> 8;
- unsigned char n3 = (p & 0x000000FF);
- unsigned char n4 = n->c - QUACKLE_FIRST_LETTER;
-
- unsigned int pb = n->playability;
- unsigned char n5 = (pb & 0x00FF0000) >> 16;
- unsigned char n6 = (pb & 0x0000FF00) >> 8;
- unsigned char n7 = (pb & 0x000000FF);
-
- if (n->t) {
- n4 |= 32;
- }
- if (n->lastchild) {
- n4 |= 64;
- }
- if (n->insmallerdict) {
- n4 |= 128;
- }
-
- bytes[0] = n1; bytes[1] = n2; bytes[2] = n3; bytes[3] = n4;
- bytes[4] = n5; bytes[5] = n6; bytes[6] = n7;
- out.write(bytes, 7);
- }
- return 0;
-}
diff --git a/makeminidawg/minidawgmaker.h b/makeminidawg/minidawgmaker.h
deleted file mode 100644
index 7c8deee..0000000
--- a/makeminidawg/minidawgmaker.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Quackle -- Crossword game artificial intelligence and analysis tool
- * Copyright (C) 2005-2014 Jason Katz-Brown and John O'Laughlin.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef QUACKLE_MINIDAWGMAKER_H
-#define QUACKLE_MINIDAWGMAKER_H
-
-#include <QStringList>
-
-class MiniDawgMaker
-{
-public:
- MiniDawgMaker() {}
- ~MiniDawgMaker() {}
-
- // parse and execute commands specified on command line
- int executeFromArguments();
-
- // TOOD further classify this code
-};
-
-#endif
-
diff --git a/quackleio/dawgfactory.cpp b/quackleio/dawgfactory.cpp
new file mode 100644
index 0000000..8a37766
--- /dev/null
+++ b/quackleio/dawgfactory.cpp
@@ -0,0 +1,282 @@
+/*
+ * Quackle -- Crossword game artificial intelligence and analysis tool
+ * Copyright (C) 2005-2014 Jason Katz-Brown and John O'Laughlin.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#include <iostream>
+#include <QtCore>
+
+#include "dawgfactory.h"
+#include "util.h"
+
+
+DawgFactory::DawgFactory(const QString& alphabetFile)
+{
+ QuackleIO::FlexibleAlphabetParameters *flexure = new QuackleIO::FlexibleAlphabetParameters;
+ flexure->load(alphabetFile);
+ alphas = flexure;
+
+ root.t = false;
+ root.insmallerdict = false;
+ root.playability = 0;
+ root.c = QUACKLE_BLANK_MARK;
+ root.pointer = 0;
+ root.lastchild = true;
+}
+
+DawgFactory::~DawgFactory()
+{
+ delete alphas;
+}
+
+bool DawgFactory::pushWord(const QString& word, bool inSmaller, int playability)
+{
+ UVString originalString = QuackleIO::Util::qstringToString(word);
+
+ UVString leftover;
+ Quackle::LetterString encodedWord = alphas->encode(originalString, &leftover);
+ if (leftover.empty())
+ {
+ ++m_encodableWords;
+ root.pushWord(encodedWord, inSmaller, playability);
+ return true;
+ }
+
+ ++m_unencodableWords;
+ return false;
+}
+
+void DawgFactory::generate()
+{
+ const int bucketcount = 2000;
+ vector< int > bucket[bucketcount];
+
+ nodelist.clear();
+ nodelist.push_back(&root);
+ root.print(nodelist);
+
+ nodelist[0]->letterSum();
+
+ for (unsigned int i = 0; i < nodelist.size(); i++)
+ {
+ bucket[nodelist[i]->sum % bucketcount].push_back(i);
+ nodelist[i]->pointer = 0;
+ nodelist[i]->written = false;
+ nodelist[i]->deleted = false;
+ nodelist[i]->cloneof = NULL;
+ }
+
+ for (int b = 0; b < bucketcount; b++)
+ {
+ if (bucket[b].size() == 0)
+ continue;
+ for (vector<int>::iterator it = bucket[b].begin(); it != bucket[b].end() - 1; it++)
+ {
+ if (!nodelist[(*it)]->deleted)
+ {
+ for (vector<int>::iterator jt = it + 1; jt != bucket[b].end(); jt++)
+ {
+ if (!nodelist[(*jt)]->deleted)
+ {
+ // cout << "Comparing " << (*it) << " and " << (*jt) << endl;
+ if (nodelist[(*it)]->equals(nodelist[(*jt)][0]))
+ {
+ //cout << "Hey! " << (*it) << " == " << (*jt) << endl;
+ // ones[l].erase(jt);
+ nodelist[(*jt)]->deleted = true;
+ nodelist[(*jt)]->cloneof = nodelist[(*it)];
+ }
+ }
+ }
+ }
+ }
+ }
+
+ nodelist.clear();
+ nodelist.push_back(&root);
+ root.print(nodelist);
+}
+
+void DawgFactory::writeIndex(const QString& filename)
+{
+ ofstream out(QuackleIO::Util::qstringToStdString(filename).c_str(), ios::out | ios::binary);
+
+ for (unsigned int i = 0; i < nodelist.size(); i++) {
+ //cout << nodelist[i]->c << " " << nodelist[i]->pointer << " " << nodelist[i]->t << " " << nodelist[i]->lastchild << endl;
+ Node* n = nodelist[i];
+ unsigned int p;
+ if (nodelist[i]->deleted)
+ {
+ p = (unsigned int)(nodelist[i]->cloneof->pointer);
+ // n = nodelist[i]->cloneof;
+ }
+ else
+ p = (unsigned int)(nodelist[i]->pointer);
+
+ char bytes[7];
+ unsigned char n1 = (p & 0x00FF0000) >> 16;
+ unsigned char n2 = (p & 0x0000FF00) >> 8;
+ unsigned char n3 = (p & 0x000000FF);
+ unsigned char n4 = n->c - QUACKLE_FIRST_LETTER;
+
+ unsigned int pb = n->playability;
+ unsigned char n5 = (pb & 0x00FF0000) >> 16;
+ unsigned char n6 = (pb & 0x0000FF00) >> 8;
+ unsigned char n7 = (pb & 0x000000FF);
+
+ if (n->t) {
+ n4 |= 32;
+ }
+ if (n->lastchild) {
+ n4 |= 64;
+ }
+ if (n->insmallerdict) {
+ n4 |= 128;
+ }
+
+ bytes[0] = n1; bytes[1] = n2; bytes[2] = n3; bytes[3] = n4;
+ bytes[4] = n5; bytes[5] = n6; bytes[6] = n7;
+ out.write(bytes, 7);
+ }
+}
+
+
+
+void DawgFactory::Node::print(vector< Node* >& nodelist)
+{
+ written = true;
+
+ if (children.size() == 0)
+ return;
+
+ if (!deleted)
+ {
+ //cout << " Setting pointer to " << nodelist.size() << " before I push_back the children." << endl;
+ pointer = nodelist.size();
+ }
+ else
+ {
+ pointer = cloneof->pointer;
+ //cout << " Setting pointer to clone's (" << pointer << ") and not pushing anything." << endl;
+ }
+
+ if (!deleted)
+ {
+ for (unsigned int i = 0; i < children.size(); i++) {
+ nodelist.push_back(&children[i]);
+ }
+
+ for (unsigned int i = 0; i < children.size(); i++) {
+ if (!children[i].deleted)
+ children[i].print(nodelist);
+ else if (!children[i].cloneof->written)
+ children[i].cloneof->print(nodelist);
+ }
+ }
+
+ if (children.size() > 0)
+ children[children.size() - 1].lastchild = true;
+}
+
+
+void DawgFactory::Node::pushWord(const Quackle::LetterString& word, bool inSmaller, int pb)
+{
+ if (word.length() == 0) {
+ t = true;
+ playability = pb;
+ insmallerdict = inSmaller;
+ }
+ else {
+ char first = word[0];
+ Quackle::LetterString rest = word.substr(1, word.length() - 1);
+ int index = -1;
+
+ // cout << "first: " << first << ", rest: " << rest << endl;
+
+ for (unsigned int i = 0; i < children.size(); i++) {
+ if (children[i].c == first) {
+ index = i;
+ break;
+ }
+ }
+
+ if (index == -1) {
+ Node n;
+ n.c = first;
+ n.t = false;
+ n.playability = 0;
+ n.insmallerdict = false;
+ n.pointer = 0;
+ n.lastchild = false;
+ children.push_back(n);
+ index = children.size() - 1;
+ }
+
+ children[index].pushWord(rest, inSmaller, pb);
+ }
+
+ sumexplored = false;
+ deleted = false;
+ written = false;
+}
+
+
+bool DawgFactory::Node::equals(const Node &n) const
+{
+ if (playability != n.playability)
+ return false;
+ if (c != n.c)
+ return false;
+ if (children.size() != n.children.size())
+ return false;
+ if (t != n.t)
+ return false;
+ if (insmallerdict != n.insmallerdict)
+ return false;
+ if (sum != n.sum)
+ return false;
+
+ for (unsigned int i = 0; i < children.size(); i++)
+ if (!children[i].equals(n.children[i]))
+ return false;
+
+ return true;
+}
+
+int DawgFactory::Node::wordCount() const
+{
+ int wordCount = (t ? 0 : 1);
+ for (size_t i = 0; i < children.size(); i++)
+ wordCount += children[i].wordCount();
+ return wordCount;
+}
+
+int DawgFactory::Node::letterSum() const
+{
+ if (sumexplored)
+ return sum;
+
+ sumexplored = true;
+
+ // djb2 checksum
+ sum = 5381 * 33 + (int) c;
+
+ for (unsigned int i = 0; i < children.size(); i++)
+ sum = (sum << 5) + sum + children[i].letterSum();
+
+ return sum;
+}
diff --git a/quackleio/dawgfactory.h b/quackleio/dawgfactory.h
new file mode 100644
index 0000000..b2cfb76
--- /dev/null
+++ b/quackleio/dawgfactory.h
@@ -0,0 +1,79 @@
+/*
+ * Quackle -- Crossword game artificial intelligence and analysis tool
+ * Copyright (C) 2005-2014 Jason Katz-Brown and John O'Laughlin.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef QUACKLE_DAWGFACTORY_H
+#define QUACKLE_DAWGFACTORY_H
+
+#include <vector>
+#include "flexiblealphabet.h"
+
+
+class DawgFactory {
+public:
+
+ DawgFactory(const QString& alphabetFile);
+ ~DawgFactory();
+
+ int wordCount() const { return root.wordCount(); };
+ int nodeCount() const { return nodelist.size(); };
+ int encodableWords() const { return m_encodableWords; };
+ int unencodableWords() const { return m_unencodableWords; };
+
+ bool pushWord(const QString& word, bool inSmaller, int playability);
+ void generate();
+ void writeIndex(const QString& fname);
+
+private:
+ class Node {
+ public:
+ void pushWord(const Quackle::LetterString& word, bool inSmaller, int pb);
+ void print(vector< Node* >& nodelist);
+
+ int letterSum() const;
+ int wordCount() const;
+ bool equals(const Node &n) const;
+
+ Quackle::Letter c;
+ bool t;
+ bool insmallerdict;
+ int playability;
+
+ vector<Node> children;
+ int pointer;
+ int location;
+
+ bool lastchild;
+
+ mutable bool sumexplored;
+ mutable int sum;
+
+ bool deleted;
+ Node* cloneof;
+ bool written;
+ };
+
+ int m_encodableWords;
+ int m_unencodableWords;
+ vector< Node* > nodelist;
+ Quackle::AlphabetParameters *alphas;
+ Node root;
+};
+
+#endif
+
+