summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--makegaddag/makegaddag.cpp199
-rw-r--r--quackleio/gaddagfactory.cpp166
-rw-r--r--quackleio/gaddagfactory.h59
3 files changed, 237 insertions, 187 deletions
diff --git a/makegaddag/makegaddag.cpp b/makegaddag/makegaddag.cpp
index 8e2ffac..b8fe276 100644
--- a/makegaddag/makegaddag.cpp
+++ b/makegaddag/makegaddag.cpp
@@ -29,79 +29,12 @@
#include <QtCore>
-#include <gaddag.h>
-#include <quackleio/flexiblealphabet.h>
-#include <quackleio/froggetopt.h>
-#include <quackleio/util.h>
+#include "quackleio/froggetopt.h"
+#include "quackleio/gaddagfactory.h"
+#include "quackleio/util.h"
using namespace std;
-class Node {
- public:
- Quackle::Letter c;
- bool t;
- vector<Node> children;
- int pointer;
- bool lastchild;
- void pushword(Quackle::LetterString word);
- void print(Quackle::LetterString prefix);
-};
-
-vector< Node* > nodelist;
-
-void Node::print(Quackle::LetterString prefix) {
- if (t) {
- //UVcout << QUACKLE_ALPHABET_PARAMETERS->userVisible(prefix)) << endl;
- }
-
- // UVcout << "prefix: " << QUACKLE_ALPHABET_PARAMETERS->userVisible(prefix) << ", children: " << children.size() << endl;
-
- if (children.size() > 0) {
- pointer = nodelist.size();
- children[children.size() - 1].lastchild = true;
- }
-
- for (size_t i = 0; i < children.size(); i++) {
- nodelist.push_back(&children[i]);
- }
-
- for (size_t i = 0; i < children.size(); i++) {
- children[i].print(prefix + children[i].c);
- }
-}
-
-
-void Node::pushword(Quackle::LetterString word) {
- if (word.length() == 0) {
- t = true;
- }
- else {
- Quackle::Letter first = Quackle::String::front(word);
- Quackle::LetterString rest = Quackle::String::allButFront(word);
- int index = -1;
-
- // cout << "first: " << first << ", rest: " << rest << endl;
-
- for (size_t i = 0; i < children.size(); i++) {
- if (children[i].c == first) {
- index = i;
- i = children.size();
- }
- }
-
- if (index == -1) {
- Node n;
- n.c = first;
- n.t = false;
- n.pointer = 0;
- n.lastchild = false;
- children.push_back(n);
- index = children.size() - 1;
- }
-
- children[index].pushword(rest);
- }
-}
int main(int argc, char **argv)
@@ -127,21 +60,9 @@ int main(int argc, char **argv)
if (outputFilename.isNull())
outputFilename = "output.gaddag";
- Quackle::AlphabetParameters *alphas = 0;
QString alphabetFile = QString("../data/alphabets/%1.quackle_alphabet").arg(alphabet);
UVcout << "Using alphabet file: " << QuackleIO::Util::qstringToString(alphabetFile) << endl;
- QuackleIO::FlexibleAlphabetParameters *flexure = new QuackleIO::FlexibleAlphabetParameters;
- flexure->load(alphabetFile);
- alphas = flexure;
-
- // So the separator is sorted to last.
- Quackle::Letter internalSeparatorRepresentation = QUACKLE_FIRST_LETTER + QUACKLE_MAXIMUM_ALPHABET_SIZE;
-
- Node root;
- root.t = false;
- root.c = QUACKLE_NULL_MARK; // "_"
- root.pointer = 0;
- root.lastchild = true;
+ GaddagFactory factory(alphabetFile);
QFile file(inputFilename);
if (!file.exists())
@@ -159,11 +80,6 @@ int main(int argc, char **argv)
QTextStream stream(&file);
stream.setCodec(QTextCodec::codecForName("UTF-8"));
- int encodableWords = 0;
- int unencodableWords = 0;
-
- Quackle::WordList gaddagizedWords;
-
while (!stream.atEnd())
{
QString originalQString;
@@ -172,115 +88,24 @@ int main(int argc, char **argv)
if (stream.atEnd())
break;
- UVString originalString = QuackleIO::Util::qstringToString(originalQString);
-
- UVString leftover;
- Quackle::LetterString encodedWord = alphas->encode(originalString, &leftover);
- if (leftover.empty())
- {
- //for (Quackle::LetterString::iterator it = encodedWord.begin(); it != encodedWord.end(); ++it)
- //UVcout << "got encoded letter: " << (int)(*it) << endl;
-
- ++encodableWords;
-
- for (unsigned i = 1; i <= encodedWord.length(); i++) {
- Quackle::LetterString newword;
-
- for (int j = i - 1; j >= 0; j--) {
- newword.push_back(encodedWord[j]);
- }
-
- if (i < encodedWord.length()) {
- newword.push_back(internalSeparatorRepresentation); // "^"
- for (unsigned j = i; j < encodedWord.length(); j++) {
- newword.push_back(encodedWord[j]);
- }
- }
- gaddagizedWords.push_back(newword);
- }
- }
- else
- {
- UVcout << "not encodable without leftover: " << originalString << endl;
- ++unencodableWords;
- }
+ factory.pushWord(originalQString);
}
- UVcout << "Sorting " << gaddagizedWords.size () << " words..." << endl;
- sort(gaddagizedWords.begin(), gaddagizedWords.end());
+ UVcout << "Sorting " << factory.wordCount() << " words..." << endl;
+ factory.sortWords();
UVcout << "Generating nodes...";
- Quackle::WordList::const_iterator wordsEnd = gaddagizedWords.end();
- for (Quackle::WordList::const_iterator wordsIt = gaddagizedWords.begin(); wordsIt != wordsEnd; ++wordsIt)
- {
- root.pushword(*wordsIt);
- }
+ factory.generate();
UVcout << "Writing index...";
-
- nodelist.push_back(&root);
-
- root.print("");
-
- ofstream out(QuackleIO::Util::qstringToStdString(outputFilename).c_str(), ios::out | ios::binary);
-
- for (size_t i = 0; i < nodelist.size(); i++) {
- // UVcout << nodelist[i]->c << " " << nodelist[i]->pointer << " " << nodelist[i]->t << " " << nodelist[i]->lastchild << endl;
-
- unsigned int p = (unsigned int)(nodelist[i]->pointer);
- if (p != 0) {
- p -= i; // offset indexing
- }
-
- char bytes[4];
- unsigned char n1 = (p & 0x00FF0000) >> 16;
- /*
- UVcout << "byte 1: " << ((p & 0xFF000000) >> 24);
- UVcout << ", byte 2: " << ((p & 0x00FF0000) >> 8);
- UVcout << ", byte 3: " << ((p & 0x0000FF00) >> 8);
- UVcout << ", byte 4: " << ((p & 0x000000FF) >> 0) << endl;
- */
-
- unsigned char n2 = (p & 0x0000FF00) >> 8;
- unsigned char n3 = (p & 0x000000FF) >> 0;
- unsigned char n4;
-
- /*
- UVcout << "p: " << p << ", crap: " << (((unsigned int)(n1) << 24) |
- ((unsigned int)(n2) << 16) |
- ((unsigned int)(n3) << 8)) << endl;
- */
- n4 = nodelist[i]->c;
- if (n4 == internalSeparatorRepresentation)
- n4 = QUACKLE_GADDAG_SEPARATOR;
-
- if (nodelist[i]->t) {
- n4 |= 64;
- }
- if (nodelist[i]->lastchild) {
- n4 |= 128;
- }
-
- /*
- UVcout << "p: " << p << endl;;
- UVcout << "n4:" << (int)(n4) <<
- ", n1: " << (int)(n1) <<
- ", n2: " << (int)(n2) <<
- ", n3: " << (int)(n3) << endl;
- */
-
- //bytes[0] = n4; bytes[1] = n1; bytes[2] = n2; bytes[3] = n3;
- bytes[0] = n1; bytes[1] = n2; bytes[2] = n3; bytes[3] = n4;
- //out.write((const char*) &p, 4);
- out.write(bytes, 4);
- }
+ factory.writeIndex(outputFilename);
UVcout << endl;
- UVcout << "Wrote " << encodableWords << " words over " << nodelist.size() << " nodes to " << QuackleIO::Util::qstringToString(outputFilename) << "." << endl;
+ UVcout << "Wrote " << factory.encodableWords() << " words over " << factory.nodeCount() << " nodes to " << QuackleIO::Util::qstringToString(outputFilename) << "." << endl;
- if (unencodableWords > 0)
- UVcout << "There were " << unencodableWords << " words left out." << endl;
+ if (factory.unencodableWords() > 0)
+ UVcout << "There were " << factory.unencodableWords() << " words left out." << endl;
return 0;
}
diff --git a/quackleio/gaddagfactory.cpp b/quackleio/gaddagfactory.cpp
new file mode 100644
index 0000000..3a608f0
--- /dev/null
+++ b/quackleio/gaddagfactory.cpp
@@ -0,0 +1,166 @@
+/*
+ * Quackle -- Crossword game artificial intelligence and analysis tool
+ * Copyright (C) 2005-2014 Jason Katz-Brown and John O'Laughlin.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#include <iostream>
+#include <QtCore>
+
+#include "gaddagfactory.h"
+#include "util.h"
+
+GaddagFactory::GaddagFactory(const QString& alphabetFile)
+{
+ QuackleIO::FlexibleAlphabetParameters *flexure = new QuackleIO::FlexibleAlphabetParameters;
+ flexure->load(alphabetFile);
+ alphas = flexure;
+
+ // So the separator is sorted to last.
+ root.t = false;
+ root.c = QUACKLE_NULL_MARK; // "_"
+ root.pointer = 0;
+ root.lastchild = true;
+}
+
+void GaddagFactory::pushWord(const QString& word)
+{
+ UVString originalString = QuackleIO::Util::qstringToString(word);
+
+ UVString leftover;
+ Quackle::LetterString encodedWord = alphas->encode(originalString, &leftover);
+ if (leftover.empty())
+ {
+ ++m_encodableWords;
+
+ for (unsigned i = 1; i <= encodedWord.length(); i++)
+ {
+ Quackle::LetterString newword;
+
+ for (int j = i - 1; j >= 0; j--)
+ newword.push_back(encodedWord[j]);
+
+ if (i < encodedWord.length())
+ {
+ newword.push_back(internalSeparatorRepresentation); // "^"
+ for (unsigned j = i; j < encodedWord.length(); j++)
+ newword.push_back(encodedWord[j]);
+ }
+ gaddagizedWords.push_back(newword);
+ }
+ }
+ else
+ {
+ UVcout << "not encodable without leftover: " << originalString << endl;
+ ++m_unencodableWords;
+ }
+}
+
+void GaddagFactory::generate()
+{
+ Quackle::WordList::const_iterator wordsEnd = gaddagizedWords.end();
+ for (Quackle::WordList::const_iterator wordsIt = gaddagizedWords.begin(); wordsIt != wordsEnd; ++wordsIt)
+ root.pushWord(*wordsIt);
+ // for (const auto& words : gaddaggizedWords)
+ // root.pushWord(words);
+}
+
+void GaddagFactory::writeIndex(const QString& fname)
+{
+ nodelist.push_back(&root);
+
+ root.print(nodelist, "");
+
+ ofstream out(QuackleIO::Util::qstringToStdString(fname).c_str(), ios::out | ios::binary);
+
+ for (size_t i = 0; i < nodelist.size(); i++)
+ {
+ unsigned int p = (unsigned int)(nodelist[i]->pointer);
+ if (p != 0)
+ p -= i; // offset indexing
+
+ char bytes[4];
+ unsigned char n1 = (p & 0x00FF0000) >> 16;
+ unsigned char n2 = (p & 0x0000FF00) >> 8;
+ unsigned char n3 = (p & 0x000000FF) >> 0;
+ unsigned char n4;
+
+ n4 = nodelist[i]->c;
+ if (n4 == internalSeparatorRepresentation)
+ n4 = QUACKLE_NULL_MARK;
+
+ if (nodelist[i]->t)
+ n4 |= 64;
+
+ if (nodelist[i]->lastchild)
+ n4 |= 128;
+
+ bytes[0] = n1; bytes[1] = n2; bytes[2] = n3; bytes[3] = n4;
+ out.write(bytes, 4);
+ }
+}
+
+
+void GaddagFactory::Node::print(vector< Node* > nodelist, Quackle::LetterString prefix)
+{
+ if (children.size() > 0)
+ {
+ pointer = nodelist.size();
+ children[children.size() - 1].lastchild = true;
+ }
+
+ for (size_t i = 0; i < children.size(); i++)
+ nodelist.push_back(&children[i]);
+
+ for (size_t i = 0; i < children.size(); i++)
+ children[i].print(nodelist, prefix + children[i].c);
+}
+
+
+void GaddagFactory::Node::pushWord(Quackle::LetterString word)
+{
+ if (word.length() == 0)
+ {
+ t = true;
+ return;
+ }
+
+ Quackle::Letter first = Quackle::String::front(word);
+ Quackle::LetterString rest = Quackle::String::allButFront(word);
+ int index = -1;
+
+ for (size_t i = 0; i < children.size(); i++)
+ {
+ if (children[i].c == first)
+ {
+ index = i;
+ i = children.size();
+ }
+ }
+
+ if (index == -1)
+ {
+ Node n;
+ n.c = first;
+ n.t = false;
+ n.pointer = 0;
+ n.lastchild = false;
+ children.push_back(n);
+ index = children.size() - 1;
+ }
+
+ children[index].pushWord(rest);
+}
diff --git a/quackleio/gaddagfactory.h b/quackleio/gaddagfactory.h
new file mode 100644
index 0000000..ca3bc40
--- /dev/null
+++ b/quackleio/gaddagfactory.h
@@ -0,0 +1,59 @@
+/*
+ * Quackle -- Crossword game artificial intelligence and analysis tool
+ * Copyright (C) 2005-2014 Jason Katz-Brown and John O'Laughlin.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "flexiblealphabet.h"
+
+
+class GaddagFactory {
+public:
+
+ static const Quackle::Letter internalSeparatorRepresentation = QUACKLE_FIRST_LETTER + QUACKLE_MAXIMUM_ALPHABET_SIZE;
+
+ GaddagFactory(const QString& alphabetFile);
+
+ int wordCount() const { return gaddagizedWords.size(); };
+ int nodeCount() const { return nodelist.size(); };
+ int encodableWords() const { return m_encodableWords; };
+ int unencodableWords() const { return m_unencodableWords; };
+
+ void pushWord(const QString& word);
+ void sortWords() { sort(gaddagizedWords.begin(), gaddagizedWords.end()); };
+ void generate();
+ void writeIndex(const QString& fname);
+
+private:
+ class Node {
+ public:
+ Quackle::Letter c;
+ bool t;
+ vector<Node> children;
+ int pointer;
+ bool lastchild;
+ void pushWord(Quackle::LetterString word);
+ void print(vector< Node* > nodelist, Quackle::LetterString prefix);
+ };
+
+ int m_encodableWords;
+ int m_unencodableWords;
+ Quackle::WordList gaddagizedWords;
+ vector< Node* > nodelist;
+ Quackle::AlphabetParameters *alphas;
+ Node root;
+
+
+};