diff options
-rw-r--r-- | makegaddag/makegaddag.cpp | 199 | ||||
-rw-r--r-- | quackleio/gaddagfactory.cpp | 166 | ||||
-rw-r--r-- | quackleio/gaddagfactory.h | 59 |
3 files changed, 237 insertions, 187 deletions
diff --git a/makegaddag/makegaddag.cpp b/makegaddag/makegaddag.cpp index 8e2ffac..b8fe276 100644 --- a/makegaddag/makegaddag.cpp +++ b/makegaddag/makegaddag.cpp @@ -29,79 +29,12 @@ #include <QtCore> -#include <gaddag.h> -#include <quackleio/flexiblealphabet.h> -#include <quackleio/froggetopt.h> -#include <quackleio/util.h> +#include "quackleio/froggetopt.h" +#include "quackleio/gaddagfactory.h" +#include "quackleio/util.h" using namespace std; -class Node { - public: - Quackle::Letter c; - bool t; - vector<Node> children; - int pointer; - bool lastchild; - void pushword(Quackle::LetterString word); - void print(Quackle::LetterString prefix); -}; - -vector< Node* > nodelist; - -void Node::print(Quackle::LetterString prefix) { - if (t) { - //UVcout << QUACKLE_ALPHABET_PARAMETERS->userVisible(prefix)) << endl; - } - - // UVcout << "prefix: " << QUACKLE_ALPHABET_PARAMETERS->userVisible(prefix) << ", children: " << children.size() << endl; - - if (children.size() > 0) { - pointer = nodelist.size(); - children[children.size() - 1].lastchild = true; - } - - for (size_t i = 0; i < children.size(); i++) { - nodelist.push_back(&children[i]); - } - - for (size_t i = 0; i < children.size(); i++) { - children[i].print(prefix + children[i].c); - } -} - - -void Node::pushword(Quackle::LetterString word) { - if (word.length() == 0) { - t = true; - } - else { - Quackle::Letter first = Quackle::String::front(word); - Quackle::LetterString rest = Quackle::String::allButFront(word); - int index = -1; - - // cout << "first: " << first << ", rest: " << rest << endl; - - for (size_t i = 0; i < children.size(); i++) { - if (children[i].c == first) { - index = i; - i = children.size(); - } - } - - if (index == -1) { - Node n; - n.c = first; - n.t = false; - n.pointer = 0; - n.lastchild = false; - children.push_back(n); - index = children.size() - 1; - } - - children[index].pushword(rest); - } -} int main(int argc, char **argv) @@ -127,21 +60,9 @@ int main(int argc, char **argv) if (outputFilename.isNull()) outputFilename = "output.gaddag"; - Quackle::AlphabetParameters *alphas = 0; QString alphabetFile = QString("../data/alphabets/%1.quackle_alphabet").arg(alphabet); UVcout << "Using alphabet file: " << QuackleIO::Util::qstringToString(alphabetFile) << endl; - QuackleIO::FlexibleAlphabetParameters *flexure = new QuackleIO::FlexibleAlphabetParameters; - flexure->load(alphabetFile); - alphas = flexure; - - // So the separator is sorted to last. - Quackle::Letter internalSeparatorRepresentation = QUACKLE_FIRST_LETTER + QUACKLE_MAXIMUM_ALPHABET_SIZE; - - Node root; - root.t = false; - root.c = QUACKLE_NULL_MARK; // "_" - root.pointer = 0; - root.lastchild = true; + GaddagFactory factory(alphabetFile); QFile file(inputFilename); if (!file.exists()) @@ -159,11 +80,6 @@ int main(int argc, char **argv) QTextStream stream(&file); stream.setCodec(QTextCodec::codecForName("UTF-8")); - int encodableWords = 0; - int unencodableWords = 0; - - Quackle::WordList gaddagizedWords; - while (!stream.atEnd()) { QString originalQString; @@ -172,115 +88,24 @@ int main(int argc, char **argv) if (stream.atEnd()) break; - UVString originalString = QuackleIO::Util::qstringToString(originalQString); - - UVString leftover; - Quackle::LetterString encodedWord = alphas->encode(originalString, &leftover); - if (leftover.empty()) - { - //for (Quackle::LetterString::iterator it = encodedWord.begin(); it != encodedWord.end(); ++it) - //UVcout << "got encoded letter: " << (int)(*it) << endl; - - ++encodableWords; - - for (unsigned i = 1; i <= encodedWord.length(); i++) { - Quackle::LetterString newword; - - for (int j = i - 1; j >= 0; j--) { - newword.push_back(encodedWord[j]); - } - - if (i < encodedWord.length()) { - newword.push_back(internalSeparatorRepresentation); // "^" - for (unsigned j = i; j < encodedWord.length(); j++) { - newword.push_back(encodedWord[j]); - } - } - gaddagizedWords.push_back(newword); - } - } - else - { - UVcout << "not encodable without leftover: " << originalString << endl; - ++unencodableWords; - } + factory.pushWord(originalQString); } - UVcout << "Sorting " << gaddagizedWords.size () << " words..." << endl; - sort(gaddagizedWords.begin(), gaddagizedWords.end()); + UVcout << "Sorting " << factory.wordCount() << " words..." << endl; + factory.sortWords(); UVcout << "Generating nodes..."; - Quackle::WordList::const_iterator wordsEnd = gaddagizedWords.end(); - for (Quackle::WordList::const_iterator wordsIt = gaddagizedWords.begin(); wordsIt != wordsEnd; ++wordsIt) - { - root.pushword(*wordsIt); - } + factory.generate(); UVcout << "Writing index..."; - - nodelist.push_back(&root); - - root.print(""); - - ofstream out(QuackleIO::Util::qstringToStdString(outputFilename).c_str(), ios::out | ios::binary); - - for (size_t i = 0; i < nodelist.size(); i++) { - // UVcout << nodelist[i]->c << " " << nodelist[i]->pointer << " " << nodelist[i]->t << " " << nodelist[i]->lastchild << endl; - - unsigned int p = (unsigned int)(nodelist[i]->pointer); - if (p != 0) { - p -= i; // offset indexing - } - - char bytes[4]; - unsigned char n1 = (p & 0x00FF0000) >> 16; - /* - UVcout << "byte 1: " << ((p & 0xFF000000) >> 24); - UVcout << ", byte 2: " << ((p & 0x00FF0000) >> 8); - UVcout << ", byte 3: " << ((p & 0x0000FF00) >> 8); - UVcout << ", byte 4: " << ((p & 0x000000FF) >> 0) << endl; - */ - - unsigned char n2 = (p & 0x0000FF00) >> 8; - unsigned char n3 = (p & 0x000000FF) >> 0; - unsigned char n4; - - /* - UVcout << "p: " << p << ", crap: " << (((unsigned int)(n1) << 24) | - ((unsigned int)(n2) << 16) | - ((unsigned int)(n3) << 8)) << endl; - */ - n4 = nodelist[i]->c; - if (n4 == internalSeparatorRepresentation) - n4 = QUACKLE_GADDAG_SEPARATOR; - - if (nodelist[i]->t) { - n4 |= 64; - } - if (nodelist[i]->lastchild) { - n4 |= 128; - } - - /* - UVcout << "p: " << p << endl;; - UVcout << "n4:" << (int)(n4) << - ", n1: " << (int)(n1) << - ", n2: " << (int)(n2) << - ", n3: " << (int)(n3) << endl; - */ - - //bytes[0] = n4; bytes[1] = n1; bytes[2] = n2; bytes[3] = n3; - bytes[0] = n1; bytes[1] = n2; bytes[2] = n3; bytes[3] = n4; - //out.write((const char*) &p, 4); - out.write(bytes, 4); - } + factory.writeIndex(outputFilename); UVcout << endl; - UVcout << "Wrote " << encodableWords << " words over " << nodelist.size() << " nodes to " << QuackleIO::Util::qstringToString(outputFilename) << "." << endl; + UVcout << "Wrote " << factory.encodableWords() << " words over " << factory.nodeCount() << " nodes to " << QuackleIO::Util::qstringToString(outputFilename) << "." << endl; - if (unencodableWords > 0) - UVcout << "There were " << unencodableWords << " words left out." << endl; + if (factory.unencodableWords() > 0) + UVcout << "There were " << factory.unencodableWords() << " words left out." << endl; return 0; } diff --git a/quackleio/gaddagfactory.cpp b/quackleio/gaddagfactory.cpp new file mode 100644 index 0000000..3a608f0 --- /dev/null +++ b/quackleio/gaddagfactory.cpp @@ -0,0 +1,166 @@ +/* + * Quackle -- Crossword game artificial intelligence and analysis tool + * Copyright (C) 2005-2014 Jason Katz-Brown and John O'Laughlin. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + + +#include <iostream> +#include <QtCore> + +#include "gaddagfactory.h" +#include "util.h" + +GaddagFactory::GaddagFactory(const QString& alphabetFile) +{ + QuackleIO::FlexibleAlphabetParameters *flexure = new QuackleIO::FlexibleAlphabetParameters; + flexure->load(alphabetFile); + alphas = flexure; + + // So the separator is sorted to last. + root.t = false; + root.c = QUACKLE_NULL_MARK; // "_" + root.pointer = 0; + root.lastchild = true; +} + +void GaddagFactory::pushWord(const QString& word) +{ + UVString originalString = QuackleIO::Util::qstringToString(word); + + UVString leftover; + Quackle::LetterString encodedWord = alphas->encode(originalString, &leftover); + if (leftover.empty()) + { + ++m_encodableWords; + + for (unsigned i = 1; i <= encodedWord.length(); i++) + { + Quackle::LetterString newword; + + for (int j = i - 1; j >= 0; j--) + newword.push_back(encodedWord[j]); + + if (i < encodedWord.length()) + { + newword.push_back(internalSeparatorRepresentation); // "^" + for (unsigned j = i; j < encodedWord.length(); j++) + newword.push_back(encodedWord[j]); + } + gaddagizedWords.push_back(newword); + } + } + else + { + UVcout << "not encodable without leftover: " << originalString << endl; + ++m_unencodableWords; + } +} + +void GaddagFactory::generate() +{ + Quackle::WordList::const_iterator wordsEnd = gaddagizedWords.end(); + for (Quackle::WordList::const_iterator wordsIt = gaddagizedWords.begin(); wordsIt != wordsEnd; ++wordsIt) + root.pushWord(*wordsIt); + // for (const auto& words : gaddaggizedWords) + // root.pushWord(words); +} + +void GaddagFactory::writeIndex(const QString& fname) +{ + nodelist.push_back(&root); + + root.print(nodelist, ""); + + ofstream out(QuackleIO::Util::qstringToStdString(fname).c_str(), ios::out | ios::binary); + + for (size_t i = 0; i < nodelist.size(); i++) + { + unsigned int p = (unsigned int)(nodelist[i]->pointer); + if (p != 0) + p -= i; // offset indexing + + char bytes[4]; + unsigned char n1 = (p & 0x00FF0000) >> 16; + unsigned char n2 = (p & 0x0000FF00) >> 8; + unsigned char n3 = (p & 0x000000FF) >> 0; + unsigned char n4; + + n4 = nodelist[i]->c; + if (n4 == internalSeparatorRepresentation) + n4 = QUACKLE_NULL_MARK; + + if (nodelist[i]->t) + n4 |= 64; + + if (nodelist[i]->lastchild) + n4 |= 128; + + bytes[0] = n1; bytes[1] = n2; bytes[2] = n3; bytes[3] = n4; + out.write(bytes, 4); + } +} + + +void GaddagFactory::Node::print(vector< Node* > nodelist, Quackle::LetterString prefix) +{ + if (children.size() > 0) + { + pointer = nodelist.size(); + children[children.size() - 1].lastchild = true; + } + + for (size_t i = 0; i < children.size(); i++) + nodelist.push_back(&children[i]); + + for (size_t i = 0; i < children.size(); i++) + children[i].print(nodelist, prefix + children[i].c); +} + + +void GaddagFactory::Node::pushWord(Quackle::LetterString word) +{ + if (word.length() == 0) + { + t = true; + return; + } + + Quackle::Letter first = Quackle::String::front(word); + Quackle::LetterString rest = Quackle::String::allButFront(word); + int index = -1; + + for (size_t i = 0; i < children.size(); i++) + { + if (children[i].c == first) + { + index = i; + i = children.size(); + } + } + + if (index == -1) + { + Node n; + n.c = first; + n.t = false; + n.pointer = 0; + n.lastchild = false; + children.push_back(n); + index = children.size() - 1; + } + + children[index].pushWord(rest); +} diff --git a/quackleio/gaddagfactory.h b/quackleio/gaddagfactory.h new file mode 100644 index 0000000..ca3bc40 --- /dev/null +++ b/quackleio/gaddagfactory.h @@ -0,0 +1,59 @@ +/* + * Quackle -- Crossword game artificial intelligence and analysis tool + * Copyright (C) 2005-2014 Jason Katz-Brown and John O'Laughlin. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "flexiblealphabet.h" + + +class GaddagFactory { +public: + + static const Quackle::Letter internalSeparatorRepresentation = QUACKLE_FIRST_LETTER + QUACKLE_MAXIMUM_ALPHABET_SIZE; + + GaddagFactory(const QString& alphabetFile); + + int wordCount() const { return gaddagizedWords.size(); }; + int nodeCount() const { return nodelist.size(); }; + int encodableWords() const { return m_encodableWords; }; + int unencodableWords() const { return m_unencodableWords; }; + + void pushWord(const QString& word); + void sortWords() { sort(gaddagizedWords.begin(), gaddagizedWords.end()); }; + void generate(); + void writeIndex(const QString& fname); + +private: + class Node { + public: + Quackle::Letter c; + bool t; + vector<Node> children; + int pointer; + bool lastchild; + void pushWord(Quackle::LetterString word); + void print(vector< Node* > nodelist, Quackle::LetterString prefix); + }; + + int m_encodableWords; + int m_unencodableWords; + Quackle::WordList gaddagizedWords; + vector< Node* > nodelist; + Quackle::AlphabetParameters *alphas; + Node root; + + +}; |