summary | refs | log | tree | commit | diff
path: root/makegaddag/makegaddag.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'makegaddag/makegaddag.cpp')
-rw-r--r-- makegaddag/makegaddag.cpp 202
1 files changed, 15 insertions, 187 deletions
diff --git a/makegaddag/makegaddag.cpp b/makegaddag/makegaddag.cpp
index 8e2ffac..ef2c439 100644
--- a/makegaddag/makegaddag.cpp
+++ b/makegaddag/makegaddag.cpp
@@ -29,79 +29,12 @@
#include <QtCore>
-#include <gaddag.h>
-#include <quackleio/flexiblealphabet.h>
-#include <quackleio/froggetopt.h>
-#include <quackleio/util.h>
+#include "quackleio/froggetopt.h"
+#include "quackleio/gaddagfactory.h"
+#include "quackleio/util.h"
using namespace std;
-class Node {
- public:
- Quackle::Letter c;
- bool t;
- vector<Node> children;
- int pointer;
- bool lastchild;
- void pushword(Quackle::LetterString word);
- void print(Quackle::LetterString prefix);
-};
-
-vector< Node* > nodelist;
-
-void Node::print(Quackle::LetterString prefix) {
- if (t) {
- //UVcout << QUACKLE_ALPHABET_PARAMETERS->userVisible(prefix)) << endl;
- }
-
- // UVcout << "prefix: " << QUACKLE_ALPHABET_PARAMETERS->userVisible(prefix) << ", children: " << children.size() << endl;
-
- if (children.size() > 0) {
- pointer = nodelist.size();
- children[children.size() - 1].lastchild = true;
- }
-
- for (size_t i = 0; i < children.size(); i++) {
- nodelist.push_back(&children[i]);
- }
-
- for (size_t i = 0; i < children.size(); i++) {
- children[i].print(prefix + children[i].c);
- }
-}
-
-
-void Node::pushword(Quackle::LetterString word) {
- if (word.length() == 0) {
- t = true;
- }
- else {
- Quackle::Letter first = Quackle::String::front(word);
- Quackle::LetterString rest = Quackle::String::allButFront(word);
- int index = -1;
-
- // cout << "first: " << first << ", rest: " << rest << endl;
-
- for (size_t i = 0; i < children.size(); i++) {
- if (children[i].c == first) {
- index = i;
- i = children.size();
- }
- }
-
- if (index == -1) {
- Node n;
- n.c = first;
- n.t = false;
- n.pointer = 0;
- n.lastchild = false;
- children.push_back(n);
- index = children.size() - 1;
- }
-
- children[index].pushword(rest);
- }
-}
int main(int argc, char **argv)
@@ -127,21 +60,9 @@ int main(int argc, char **argv)
if (outputFilename.isNull())
outputFilename = "output.gaddag";
- Quackle::AlphabetParameters *alphas = 0;
QString alphabetFile = QString("../data/alphabets/%1.quackle_alphabet").arg(alphabet);
UVcout << "Using alphabet file: " << QuackleIO::Util::qstringToString(alphabetFile) << endl;
- QuackleIO::FlexibleAlphabetParameters *flexure = new QuackleIO::FlexibleAlphabetParameters;
- flexure->load(alphabetFile);
- alphas = flexure;
-
- // So the separator is sorted to last.
- Quackle::Letter internalSeparatorRepresentation = QUACKLE_FIRST_LETTER + QUACKLE_MAXIMUM_ALPHABET_SIZE;
-
- Node root;
- root.t = false;
- root.c = QUACKLE_NULL_MARK; // "_"
- root.pointer = 0;
- root.lastchild = true;
+ GaddagFactory factory(QuackleIO::Util::qstringToString(alphabetFile));
QFile file(inputFilename);
if (!file.exists())
@@ -159,11 +80,6 @@ int main(int argc, char **argv)
QTextStream stream(&file);
stream.setCodec(QTextCodec::codecForName("UTF-8"));
- int encodableWords = 0;
- int unencodableWords = 0;
-
- Quackle::WordList gaddagizedWords;
-
while (!stream.atEnd())
{
QString originalQString;
@@ -172,115 +88,27 @@ int main(int argc, char **argv)
if (stream.atEnd())
break;
- UVString originalString = QuackleIO::Util::qstringToString(originalQString);
-
- UVString leftover;
- Quackle::LetterString encodedWord = alphas->encode(originalString, &leftover);
- if (leftover.empty())
- {
- //for (Quackle::LetterString::iterator it = encodedWord.begin(); it != encodedWord.end(); ++it)
- //UVcout << "got encoded letter: " << (int)(*it) << endl;
-
- ++encodableWords;
-
- for (unsigned i = 1; i <= encodedWord.length(); i++) {
- Quackle::LetterString newword;
-
- for (int j = i - 1; j >= 0; j--) {
- newword.push_back(encodedWord[j]);
- }
-
- if (i < encodedWord.length()) {
- newword.push_back(internalSeparatorRepresentation); // "^"
- for (unsigned j = i; j < encodedWord.length(); j++) {
- newword.push_back(encodedWord[j]);
- }
- }
- gaddagizedWords.push_back(newword);
- }
- }
- else
- {
- UVcout << "not encodable without leftover: " << originalString << endl;
- ++unencodableWords;
- }
+ if (!factory.pushWord(QuackleIO::Util::qstringToString(originalQString)))
+ UVcout << "not encodable without leftover: " << QuackleIO::Util::qstringToString(originalQString) << endl;
}
- UVcout << "Sorting " << gaddagizedWords.size () << " words..." << endl;
- sort(gaddagizedWords.begin(), gaddagizedWords.end());
+ UVcout << "Sorting " << factory.wordCount() << " words..." << endl;
+ factory.sortWords();
UVcout << "Generating nodes...";
- Quackle::WordList::const_iterator wordsEnd = gaddagizedWords.end();
- for (Quackle::WordList::const_iterator wordsIt = gaddagizedWords.begin(); wordsIt != wordsEnd; ++wordsIt)
- {
- root.pushword(*wordsIt);
- }
+ factory.generate();
UVcout << "Writing index...";
-
- nodelist.push_back(&root);
-
- root.print("");
-
- ofstream out(QuackleIO::Util::qstringToStdString(outputFilename).c_str(), ios::out | ios::binary);
-
- for (size_t i = 0; i < nodelist.size(); i++) {
- // UVcout << nodelist[i]->c << " " << nodelist[i]->pointer << " " << nodelist[i]->t << " " << nodelist[i]->lastchild << endl;
-
- unsigned int p = (unsigned int)(nodelist[i]->pointer);
- if (p != 0) {
- p -= i; // offset indexing
- }
-
- char bytes[4];
- unsigned char n1 = (p & 0x00FF0000) >> 16;
- /*
- UVcout << "byte 1: " << ((p & 0xFF000000) >> 24);
- UVcout << ", byte 2: " << ((p & 0x00FF0000) >> 8);
- UVcout << ", byte 3: " << ((p & 0x0000FF00) >> 8);
- UVcout << ", byte 4: " << ((p & 0x000000FF) >> 0) << endl;
- */
-
- unsigned char n2 = (p & 0x0000FF00) >> 8;
- unsigned char n3 = (p & 0x000000FF) >> 0;
- unsigned char n4;
-
- /*
- UVcout << "p: " << p << ", crap: " << (((unsigned int)(n1) << 24) |
- ((unsigned int)(n2) << 16) |
- ((unsigned int)(n3) << 8)) << endl;
- */
- n4 = nodelist[i]->c;
- if (n4 == internalSeparatorRepresentation)
- n4 = QUACKLE_GADDAG_SEPARATOR;
-
- if (nodelist[i]->t) {
- n4 |= 64;
- }
- if (nodelist[i]->lastchild) {
- n4 |= 128;
- }
-
- /*
- UVcout << "p: " << p << endl;;
- UVcout << "n4:" << (int)(n4) <<
- ", n1: " << (int)(n1) <<
- ", n2: " << (int)(n2) <<
- ", n3: " << (int)(n3) << endl;
- */
-
- //bytes[0] = n4; bytes[1] = n1; bytes[2] = n2; bytes[3] = n3;
- bytes[0] = n1; bytes[1] = n2; bytes[2] = n3; bytes[3] = n4;
- //out.write((const char*) &p, 4);
- out.write(bytes, 4);
- }
+ factory.writeIndex(outputFilename.toUtf8().constData());
UVcout << endl;
- UVcout << "Wrote " << encodableWords << " words over " << nodelist.size() << " nodes to " << QuackleIO::Util::qstringToString(outputFilename) << "." << endl;
+ UVcout << "Wrote " << factory.encodableWords() << " words over " << factory.nodeCount() << " nodes to " << QuackleIO::Util::qstringToString(outputFilename) << "." << endl;
+
+ UVcout << "Hash: " << QString(QByteArray(factory.hashBytes(), 16).toHex()).toStdString() << endl;
- if (unencodableWords > 0)
- UVcout << "There were " << unencodableWords << " words left out." << endl;
+ if (factory.unencodableWords() > 0)
+ UVcout << "There were " << factory.unencodableWords() << " words left out." << endl;
return 0;
}