diff options
Diffstat (limited to 'makegaddag')
-rw-r--r-- | makegaddag/.gitignore | 2 | ||||
-rw-r--r-- | makegaddag/makegaddag.cpp | 202 | ||||
-rwxr-xr-x | makegaddag/makegaddag.pro | 10 |
3 files changed, 24 insertions, 190 deletions
diff --git a/makegaddag/.gitignore b/makegaddag/.gitignore index f878785..dbd3941 100644 --- a/makegaddag/.gitignore +++ b/makegaddag/.gitignore @@ -4,3 +4,5 @@ Makefile.Debug Makefile.Release debug release +makegaddag + diff --git a/makegaddag/makegaddag.cpp b/makegaddag/makegaddag.cpp index 8e2ffac..ef2c439 100644 --- a/makegaddag/makegaddag.cpp +++ b/makegaddag/makegaddag.cpp @@ -29,79 +29,12 @@ #include <QtCore> -#include <gaddag.h> -#include <quackleio/flexiblealphabet.h> -#include <quackleio/froggetopt.h> -#include <quackleio/util.h> +#include "quackleio/froggetopt.h" +#include "quackleio/gaddagfactory.h" +#include "quackleio/util.h" using namespace std; -class Node { - public: - Quackle::Letter c; - bool t; - vector<Node> children; - int pointer; - bool lastchild; - void pushword(Quackle::LetterString word); - void print(Quackle::LetterString prefix); -}; - -vector< Node* > nodelist; - -void Node::print(Quackle::LetterString prefix) { - if (t) { - //UVcout << QUACKLE_ALPHABET_PARAMETERS->userVisible(prefix)) << endl; - } - - // UVcout << "prefix: " << QUACKLE_ALPHABET_PARAMETERS->userVisible(prefix) << ", children: " << children.size() << endl; - - if (children.size() > 0) { - pointer = nodelist.size(); - children[children.size() - 1].lastchild = true; - } - - for (size_t i = 0; i < children.size(); i++) { - nodelist.push_back(&children[i]); - } - - for (size_t i = 0; i < children.size(); i++) { - children[i].print(prefix + children[i].c); - } -} - - -void Node::pushword(Quackle::LetterString word) { - if (word.length() == 0) { - t = true; - } - else { - Quackle::Letter first = Quackle::String::front(word); - Quackle::LetterString rest = Quackle::String::allButFront(word); - int index = -1; - - // cout << "first: " << first << ", rest: " << rest << endl; - - for (size_t i = 0; i < children.size(); i++) { - if (children[i].c == first) { - index = i; - i = children.size(); - } - } - - if (index == -1) { - Node n; - n.c = first; - n.t = false; - n.pointer = 0; - n.lastchild = false; - children.push_back(n); - index = children.size() - 1; - } - - children[index].pushword(rest); - } -} int main(int argc, char **argv) @@ -127,21 +60,9 @@ int main(int argc, char **argv) if (outputFilename.isNull()) outputFilename = "output.gaddag"; - Quackle::AlphabetParameters *alphas = 0; QString alphabetFile = QString("../data/alphabets/%1.quackle_alphabet").arg(alphabet); UVcout << "Using alphabet file: " << QuackleIO::Util::qstringToString(alphabetFile) << endl; - QuackleIO::FlexibleAlphabetParameters *flexure = new QuackleIO::FlexibleAlphabetParameters; - flexure->load(alphabetFile); - alphas = flexure; - - // So the separator is sorted to last. - Quackle::Letter internalSeparatorRepresentation = QUACKLE_FIRST_LETTER + QUACKLE_MAXIMUM_ALPHABET_SIZE; - - Node root; - root.t = false; - root.c = QUACKLE_NULL_MARK; // "_" - root.pointer = 0; - root.lastchild = true; + GaddagFactory factory(QuackleIO::Util::qstringToString(alphabetFile)); QFile file(inputFilename); if (!file.exists()) @@ -159,11 +80,6 @@ int main(int argc, char **argv) QTextStream stream(&file); stream.setCodec(QTextCodec::codecForName("UTF-8")); - int encodableWords = 0; - int unencodableWords = 0; - - Quackle::WordList gaddagizedWords; - while (!stream.atEnd()) { QString originalQString; @@ -172,115 +88,27 @@ int main(int argc, char **argv) if (stream.atEnd()) break; - UVString originalString = QuackleIO::Util::qstringToString(originalQString); - - UVString leftover; - Quackle::LetterString encodedWord = alphas->encode(originalString, &leftover); - if (leftover.empty()) - { - //for (Quackle::LetterString::iterator it = encodedWord.begin(); it != encodedWord.end(); ++it) - //UVcout << "got encoded letter: " << (int)(*it) << endl; - - ++encodableWords; - - for (unsigned i = 1; i <= encodedWord.length(); i++) { - Quackle::LetterString newword; - - for (int j = i - 1; j >= 0; j--) { - newword.push_back(encodedWord[j]); - } - - if (i < encodedWord.length()) { - newword.push_back(internalSeparatorRepresentation); // "^" - for (unsigned j = i; j < encodedWord.length(); j++) { - newword.push_back(encodedWord[j]); - } - } - gaddagizedWords.push_back(newword); - } - } - else - { - UVcout << "not encodable without leftover: " << originalString << endl; - ++unencodableWords; - } + if (!factory.pushWord(QuackleIO::Util::qstringToString(originalQString))) + UVcout << "not encodable without leftover: " << QuackleIO::Util::qstringToString(originalQString) << endl; } - UVcout << "Sorting " << gaddagizedWords.size () << " words..." << endl; - sort(gaddagizedWords.begin(), gaddagizedWords.end()); + UVcout << "Sorting " << factory.wordCount() << " words..." << endl; + factory.sortWords(); UVcout << "Generating nodes..."; - Quackle::WordList::const_iterator wordsEnd = gaddagizedWords.end(); - for (Quackle::WordList::const_iterator wordsIt = gaddagizedWords.begin(); wordsIt != wordsEnd; ++wordsIt) - { - root.pushword(*wordsIt); - } + factory.generate(); UVcout << "Writing index..."; - - nodelist.push_back(&root); - - root.print(""); - - ofstream out(QuackleIO::Util::qstringToStdString(outputFilename).c_str(), ios::out | ios::binary); - - for (size_t i = 0; i < nodelist.size(); i++) { - // UVcout << nodelist[i]->c << " " << nodelist[i]->pointer << " " << nodelist[i]->t << " " << nodelist[i]->lastchild << endl; - - unsigned int p = (unsigned int)(nodelist[i]->pointer); - if (p != 0) { - p -= i; // offset indexing - } - - char bytes[4]; - unsigned char n1 = (p & 0x00FF0000) >> 16; - /* - UVcout << "byte 1: " << ((p & 0xFF000000) >> 24); - UVcout << ", byte 2: " << ((p & 0x00FF0000) >> 8); - UVcout << ", byte 3: " << ((p & 0x0000FF00) >> 8); - UVcout << ", byte 4: " << ((p & 0x000000FF) >> 0) << endl; - */ - - unsigned char n2 = (p & 0x0000FF00) >> 8; - unsigned char n3 = (p & 0x000000FF) >> 0; - unsigned char n4; - - /* - UVcout << "p: " << p << ", crap: " << (((unsigned int)(n1) << 24) | - ((unsigned int)(n2) << 16) | - ((unsigned int)(n3) << 8)) << endl; - */ - n4 = nodelist[i]->c; - if (n4 == internalSeparatorRepresentation) - n4 = QUACKLE_GADDAG_SEPARATOR; - - if (nodelist[i]->t) { - n4 |= 64; - } - if (nodelist[i]->lastchild) { - n4 |= 128; - } - - /* - UVcout << "p: " << p << endl;; - UVcout << "n4:" << (int)(n4) << - ", n1: " << (int)(n1) << - ", n2: " << (int)(n2) << - ", n3: " << (int)(n3) << endl; - */ - - //bytes[0] = n4; bytes[1] = n1; bytes[2] = n2; bytes[3] = n3; - bytes[0] = n1; bytes[1] = n2; bytes[2] = n3; bytes[3] = n4; - //out.write((const char*) &p, 4); - out.write(bytes, 4); - } + factory.writeIndex(outputFilename.toUtf8().constData()); UVcout << endl; - UVcout << "Wrote " << encodableWords << " words over " << nodelist.size() << " nodes to " << QuackleIO::Util::qstringToString(outputFilename) << "." << endl; + UVcout << "Wrote " << factory.encodableWords() << " words over " << factory.nodeCount() << " nodes to " << QuackleIO::Util::qstringToString(outputFilename) << "." << endl; + + UVcout << "Hash: " << QString(QByteArray(factory.hashBytes(), 16).toHex()).toStdString() << endl; - if (unencodableWords > 0) - UVcout << "There were " << unencodableWords << " words left out." << endl; + if (factory.unencodableWords() > 0) + UVcout << "There were " << factory.unencodableWords() << " words left out." << endl; return 0; } diff --git a/makegaddag/makegaddag.pro b/makegaddag/makegaddag.pro index dfd1259..9895c99 100755 --- a/makegaddag/makegaddag.pro +++ b/makegaddag/makegaddag.pro @@ -5,10 +5,12 @@ CONFIG += release debug { OBJECTS_DIR = obj/debug + QMAKE_LIBDIR += ../lib/debug ../quackleio/lib/debug } release { OBJECTS_DIR = obj/release + QMAKE_LIBDIR += ../lib/release ../quackleio/lib/release } MOC_DIR = moc @@ -19,10 +21,12 @@ MOC_DIR = moc CONFIG += console CONFIG -= app_bundle -LIBS += -lquackleio -lquackle +win32:!win32-g++ { + LIBS += -lquackleio -llibquackle +} else { + LIBS += -lquackleio -lquackle +} -QMAKE_LFLAGS_RELEASE += -L../lib/release -L../quackleio/lib/release -QMAKE_LFLAGS_DEBUG += -L../lib/debug -L../quackleio/lib/debug # Input SOURCES += makegaddag.cpp |