summaryrefslogtreecommitdiff
path: root/quackleio
diff options
context:
space:
mode:
author John Fultz <jfultz@wolfram.com> 2015-08-24 04:45:27 -0500
committer John Fultz <jfultz@wolfram.com> 2015-08-24 04:45:46 -0500
commit 1f7b8ef6f96e1d5a2c50565a0f52cc633215e485 (patch)
tree 11f406677824d20924748225ab7eb129ba929cd0 /quackleio
parent 8c7ffef1b6c669592e979fb6038dd634df7f95fc (diff)
Version the GADDAGs.
This applies to GADDAG files the same versioning scheme that was just added for DAWG files. It also adds hashing, and makes sure a GADDAG only loads if its hash matches that of the corresponding DAWG file.
Diffstat (limited to 'quackleio')
-rw-r--r-- quackleio/dawgfactory.cpp 1
-rw-r--r-- quackleio/gaddagfactory.cpp 61
-rw-r--r-- quackleio/gaddagfactory.h 23
3 files changed, 55 insertions, 30 deletions
diff --git a/quackleio/dawgfactory.cpp b/quackleio/dawgfactory.cpp
index 6fb5be0..74b4346 100644
--- a/quackleio/dawgfactory.cpp
+++ b/quackleio/dawgfactory.cpp
@@ -138,6 +138,7 @@ void DawgFactory::writeIndex(const QString& filename)
bytes[1] = (m_encodableWords & 0x0000FF00) >> 8;
bytes[2] = (m_encodableWords & 0x000000FF);
+ out.put(1); // DAWG format version 1
out.write(m_hash.charptr, sizeof(m_hash.charptr));
out.write((char*)bytes, 3);
diff --git a/quackleio/gaddagfactory.cpp b/quackleio/gaddagfactory.cpp
index e2c726d..7f666cb 100644
--- a/quackleio/gaddagfactory.cpp
+++ b/quackleio/gaddagfactory.cpp
@@ -19,6 +19,7 @@
#include <iostream>
#include <QtCore>
+#include <QCryptographicHash>
#include "gaddagfactory.h"
#include "util.h"
@@ -27,18 +28,20 @@ GaddagFactory::GaddagFactory(const QString& alphabetFile)
{
QuackleIO::FlexibleAlphabetParameters *flexure = new QuackleIO::FlexibleAlphabetParameters;
flexure->load(alphabetFile);
- alphas = flexure;
+ m_alphas = flexure;
// So the separator is sorted to last.
- root.t = false;
- root.c = QUACKLE_NULL_MARK; // "_"
- root.pointer = 0;
- root.lastchild = true;
+ m_root.t = false;
+ m_root.c = QUACKLE_NULL_MARK; // "_"
+ m_root.pointer = 0;
+ m_root.lastchild = true;
+
+ m_hash.int32ptr[0] = m_hash.int32ptr[1] = m_hash.int32ptr[2] = m_hash.int32ptr[3] = 0;
}
GaddagFactory::~GaddagFactory()
{
- delete alphas;
+ delete m_alphas;
}
bool GaddagFactory::pushWord(const QString& word)
@@ -46,10 +49,14 @@ bool GaddagFactory::pushWord(const QString& word)
UVString originalString = QuackleIO::Util::qstringToString(word);
UVString leftover;
- Quackle::LetterString encodedWord = alphas->encode(originalString, &leftover);
+ Quackle::LetterString encodedWord = m_alphas->encode(originalString, &leftover);
if (leftover.empty())
{
++m_encodableWords;
+ hashWord(encodedWord);
+ // FIXME: Duplicate words break this hash: hashWord() XORs each word's MD5
+ // into m_hash, so a word pushed twice cancels itself out. But testing for
+ // duplicate words isn't so easy without keeping an entirely separate list.
for (unsigned i = 1; i <= encodedWord.length(); i++)
{
@@ -64,7 +71,7 @@ bool GaddagFactory::pushWord(const QString& word)
for (unsigned j = i; j < encodedWord.length(); j++)
newword.push_back(encodedWord[j]);
}
- gaddagizedWords.push_back(newword);
+ m_gaddagizedWords.push_back(newword);
}
return true;
}
@@ -73,26 +80,40 @@ bool GaddagFactory::pushWord(const QString& word)
return false;
}
+void GaddagFactory::hashWord(const Quackle::LetterString &word)
+{
+ QCryptographicHash wordhash(QCryptographicHash::Md5);
+ wordhash.addData(word.constData(), word.length());
+ QByteArray wordhashbytes = wordhash.result();
+ m_hash.int32ptr[0] ^= ((const int32_t*)wordhashbytes.constData())[0];
+ m_hash.int32ptr[1] ^= ((const int32_t*)wordhashbytes.constData())[1];
+ m_hash.int32ptr[2] ^= ((const int32_t*)wordhashbytes.constData())[2];
+ m_hash.int32ptr[3] ^= ((const int32_t*)wordhashbytes.constData())[3];
+}
+
void GaddagFactory::generate()
{
- Quackle::WordList::const_iterator wordsEnd = gaddagizedWords.end();
- for (Quackle::WordList::const_iterator wordsIt = gaddagizedWords.begin(); wordsIt != wordsEnd; ++wordsIt)
- root.pushWord(*wordsIt);
+ Quackle::WordList::const_iterator wordsEnd = m_gaddagizedWords.end();
+ for (Quackle::WordList::const_iterator wordsIt = m_gaddagizedWords.begin(); wordsIt != wordsEnd; ++wordsIt)
+ m_root.pushWord(*wordsIt);
// for (const auto& words : gaddaggizedWords)
- // root.pushWord(words);
+ // m_root.pushWord(words);
}
-void GaddagFactory::writeIndex(const QString& fname)
+void GaddagFactory::writeIndex(const QString &fname)
{
- nodelist.push_back(&root);
+ m_nodelist.push_back(&m_root);
- root.print(nodelist);
+ m_root.print(m_nodelist);
ofstream out(QuackleIO::Util::qstringToStdString(fname).c_str(), ios::out | ios::binary);
- for (size_t i = 0; i < nodelist.size(); i++)
+ out.put(1); // GADDAG format version 1
+ out.write(m_hash.charptr, sizeof(m_hash.charptr));
+
+ for (size_t i = 0; i < m_nodelist.size(); i++)
{
- unsigned int p = (unsigned int)(nodelist[i]->pointer);
+ unsigned int p = (unsigned int)(m_nodelist[i]->pointer);
if (p != 0)
p -= i; // offset indexing
@@ -102,14 +123,14 @@ void GaddagFactory::writeIndex(const QString& fname)
unsigned char n3 = (p & 0x000000FF) >> 0;
unsigned char n4;
- n4 = nodelist[i]->c;
+ n4 = m_nodelist[i]->c;
if (n4 == internalSeparatorRepresentation)
n4 = QUACKLE_NULL_MARK;
- if (nodelist[i]->t)
+ if (m_nodelist[i]->t)
n4 |= 64;
- if (nodelist[i]->lastchild)
+ if (m_nodelist[i]->lastchild)
n4 |= 128;
bytes[0] = n1; bytes[1] = n2; bytes[2] = n3; bytes[3] = n4;
diff --git a/quackleio/gaddagfactory.h b/quackleio/gaddagfactory.h
index 9eb8d72..2d21192 100644
--- a/quackleio/gaddagfactory.h
+++ b/quackleio/gaddagfactory.h
@@ -30,13 +30,14 @@ public:
GaddagFactory(const QString& alphabetFile);
~GaddagFactory();
- int wordCount() const { return gaddagizedWords.size(); };
- int nodeCount() const { return nodelist.size(); };
+ int wordCount() const { return m_gaddagizedWords.size(); };
+ int nodeCount() const { return m_nodelist.size(); };
int encodableWords() const { return m_encodableWords; };
int unencodableWords() const { return m_unencodableWords; };
bool pushWord(const QString& word);
- void sortWords() { sort(gaddagizedWords.begin(), gaddagizedWords.end()); };
+ void hashWord(const Quackle::LetterString &word);
+ void sortWords() { sort(m_gaddagizedWords.begin(), m_gaddagizedWords.end()); };
void generate();
void writeIndex(const QString& fname);
@@ -49,17 +50,19 @@ private:
int pointer;
bool lastchild;
void pushWord(const Quackle::LetterString& word);
- void print(vector< Node* >& nodelist);
+ void print(vector< Node* >& m_nodelist);
};
int m_encodableWords;
int m_unencodableWords;
- Quackle::WordList gaddagizedWords;
- vector< Node* > nodelist;
- Quackle::AlphabetParameters *alphas;
- Node root;
-
-
+ Quackle::WordList m_gaddagizedWords;
+ vector< Node* > m_nodelist;
+ Quackle::AlphabetParameters *m_alphas;
+ Node m_root;
+ union {
+ char charptr[16];
+ int32_t int32ptr[4];
+ } m_hash;
};
#endif