summary refs log tree commit diff
diff options
context:
space:
mode:
author John Fultz <jfultz@wolfram.com> 2015-08-24 04:45:27 -0500
committer John Fultz <jfultz@wolfram.com> 2015-08-24 04:45:46 -0500
commit 1f7b8ef6f96e1d5a2c50565a0f52cc633215e485 (patch)
tree 11f406677824d20924748225ab7eb129ba929cd0
parent 8c7ffef1b6c669592e979fb6038dd634df7f95fc (diff)
Version the GADDAGs.
Basically the same thing I just did to the DAWG files, now done to GADDAGs. Also, add hashing, and make sure GADDAGs only load if their hash matches that of the DAWG files.
-rw-r--r-- lexiconparameters.cpp 52
-rw-r--r-- lexiconparameters.h 15
-rw-r--r-- quacker/settings.cpp 19
-rw-r--r-- quackleio/dawgfactory.cpp 1
-rw-r--r-- quackleio/gaddagfactory.cpp 61
-rw-r--r-- quackleio/gaddagfactory.h 23
-rw-r--r-- quackletest.cpp 4
-rw-r--r-- test/testharness.cpp 6
8 files changed, 117 insertions, 64 deletions
diff --git a/lexiconparameters.cpp b/lexiconparameters.cpp
index ca09fa5..e014048 100644
--- a/lexiconparameters.cpp
+++ b/lexiconparameters.cpp
@@ -19,13 +19,14 @@
#include <iostream>
#include <fstream>
+
#include "datamanager.h"
#include "lexiconparameters.h"
#include "uv.h"
using namespace Quackle;
-class Quackle::V0DawgInterpreter : public DawgInterpreter
+class Quackle::V0LexiconInterpreter : public LexiconInterpreter
{
virtual void loadDawg(ifstream &file, LexiconParameters &lexparams)
@@ -39,6 +40,17 @@ class Quackle::V0DawgInterpreter : public DawgInterpreter
}
}
+ virtual void loadGaddag(ifstream &file, LexiconParameters &lexparams)
+ {
+ int i = 0;
+ file.unget();
+ while (!file.eof())
+ {
+ file.read((char*)(lexparams.m_gaddag) + i, 4);
+ i += 4;
+ }
+ }
+
virtual void dawgAt(const unsigned char *dawg, int index, unsigned int &p, Letter &letter, bool &t, bool &lastchild, bool &british, int &playability) const
{
index *= 7;
@@ -55,7 +67,7 @@ class Quackle::V0DawgInterpreter : public DawgInterpreter
virtual int versionNumber() const { return 0; }
};
-class Quackle::V1DawgInterpreter : public DawgInterpreter
+class Quackle::V1LexiconInterpreter : public LexiconInterpreter
{
virtual void loadDawg(ifstream &file, LexiconParameters &lexparams)
@@ -72,6 +84,24 @@ class Quackle::V1DawgInterpreter : public DawgInterpreter
}
}
+ virtual void loadGaddag(ifstream &file, LexiconParameters &lexparams)
+ {
+ char hash[16];
+ file.read(hash, sizeof(hash));
+ if (memcmp(hash, lexparams.m_hash, sizeof(hash)))
+ {
+ lexparams.unloadGaddag(); // don't use a mismatched gaddag
+ return;
+ }
+
+ int i = 0;
+ while (!file.eof())
+ {
+ file.read((char*)(lexparams.m_gaddag) + i, 4);
+ i += 4;
+ }
+ }
+
virtual void dawgAt(const unsigned char *dawg, int index, unsigned int &p, Letter &letter, bool &t, bool &lastchild, bool &british, int &playability) const
{
index *= 7;
@@ -108,14 +138,14 @@ void LexiconParameters::unloadAll()
void LexiconParameters::unloadDawg()
{
delete[] m_dawg;
- m_dawg = 0;
+ m_dawg = NULL;
delete m_interpreter;
}
void LexiconParameters::unloadGaddag()
{
delete[] m_gaddag;
- m_gaddag = 0;
+ m_gaddag = NULL;
}
void LexiconParameters::loadDawg(const string &filename)
@@ -133,10 +163,10 @@ void LexiconParameters::loadDawg(const string &filename)
switch(versionByte)
{
case 0:
- m_interpreter = new V0DawgInterpreter();
+ m_interpreter = new V0LexiconInterpreter();
break;
case 1:
- m_interpreter = new V1DawgInterpreter();
+ m_interpreter = new V1LexiconInterpreter();
break;
default:
UVcout << "couldn't open dawg " << filename.c_str() << endl;
@@ -160,14 +190,12 @@ void LexiconParameters::loadGaddag(const string &filename)
return;
}
+ char versionByte = file.get();
+ if (versionByte != m_interpreter->versionNumber())
+ return;
m_gaddag = new unsigned char[40000000];
- int i = 0;
- while (!file.eof())
- {
- file.read((char*)(m_gaddag) + i, 4);
- i += 4;
- }
+ m_interpreter->loadGaddag(file, *this);
}
string LexiconParameters::findDictionaryFile(const string &lexicon)
diff --git a/lexiconparameters.h b/lexiconparameters.h
index 4b6369d..04ad4e7 100644
--- a/lexiconparameters.h
+++ b/lexiconparameters.h
@@ -25,22 +25,23 @@
namespace Quackle
{
-class DawgInterpreter
+class LexiconInterpreter
{
public:
virtual void loadDawg(ifstream &file, LexiconParameters &lexparams) = 0;
+ virtual void loadGaddag(ifstream &file, LexiconParameters &lexparams) = 0;
virtual void dawgAt(const unsigned char *dawg, int index, unsigned int &p, Letter &letter, bool &t, bool &lastchild, bool &british, int &playability) const = 0;
virtual int versionNumber() const = 0;
- virtual ~DawgInterpreter() {};
+ virtual ~LexiconInterpreter() {};
};
-class V0DawgInterpreter;
-class V1DawgInterpreter;
+class V0LexiconInterpreter;
+class V1LexiconInterpreter;
class LexiconParameters
{
- friend class Quackle::V0DawgInterpreter;
- friend class Quackle::V1DawgInterpreter;
+ friend class Quackle::V0LexiconInterpreter;
+ friend class Quackle::V1LexiconInterpreter;
public:
LexiconParameters();
@@ -79,7 +80,7 @@ protected:
unsigned char *m_dawg;
unsigned char *m_gaddag;
string m_lexiconName;
- DawgInterpreter *m_interpreter;
+ LexiconInterpreter *m_interpreter;
char m_hash[16];
int m_wordcount;
};
diff --git a/quacker/settings.cpp b/quacker/settings.cpp
index 3c42a39..362e916 100644
--- a/quacker/settings.cpp
+++ b/quacker/settings.cpp
@@ -207,16 +207,6 @@ void Settings::setQuackleToUseLexiconName(const string &lexiconName)
{
QUACKLE_LEXICON_PARAMETERS->setLexiconName(lexiconName);
- string gaddagFile = Quackle::LexiconParameters::findDictionaryFile(lexiconName + ".gaddag");
-
- if (gaddagFile.empty())
- {
- UVcout << "Gaddag for lexicon '" << lexiconName << "' does not exist." << endl;
- QUACKLE_LEXICON_PARAMETERS->unloadGaddag();
- }
- else
- QUACKLE_LEXICON_PARAMETERS->loadGaddag(gaddagFile);
-
string dawgFile = Quackle::LexiconParameters::findDictionaryFile(lexiconName + ".dawg");
if (dawgFile.empty())
{
@@ -226,6 +216,15 @@ void Settings::setQuackleToUseLexiconName(const string &lexiconName)
else
QUACKLE_LEXICON_PARAMETERS->loadDawg(dawgFile);
+ string gaddagFile = Quackle::LexiconParameters::findDictionaryFile(lexiconName + ".gaddag");
+ if (gaddagFile.empty())
+ {
+ UVcout << "Gaddag for lexicon '" << lexiconName << "' does not exist." << endl;
+ QUACKLE_LEXICON_PARAMETERS->unloadGaddag();
+ }
+ else
+ QUACKLE_LEXICON_PARAMETERS->loadGaddag(gaddagFile);
+
QUACKLE_STRATEGY_PARAMETERS->initialize(lexiconName);
}
}
diff --git a/quackleio/dawgfactory.cpp b/quackleio/dawgfactory.cpp
index 6fb5be0..74b4346 100644
--- a/quackleio/dawgfactory.cpp
+++ b/quackleio/dawgfactory.cpp
@@ -138,6 +138,7 @@ void DawgFactory::writeIndex(const QString& filename)
bytes[1] = (m_encodableWords & 0x0000FF00) >> 8;
bytes[2] = (m_encodableWords & 0x000000FF);
+ out.put(1); // DAWG format version 1
out.write(m_hash.charptr, sizeof(m_hash.charptr));
out.write((char*)bytes, 3);
diff --git a/quackleio/gaddagfactory.cpp b/quackleio/gaddagfactory.cpp
index e2c726d..7f666cb 100644
--- a/quackleio/gaddagfactory.cpp
+++ b/quackleio/gaddagfactory.cpp
@@ -19,6 +19,7 @@
#include <iostream>
#include <QtCore>
+#include <QCryptographicHash>
#include "gaddagfactory.h"
#include "util.h"
@@ -27,18 +28,20 @@ GaddagFactory::GaddagFactory(const QString& alphabetFile)
{
QuackleIO::FlexibleAlphabetParameters *flexure = new QuackleIO::FlexibleAlphabetParameters;
flexure->load(alphabetFile);
- alphas = flexure;
+ m_alphas = flexure;
// So the separator is sorted to last.
- root.t = false;
- root.c = QUACKLE_NULL_MARK; // "_"
- root.pointer = 0;
- root.lastchild = true;
+ m_root.t = false;
+ m_root.c = QUACKLE_NULL_MARK; // "_"
+ m_root.pointer = 0;
+ m_root.lastchild = true;
+
+ m_hash.int32ptr[0] = m_hash.int32ptr[1] = m_hash.int32ptr[2] = m_hash.int32ptr[3] = 0;
}
GaddagFactory::~GaddagFactory()
{
- delete alphas;
+ delete m_alphas;
}
bool GaddagFactory::pushWord(const QString& word)
@@ -46,10 +49,14 @@ bool GaddagFactory::pushWord(const QString& word)
UVString originalString = QuackleIO::Util::qstringToString(word);
UVString leftover;
- Quackle::LetterString encodedWord = alphas->encode(originalString, &leftover);
+ Quackle::LetterString encodedWord = m_alphas->encode(originalString, &leftover);
if (leftover.empty())
{
++m_encodableWords;
+ hashWord(encodedWord);
+ // FIXME: This hash will fail if duplicate words are passed in.
+ // But testing for duplicate words isn't so easy without keeping
+ // an entirely separate list.
for (unsigned i = 1; i <= encodedWord.length(); i++)
{
@@ -64,7 +71,7 @@ bool GaddagFactory::pushWord(const QString& word)
for (unsigned j = i; j < encodedWord.length(); j++)
newword.push_back(encodedWord[j]);
}
- gaddagizedWords.push_back(newword);
+ m_gaddagizedWords.push_back(newword);
}
return true;
}
@@ -73,26 +80,40 @@ bool GaddagFactory::pushWord(const QString& word)
return false;
}
+void GaddagFactory::hashWord(const Quackle::LetterString &word)
+{
+ QCryptographicHash wordhash(QCryptographicHash::Md5);
+ wordhash.addData(word.constData(), word.length());
+ QByteArray wordhashbytes = wordhash.result();
+ m_hash.int32ptr[0] ^= ((const int32_t*)wordhashbytes.constData())[0];
+ m_hash.int32ptr[1] ^= ((const int32_t*)wordhashbytes.constData())[1];
+ m_hash.int32ptr[2] ^= ((const int32_t*)wordhashbytes.constData())[2];
+ m_hash.int32ptr[3] ^= ((const int32_t*)wordhashbytes.constData())[3];
+}
+
void GaddagFactory::generate()
{
- Quackle::WordList::const_iterator wordsEnd = gaddagizedWords.end();
- for (Quackle::WordList::const_iterator wordsIt = gaddagizedWords.begin(); wordsIt != wordsEnd; ++wordsIt)
- root.pushWord(*wordsIt);
+ Quackle::WordList::const_iterator wordsEnd = m_gaddagizedWords.end();
+ for (Quackle::WordList::const_iterator wordsIt = m_gaddagizedWords.begin(); wordsIt != wordsEnd; ++wordsIt)
+ m_root.pushWord(*wordsIt);
// for (const auto& words : gaddaggizedWords)
- // root.pushWord(words);
+ // m_root.pushWord(words);
}
-void GaddagFactory::writeIndex(const QString& fname)
+void GaddagFactory::writeIndex(const QString &fname)
{
- nodelist.push_back(&root);
+ m_nodelist.push_back(&m_root);
- root.print(nodelist);
+ m_root.print(m_nodelist);
ofstream out(QuackleIO::Util::qstringToStdString(fname).c_str(), ios::out | ios::binary);
- for (size_t i = 0; i < nodelist.size(); i++)
+ out.put(1); // GADDAG format version 1
+ out.write(m_hash.charptr, sizeof(m_hash.charptr));
+
+ for (size_t i = 0; i < m_nodelist.size(); i++)
{
- unsigned int p = (unsigned int)(nodelist[i]->pointer);
+ unsigned int p = (unsigned int)(m_nodelist[i]->pointer);
if (p != 0)
p -= i; // offset indexing
@@ -102,14 +123,14 @@ void GaddagFactory::writeIndex(const QString& fname)
unsigned char n3 = (p & 0x000000FF) >> 0;
unsigned char n4;
- n4 = nodelist[i]->c;
+ n4 = m_nodelist[i]->c;
if (n4 == internalSeparatorRepresentation)
n4 = QUACKLE_NULL_MARK;
- if (nodelist[i]->t)
+ if (m_nodelist[i]->t)
n4 |= 64;
- if (nodelist[i]->lastchild)
+ if (m_nodelist[i]->lastchild)
n4 |= 128;
bytes[0] = n1; bytes[1] = n2; bytes[2] = n3; bytes[3] = n4;
diff --git a/quackleio/gaddagfactory.h b/quackleio/gaddagfactory.h
index 9eb8d72..2d21192 100644
--- a/quackleio/gaddagfactory.h
+++ b/quackleio/gaddagfactory.h
@@ -30,13 +30,14 @@ public:
GaddagFactory(const QString& alphabetFile);
~GaddagFactory();
- int wordCount() const { return gaddagizedWords.size(); };
- int nodeCount() const { return nodelist.size(); };
+ int wordCount() const { return m_gaddagizedWords.size(); };
+ int nodeCount() const { return m_nodelist.size(); };
int encodableWords() const { return m_encodableWords; };
int unencodableWords() const { return m_unencodableWords; };
bool pushWord(const QString& word);
- void sortWords() { sort(gaddagizedWords.begin(), gaddagizedWords.end()); };
+ void hashWord(const Quackle::LetterString &word);
+ void sortWords() { sort(m_gaddagizedWords.begin(), m_gaddagizedWords.end()); };
void generate();
void writeIndex(const QString& fname);
@@ -49,17 +50,19 @@ private:
int pointer;
bool lastchild;
void pushWord(const Quackle::LetterString& word);
- void print(vector< Node* >& nodelist);
+ void print(vector< Node* >& m_nodelist);
};
int m_encodableWords;
int m_unencodableWords;
- Quackle::WordList gaddagizedWords;
- vector< Node* > nodelist;
- Quackle::AlphabetParameters *alphas;
- Node root;
-
-
+ Quackle::WordList m_gaddagizedWords;
+ vector< Node* > m_nodelist;
+ Quackle::AlphabetParameters *m_alphas;
+ Node m_root;
+ union {
+ char charptr[16];
+ int32_t int32ptr[4];
+ } m_hash;
};
#endif
diff --git a/quackletest.cpp b/quackletest.cpp
index e69c2cb..7ea5d10 100644
--- a/quackletest.cpp
+++ b/quackletest.cpp
@@ -47,7 +47,7 @@ int main()
dataManager.setAppDataDirectory("data");
dataManager.lexiconParameters()->loadDawg(Quackle::LexiconParameters::findDictionaryFile("twl06.dawg"));
- dataManager.lexiconParameters()->loadGaddag(Quackle::LexiconParameters::findDictionaryFile("twl06.gaddag"));
+ dataManager.lexiconParameters()->loadGaddag(Quackle::LexiconParameters::findDictionaryFile("twl06.gaddag"));
dataManager.strategyParameters()->initialize("twl06");
dataManager.setBoardParameters(new Quackle::EnglishBoard());
@@ -58,7 +58,7 @@ int main()
const int gameCnt = 1000;
//const int gameCnt = 1;
for (int game = 0; game < gameCnt; ++game) {
- testGame();
+ testGame();
}
return 0;
diff --git a/test/testharness.cpp b/test/testharness.cpp
index 683443f..3f390c1 100644
--- a/test/testharness.cpp
+++ b/test/testharness.cpp
@@ -207,13 +207,13 @@ void TestHarness::startUp()
m_dataManager.setBoardParameters(new ScrabbleBoard());
- m_dataManager.lexiconParameters()->loadGaddag(Quackle::LexiconParameters::findDictionaryFile(QuackleIO::Util::qstringToStdString(m_lexicon + ".gaddag")));
+ m_dataManager.lexiconParameters()->loadDawg(Quackle::LexiconParameters::findDictionaryFile(QuackleIO::Util::qstringToStdString(m_lexicon + ".dawg")));
UVcout << ".";
- m_dataManager.lexiconParameters()->loadDawg(Quackle::LexiconParameters::findDictionaryFile(QuackleIO::Util::qstringToStdString(m_lexicon + ".dawg")));
+ m_dataManager.lexiconParameters()->loadGaddag(Quackle::LexiconParameters::findDictionaryFile(QuackleIO::Util::qstringToStdString(m_lexicon + ".gaddag")));
+ UVcout << ".";
m_dataManager.strategyParameters()->initialize(QuackleIO::Util::qstringToStdString(m_lexicon));
- UVcout << ".";
UVcout << endl;