summary | refs | log | tree | commit | diff
path: root/lexiconparameters.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lexiconparameters.cpp')
-rw-r--r-- lexiconparameters.cpp 200
1 files changed, 185 insertions, 15 deletions
diff --git a/lexiconparameters.cpp b/lexiconparameters.cpp
index b32f632..f04a941 100644
--- a/lexiconparameters.cpp
+++ b/lexiconparameters.cpp
@@ -19,15 +19,122 @@
#include <iostream>
#include <fstream>
+
#include "datamanager.h"
#include "lexiconparameters.h"
#include "uv.h"
using namespace Quackle;
+/// Interpreter for version-0 lexicon files: the stream is read from its
+/// current position as a plain run of fixed-size records (7 bytes per DAWG
+/// record, 4 bytes per GADDAG record) with no header handling.
+class Quackle::V0LexiconInterpreter : public LexiconInterpreter
+{
+
+    /// Reads the rest of @p file into lexparams.m_dawg, 7 bytes at a time.
+    /// m_dawg must already point at a buffer large enough for the file.
+    virtual void loadDawg(ifstream &file, LexiconParameters &lexparams)
+    {
+        char *const dest = reinterpret_cast<char*>(lexparams.m_dawg);
+        size_t offset = 0;
+        // Loop on the result of read() instead of on eof(): a stream that
+        // fails without ever reaching end-of-file would otherwise spin
+        // forever. A short final read still stores the bytes it obtained.
+        while (file.read(dest + offset, 7))
+            offset += 7;
+    }
+
+    /// Reads the rest of @p file into lexparams.m_gaddag, 4 bytes at a time.
+    /// m_gaddag must already point at a buffer large enough for the file.
+    virtual void loadGaddag(ifstream &file, LexiconParameters &lexparams)
+    {
+        char *const dest = reinterpret_cast<char*>(lexparams.m_gaddag);
+        size_t offset = 0;
+        // Same termination rule as loadDawg: stop on any read failure.
+        while (file.read(dest + offset, 4))
+            offset += 4;
+    }
+
+    /// Decodes the 7-byte DAWG record number @p index.
+    ///   bytes 0-2: p, big-endian
+    ///   byte 3:    low 5 bits = letter (offset by QUACKLE_FIRST_LETTER),
+    ///              bit 32 = t, bit 64 = lastchild, bit 128 clear = british
+    ///   bytes 4-6: playability, big-endian
+    virtual void dawgAt(const unsigned char *dawg, int index, unsigned int &p, Letter &letter, bool &t, bool &lastchild, bool &british, int &playability) const
+    {
+        index *= 7;
+        p = (dawg[index] << 16) + (dawg[index + 1] << 8) + (dawg[index + 2]);
+        letter = dawg[index + 3];
+
+        // The flags share a byte with the letter; read them before masking.
+        t = (letter & 32) != 0;
+        lastchild = (letter & 64) != 0;
+        british = !(letter & 128);
+        letter = (letter & 31) + QUACKLE_FIRST_LETTER;
+
+        playability = (dawg[index + 4] << 16) + (dawg[index + 5] << 8) + (dawg[index + 6]);
+    }
+    virtual int versionNumber() const { return 0; }
+};
+
+/// Interpreter for version-1 lexicon files. Both file kinds start with a
+/// version byte followed by a hash; the DAWG file additionally carries three
+/// more header bytes and the alphabet before its 7-byte records.
+class Quackle::V1LexiconInterpreter : public LexiconInterpreter
+{
+
+    /// Parses the DAWG header into @p lexparams (m_hash and m_utf8Alphabet),
+    /// then reads the rest of @p file into lexparams.m_dawg, 7 bytes at a time.
+    /// m_dawg must already point at a buffer large enough for the file.
+    virtual void loadDawg(ifstream &file, LexiconParameters &lexparams)
+    {
+        unsigned char bytes[3];
+        file.get(); // skip past version byte
+        file.read(lexparams.m_hash, sizeof(lexparams.m_hash));
+        // Three header bytes follow the hash. They are consumed to advance
+        // the stream; their value is not used here.
+        file.read((char*)bytes, 3);
+
+        // One byte holding the alphabet size, then that many
+        // whitespace-delimited entries, each followed by a separator.
+        lexparams.m_utf8Alphabet.resize(file.get());
+        for (size_t letterIndex = 0; letterIndex < lexparams.m_utf8Alphabet.size(); letterIndex++)
+        {
+            file >> lexparams.m_utf8Alphabet[letterIndex];
+            file.get(); // separator space
+        }
+
+        char *const dest = reinterpret_cast<char*>(lexparams.m_dawg);
+        size_t offset = 0;
+        // Loop on the result of read() instead of on eof(): a stream that
+        // fails without ever reaching end-of-file would otherwise spin
+        // forever. A short final read still stores the bytes it obtained.
+        while (file.read(dest + offset, 7))
+            offset += 7;
+    }
+
+    /// Reads the GADDAG header, and drops the gaddag (lexparams.unloadGaddag())
+    /// when its hash differs from the hash stored by the DAWG load. Otherwise
+    /// reads the rest of @p file into lexparams.m_gaddag, 4 bytes at a time.
+    virtual void loadGaddag(ifstream &file, LexiconParameters &lexparams)
+    {
+        char hash[16];
+        file.get(); // skip past version byte
+        file.read(hash, sizeof(hash));
+        if (memcmp(hash, lexparams.m_hash, sizeof(hash)))
+        {
+            // If we're using a v0 DAWG, then ignore the hash: a v0 DAWG leaves
+            // m_hash all zero, so only a hash with some non-zero byte counts
+            // as a real mismatch. Every byte must be inspected, not just the
+            // first one.
+            for (size_t i = 0; i < sizeof(lexparams.m_hash); i++)
+            {
+                if (lexparams.m_hash[i] != 0)
+                {
+                    lexparams.unloadGaddag(); // don't use a mismatched gaddag
+                    return;
+                }
+            }
+        }
+
+        char *const dest = reinterpret_cast<char*>(lexparams.m_gaddag);
+        size_t offset = 0;
+        // Same termination rule as loadDawg: stop on any read failure.
+        while (file.read(dest + offset, 4))
+            offset += 4;
+    }
+
+    /// Decodes the 7-byte DAWG record number @p index.
+    ///   bytes 0-2: p, big-endian
+    ///   byte 3:    low 6 bits = letter (offset by QUACKLE_FIRST_LETTER),
+    ///              bit 64 = lastchild, bit 128 clear = british
+    ///   bytes 4-6: playability, big-endian; t is set when it is non-zero
+    virtual void dawgAt(const unsigned char *dawg, int index, unsigned int &p, Letter &letter, bool &t, bool &lastchild, bool &british, int &playability) const
+    {
+        index *= 7;
+        p = (dawg[index] << 16) + (dawg[index + 1] << 8) + (dawg[index + 2]);
+        letter = dawg[index + 3];
+
+        // The flags share a byte with the letter; read them before masking.
+        lastchild = ((letter & 64) != 0);
+        british = !(letter & 128);
+        letter = (letter & 63) + QUACKLE_FIRST_LETTER;
+
+        playability = (dawg[index + 4] << 16) + (dawg[index + 5] << 8) + (dawg[index + 6]);
+        t = (playability != 0);
+    }
+    virtual int versionNumber() const { return 1; }
+};
+
LexiconParameters::LexiconParameters()
- : m_dawg(0), m_gaddag(0)
+ : m_dawg(NULL), m_gaddag(NULL), m_interpreter(NULL) // nothing loaded yet; loadDawg() is what creates the interpreter
{
+ memset(m_hash, 0, sizeof(m_hash)); // hash stays all zero until V1LexiconInterpreter::loadDawg reads one from a file header
}
LexiconParameters::~LexiconParameters()
@@ -44,13 +151,15 @@ void LexiconParameters::unloadAll()
void LexiconParameters::unloadDawg()
{
delete[] m_dawg;
- m_dawg = 0;
+ m_dawg = NULL;
+ delete m_interpreter; // the interpreter is created by loadDawg(), so it is released together with the dawg
+ m_interpreter = NULL; // loadGaddag() dereferences m_interpreter - NOTE(review): confirm callers reload the dawg first
}
void LexiconParameters::unloadGaddag()
{
delete[] m_gaddag;
- m_gaddag = 0;
+ m_gaddag = NULL; // reset so m_gaddag does not keep pointing at the freed buffer
}
void LexiconParameters::loadDawg(const string &filename)
@@ -64,14 +173,19 @@ void LexiconParameters::loadDawg(const string &filename)
return;
}
- m_dawg = new unsigned char[7000000];
-
- int i = 0;
- while (!file.eof())
+ char versionByte = file.get();
+ m_interpreter = createInterpreter(versionByte);
+ if (m_interpreter == NULL)
{
- file.read((char*)(m_dawg) + i, 7);
- i += 7;
+ UVcout << "couldn't open file " << filename.c_str() << endl;
+ return;
}
+
+ file.seekg(0, ios_base::end);
+ m_dawg = new unsigned char[file.tellg()];
+ file.seekg(0, ios_base::beg);
+
+ m_interpreter->loadDawg(file, *this);
}
void LexiconParameters::loadGaddag(const string &filename)
@@ -86,18 +200,74 @@ void LexiconParameters::loadGaddag(const string &filename)
return;
}
- m_gaddag = new unsigned char[40000000];
+ char versionByte = file.get();
+ if (versionByte < m_interpreter->versionNumber())
+ return;
+ file.seekg(0, ios_base::end);
+ m_gaddag = new unsigned char[file.tellg()];
+ file.seekg(0, ios_base::beg);
- int i = 0;
- while (!file.eof())
+ // must create a local interpreter because dawg/gaddag versions might not match
+ LexiconInterpreter* interpreter = createInterpreter(versionByte);
+ if (interpreter != NULL)
{
- file.read((char*)(m_gaddag) + i, 4);
- i += 4;
+ interpreter->loadGaddag(file, *this);
+ delete interpreter;
}
+ else
+ unloadGaddag();
}
string LexiconParameters::findDictionaryFile(const string &lexicon)
{
- return DataManager::self()->findDataFile("lexica", lexicon);
+ return QUACKLE_DATAMANAGER->findDataFile("lexica", lexicon); // same "lexica" lookup as before, now reached through QUACKLE_DATAMANAGER
+}
+
+/// Asks the data manager whether a user data file exists for @p lexicon
+/// under "lexica".
+bool LexiconParameters::hasUserDictionaryFile(const string &lexicon)
+{
+    const bool userFilePresent = QUACKLE_DATAMANAGER->hasUserDataFile("lexica", lexicon);
+    return userFilePresent;
+}
+
+/// Renders m_hash as lowercase hexadecimal, two digits per byte.
+/// When @p shortened is true, only the first four bytes (eight digits) are
+/// rendered; otherwise the whole hash is.
+string LexiconParameters::hashString(bool shortened) const
+{
+    static const char hexDigits[] = "0123456789abcdef";
+
+    // Decide up front how many bytes to render instead of breaking mid-loop.
+    size_t byteCount = sizeof(m_hash);
+    if (shortened && byteCount > 4)
+        byteCount = 4;
+
+    string rendered;
+    for (size_t pos = 0; pos < byteCount; ++pos)
+    {
+        const unsigned char byte = static_cast<unsigned char>(m_hash[pos]);
+        rendered += hexDigits[byte >> 4];   // high nibble first
+        rendered += hexDigits[byte & 0x0F]; // then low nibble
+    }
+    return rendered;
+}
+
+/// Looks up the copyright text for this lexicon in the "copyrights.txt" data
+/// file under "lexica". A line is considered only when it is at least nine
+/// characters long and its first ':' is at position 8; it matches when the
+/// eight characters before the ':' equal the shortened hash string.
+/// Returns the text after the ':' of the first matching line, or an empty
+/// string when no line matches or the file cannot be read.
+string LexiconParameters::copyrightString() const
+{
+    const string listingPath = QUACKLE_DATAMANAGER->makeDataFilename("lexica", "copyrights.txt", false);
+    fstream listing(listingPath, ios_base::in);
+    const string wantedHash = hashString(true);
+
+    string entry;
+    while (getline(listing, entry))
+    {
+        const bool wellFormed = entry.size() >= 9 && entry.find_first_of(':') == 8;
+        if (wellFormed && entry.compare(0, 8, wantedHash) == 0)
+            return entry.substr(9);
+    }
+    return string();
}
+/// Creates the interpreter that matches a lexicon file's version byte.
+/// Returns a newly allocated V0 or V1 interpreter (the callers in this file
+/// delete it when done), or NULL for any other version value.
+LexiconInterpreter* LexiconParameters::createInterpreter(char version) const
+{
+    if (version == 0)
+        return new V0LexiconInterpreter();
+    if (version == 1)
+        return new V1LexiconInterpreter();
+    return NULL;
+}