diff options
author | John Fultz <jfultz@wolfram.com> | 2015-10-18 23:35:21 -0500 |
---|---|---|
committer | John Fultz <jfultz@wolfram.com> | 2015-10-18 23:35:21 -0500 |
commit | 2c2a91a6154a8dafa1415ec546ac07b2486b6743 (patch) | |
tree | cc2f799c30b4d1fdcfaac9970680998abb0788ec /lexiconparameters.cpp | |
parent | 06b0b048147df0387001f8c4bf8f52851d722240 (diff) | |
parent | 23f13f666c42068ed086c5a5791063465db653c7 (diff) |
Merge branch 'feature/editablesettings'
Diffstat (limited to 'lexiconparameters.cpp')
-rw-r--r-- | lexiconparameters.cpp | 200 |
1 files changed, 185 insertions, 15 deletions
diff --git a/lexiconparameters.cpp b/lexiconparameters.cpp index b32f632..f04a941 100644 --- a/lexiconparameters.cpp +++ b/lexiconparameters.cpp @@ -19,15 +19,122 @@ #include <iostream> #include <fstream> + #include "datamanager.h" #include "lexiconparameters.h" #include "uv.h" using namespace Quackle; +class Quackle::V0LexiconInterpreter : public LexiconInterpreter +{ + + virtual void loadDawg(ifstream &file, LexiconParameters &lexparams) + { + int i = 0; + while (!file.eof()) + { + file.read((char*)(lexparams.m_dawg) + i, 7); + i += 7; + } + } + + virtual void loadGaddag(ifstream &file, LexiconParameters &lexparams) + { + int i = 0; + while (!file.eof()) + { + file.read((char*)(lexparams.m_gaddag) + i, 4); + i += 4; + } + } + + virtual void dawgAt(const unsigned char *dawg, int index, unsigned int &p, Letter &letter, bool &t, bool &lastchild, bool &british, int &playability) const + { + index *= 7; + p = (dawg[index] << 16) + (dawg[index + 1] << 8) + (dawg[index + 2]); + letter = dawg[index + 3]; + + t = (letter & 32) != 0; + lastchild = (letter & 64) != 0; + british = !(letter & 128); + letter = (letter & 31) + QUACKLE_FIRST_LETTER; + + playability = (dawg[index + 4] << 16) + (dawg[index + 5] << 8) + (dawg[index + 6]); + } + virtual int versionNumber() const { return 0; } +}; + +class Quackle::V1LexiconInterpreter : public LexiconInterpreter +{ + + virtual void loadDawg(ifstream &file, LexiconParameters &lexparams) + { + int i = 0; + unsigned char bytes[3]; + file.get(); // skip past version byte + file.read(lexparams.m_hash, sizeof(lexparams.m_hash)); + file.read((char*)bytes, 3); + + lexparams.m_utf8Alphabet.resize(file.get()); + for (size_t i = 0; i < lexparams.m_utf8Alphabet.size(); i++) + { + file >> lexparams.m_utf8Alphabet[i]; + file.get(); // separator space + } + while (!file.eof()) + { + file.read((char*)(lexparams.m_dawg) + i, 7); + i += 7; + } + } + + virtual void loadGaddag(ifstream &file, LexiconParameters &lexparams) + { + char hash[16]; + file.get(); // skip past version byte + file.read(hash, sizeof(hash)); + if (memcmp(hash, lexparams.m_hash, sizeof(hash))) + { + // If we're using a v0 DAWG, then ignore the hash + for (size_t i = 0; i < sizeof(lexparams.m_hash); i++) + { + if (lexparams.m_hash[0] != 0) + { + lexparams.unloadGaddag(); // don't use a mismatched gaddag + return; + } + } + } + + size_t i = 0; + while (!file.eof()) + { + file.read((char*)(lexparams.m_gaddag) + i, 4); + i += 4; + } + } + + virtual void dawgAt(const unsigned char *dawg, int index, unsigned int &p, Letter &letter, bool &t, bool &lastchild, bool &british, int &playability) const + { + index *= 7; + p = (dawg[index] << 16) + (dawg[index + 1] << 8) + (dawg[index + 2]); + letter = dawg[index + 3]; + + lastchild = ((letter & 64) != 0); + british = !(letter & 128); + letter = (letter & 63) + QUACKLE_FIRST_LETTER; + + playability = (dawg[index + 4] << 16) + (dawg[index + 5] << 8) + (dawg[index + 6]); + t = (playability != 0); + } + virtual int versionNumber() const { return 1; } +}; + LexiconParameters::LexiconParameters() - : m_dawg(0), m_gaddag(0) + : m_dawg(NULL), m_gaddag(NULL), m_interpreter(NULL) { + memset(m_hash, 0, sizeof(m_hash)); } LexiconParameters::~LexiconParameters() @@ -44,13 +151,15 @@ void LexiconParameters::unloadAll() void LexiconParameters::unloadDawg() { delete[] m_dawg; - m_dawg = 0; + m_dawg = NULL; + delete m_interpreter; + m_interpreter = NULL; } void LexiconParameters::unloadGaddag() { delete[] m_gaddag; - m_gaddag = 0; + m_gaddag = NULL; } void LexiconParameters::loadDawg(const string &filename) @@ -64,14 +173,19 @@ void LexiconParameters::loadDawg(const string &filename) return; } - m_dawg = new unsigned char[7000000]; - - int i = 0; - while (!file.eof()) + char versionByte = file.get(); + m_interpreter = createInterpreter(versionByte); + if (m_interpreter == NULL) { - file.read((char*)(m_dawg) + i, 7); - i += 7; + UVcout << "couldn't open file " << filename.c_str() << endl; + return; } + + file.seekg(0, ios_base::end); + m_dawg = new unsigned char[file.tellg()]; + file.seekg(0, ios_base::beg); + + m_interpreter->loadDawg(file, *this); } void LexiconParameters::loadGaddag(const string &filename) @@ -86,18 +200,74 @@ void LexiconParameters::loadGaddag(const string &filename) return; } - m_gaddag = new unsigned char[40000000]; + char versionByte = file.get(); + if (versionByte < m_interpreter->versionNumber()) + return; + file.seekg(0, ios_base::end); + m_gaddag = new unsigned char[file.tellg()]; + file.seekg(0, ios_base::beg); - int i = 0; - while (!file.eof()) + // must create a local interpreter because dawg/gaddag versions might not match + LexiconInterpreter* interpreter = createInterpreter(versionByte); + if (interpreter != NULL) { - file.read((char*)(m_gaddag) + i, 4); - i += 4; + interpreter->loadGaddag(file, *this); + delete interpreter; } + else + unloadGaddag(); } string LexiconParameters::findDictionaryFile(const string &lexicon) { - return DataManager::self()->findDataFile("lexica", lexicon); + return QUACKLE_DATAMANAGER->findDataFile("lexica", lexicon); +} + +bool LexiconParameters::hasUserDictionaryFile(const string &lexicon) +{ + return QUACKLE_DATAMANAGER->hasUserDataFile("lexica", lexicon); +} + +string LexiconParameters::hashString(bool shortened) const +{ + const char hex[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; + string hashStr; + for (size_t i = 0; i < sizeof(m_hash); i++) + { + hashStr.push_back(hex[(m_hash[i] & 0xF0) >> 4]); + hashStr.push_back(hex[m_hash[i] & 0x0F]); + if (shortened && i == 3) + break; + } + return hashStr; +} + +string LexiconParameters::copyrightString() const +{ + string copyrightsFilename = QUACKLE_DATAMANAGER->makeDataFilename("lexica", "copyrights.txt", false); + fstream copyrightsFile(copyrightsFilename, ios_base::in); + while (copyrightsFile.good() && !copyrightsFile.eof()) + { + string line; + getline(copyrightsFile, line); + if (line.size() < 9 || line.find_first_of(':') != 8) + continue; + if (hashString(true).compare(line.substr(0,8)) != 0) + continue; + return line.substr(9, line.size()); + } + return string(); } +LexiconInterpreter* LexiconParameters::createInterpreter(char version) const +{ + switch(version) + { + case 0: + return new V0LexiconInterpreter(); + case 1: + return new V1LexiconInterpreter(); + default: + return NULL; + } +} |