diff options
author | John Fultz <jfultz@wolfram.com> | 2015-10-18 23:35:21 -0500 |
---|---|---|
committer | John Fultz <jfultz@wolfram.com> | 2015-10-18 23:35:21 -0500 |
commit | 2c2a91a6154a8dafa1415ec546ac07b2486b6743 (patch) | |
tree | cc2f799c30b4d1fdcfaac9970680998abb0788ec /quackleio | |
parent | 06b0b048147df0387001f8c4bf8f52851d722240 (diff) | |
parent | 23f13f666c42068ed086c5a5791063465db653c7 (diff) |
Merge branch 'feature/editablesettings'
Diffstat (limited to 'quackleio')
-rw-r--r-- | quackleio/dawgfactory.cpp | 326 | ||||
-rw-r--r-- | quackleio/dawgfactory.h | 94 | ||||
-rw-r--r-- | quackleio/flexiblealphabet.h | 2 | ||||
-rw-r--r-- | quackleio/gaddagfactory.cpp | 200 | ||||
-rw-r--r-- | quackleio/gaddagfactory.h | 74 | ||||
-rw-r--r-- | quackleio/iotest/iotest.pro | 10 | ||||
-rw-r--r-- | quackleio/quackleio.pro | 2 |
7 files changed, 703 insertions, 5 deletions
diff --git a/quackleio/dawgfactory.cpp b/quackleio/dawgfactory.cpp new file mode 100644 index 0000000..4dd4ec3 --- /dev/null +++ b/quackleio/dawgfactory.cpp @@ -0,0 +1,326 @@ +/* + * Quackle -- Crossword game artificial intelligence and analysis tool + * Copyright (C) 2005-2014 Jason Katz-Brown and John O'Laughlin. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + + +#include <iomanip> +#include <ios> +#include <iostream> +#include <QtCore> +#include <QCryptographicHash> + +#include "dawgfactory.h" +#include "util.h" + + +DawgFactory::DawgFactory(const QString &alphabetFile) + : m_encodableWords(0), m_unencodableWords(0), m_duplicateWords(0), + m_countsByLength(Quackle::FixedLengthString::maxSize, 0) +{ + QuackleIO::FlexibleAlphabetParameters *flexure = new QuackleIO::FlexibleAlphabetParameters; + flexure->load(alphabetFile); + m_alphas = flexure; + + m_root.insmallerdict = false; + m_root.playability = 0; + m_root.c = QUACKLE_BLANK_MARK; + m_root.pointer = 0; + m_root.lastchild = true; + + m_hash.int32ptr[0] = m_hash.int32ptr[1] = m_hash.int32ptr[2] = m_hash.int32ptr[3] = 0; +} + +DawgFactory::~DawgFactory() +{ + delete m_alphas; +} + +bool DawgFactory::pushWord(const UVString &word, bool inSmaller, int playability) +{ + UVString leftover; + Quackle::LetterString encodedWord = m_alphas->encode(word, &leftover); + if (leftover.empty()) + return pushWord(encodedWord, inSmaller, playability); + + ++m_unencodableWords; + return false; +} + +bool DawgFactory::pushWord(const Quackle::LetterString &word, bool inSmaller, int playability) +{ + if (m_root.pushWord(word, inSmaller, playability)) + { + ++m_encodableWords; + ++m_countsByLength[word.length()]; + hashWord(word); + return true; + } + ++m_duplicateWords; + return false; +} + +void DawgFactory::hashWord(const Quackle::LetterString &word) +{ + QCryptographicHash wordhash(QCryptographicHash::Md5); + wordhash.addData(word.constData(), word.length()); + QByteArray wordhashbytes = wordhash.result(); + m_hash.int32ptr[0] ^= ((const int32_t*)wordhashbytes.constData())[0]; + m_hash.int32ptr[1] ^= ((const int32_t*)wordhashbytes.constData())[1]; + m_hash.int32ptr[2] ^= ((const int32_t*)wordhashbytes.constData())[2]; + m_hash.int32ptr[3] ^= ((const int32_t*)wordhashbytes.constData())[3]; +} + +void DawgFactory::generate() +{ + const int bucketcount = 2000; + vector< int > bucket[bucketcount]; + + m_nodelist.clear(); + m_nodelist.push_back(&m_root); + m_root.print(m_nodelist); + + m_nodelist[0]->letterSum(); + + for (unsigned int i = 0; i < m_nodelist.size(); i++) + { + bucket[m_nodelist[i]->sum % bucketcount].push_back(i); + m_nodelist[i]->pointer = 0; + m_nodelist[i]->written = false; + m_nodelist[i]->deleted = false; + m_nodelist[i]->cloneof = NULL; + } + + for (int b = 0; b < bucketcount; b++) + { + if (bucket[b].size() == 0) + continue; + for (vector<int>::iterator it = bucket[b].begin(); it != bucket[b].end() - 1; it++) + { + if (!m_nodelist[(*it)]->deleted) + { + for (vector<int>::iterator jt = it + 1; jt != bucket[b].end(); jt++) + { + if (!m_nodelist[(*jt)]->deleted) + { + // cout << "Comparing " << (*it) << " and " << (*jt) << endl; + if (m_nodelist[(*it)]->equals(m_nodelist[(*jt)][0])) + { + //cout << "Hey! " << (*it) << " == " << (*jt) << endl; + // ones[l].erase(jt); + m_nodelist[(*jt)]->deleted = true; + m_nodelist[(*jt)]->cloneof = m_nodelist[(*it)]; + } + } + } + } + } + } + + m_nodelist.clear(); + m_nodelist.push_back(&m_root); + m_root.print(m_nodelist); +} + +void DawgFactory::writeIndex(const string &filename) +{ + ofstream out(filename.c_str(), ios::out | ios::binary); + unsigned char bytes[7]; + + bytes[0] = (m_encodableWords & 0x00FF0000) >> 16; + bytes[1] = (m_encodableWords & 0x0000FF00) >> 8; + bytes[2] = (m_encodableWords & 0x000000FF); + + out.put(1); // DAWG format version 1 + out.write(m_hash.charptr, sizeof(m_hash.charptr)); + out.write((char*)bytes, 3); + out.put((char)m_alphas->length()); + for (Quackle::Letter i = m_alphas->firstLetter(); i <= m_alphas->lastLetter(); i++) + { + QString letterText = QuackleIO::Util::uvStringToQString(m_alphas->letterParameter(i).text()); + QByteArray utf8bytes = letterText.toUtf8(); + string utf8LetterText(utf8bytes.constData()); + out << utf8LetterText << ' '; + } + + for (unsigned int i = 0; i < m_nodelist.size(); i++) { + //cout << m_nodelist[i]->c << " " << m_nodelist[i]->pointer << " " << m_nodelist[i]->t << " " << m_nodelist[i]->lastchild << endl; + Node* n = m_nodelist[i]; + unsigned int p; + if (m_nodelist[i]->deleted) + { + p = (unsigned int)(m_nodelist[i]->cloneof->pointer); + // n = m_nodelist[i]->cloneof; + } + else + p = (unsigned int)(m_nodelist[i]->pointer); + + bytes[0] = (p & 0x00FF0000) >> 16; + bytes[1] = (p & 0x0000FF00) >> 8; + bytes[2] = (p & 0x000000FF); + bytes[3] = n->c - QUACKLE_FIRST_LETTER; + + unsigned int pb = n->playability; + bytes[4] = (pb & 0x00FF0000) >> 16; + bytes[5] = (pb & 0x0000FF00) >> 8; + bytes[6] = (pb & 0x000000FF); + + if (n->lastchild) { + bytes[3] |= 64; + } + if (n->insmallerdict) { + bytes[3] |= 128; + } + + out.write((char*)bytes, 7); + } +} + +string DawgFactory::letterCountString() const +{ + ostringstream str; + for (int i = 0; i < 4; i++) + { + for (int j = 0; j < 4; j++) + { + const int letterCount = j * 4 + i + 2; + if (j != 0) + str << "\t"; + if (m_countsByLength[letterCount] > 0) + str << letterCount << "s: " << std::setw(7) << std::right << std::setfill(' ') << m_countsByLength[letterCount]; + } + str << "\n"; + } + return str.str(); +} + + +void DawgFactory::Node::print(vector< Node* > &nodelist) +{ + written = true; + + if (children.size() == 0) + return; + + if (!deleted) + { + //cout << " Setting pointer to " << nodelist.size() << " before I push_back the children." << endl; + pointer = nodelist.size(); + } + else + { + pointer = cloneof->pointer; + //cout << " Setting pointer to clone's (" << pointer << ") and not pushing anything." << endl; + } + + if (!deleted) + { + for (unsigned int i = 0; i < children.size(); i++) { + nodelist.push_back(&children[i]); + } + + for (unsigned int i = 0; i < children.size(); i++) { + if (!children[i].deleted) + children[i].print(nodelist); + else if (!children[i].cloneof->written) + children[i].cloneof->print(nodelist); + } + } + + if (children.size() > 0) + children[children.size() - 1].lastchild = true; +} + + +// returns true if the word was actually added...false if it's a duplicate. +bool DawgFactory::Node::pushWord(const Quackle::LetterString &word, bool inSmaller, int pb) +{ + bool added; + if (word.length() == 0) { + added = (playability == 0); + playability = (pb == 0) ? 1 : pb; // word terminators nodes are marked by nonzero playability in the v1 DAWG format + insmallerdict = inSmaller; + } + else { + char first = word[0]; + Quackle::LetterString rest = word.substr(1, word.length() - 1); + int index = -1; + + // cout << "first: " << first << ", rest: " << rest << endl; + + for (unsigned int i = 0; i < children.size(); i++) { + if (children[i].c == first) { + index = i; + break; + } + } + + if (index == -1) { + Node n; + n.c = first; + n.playability = 0; + n.insmallerdict = false; + n.pointer = 0; + n.lastchild = false; + children.push_back(n); + index = children.size() - 1; + } + + added = children[index].pushWord(rest, inSmaller, pb); + } + + sumexplored = false; + deleted = false; + written = false; + return added; +} + + +bool DawgFactory::Node::equals(const Node &n) const +{ + if (playability != n.playability) + return false; + if (c != n.c) + return false; + if (children.size() != n.children.size()) + return false; + if (insmallerdict != n.insmallerdict) + return false; + if (sum != n.sum) + return false; + + for (unsigned int i = 0; i < children.size(); i++) + if (!children[i].equals(n.children[i])) + return false; + + return true; +} + +int DawgFactory::Node::letterSum() const +{ + if (sumexplored) + return sum; + + sumexplored = true; + + // djb2 checksum + sum = 5381 * 33 + (int) c; + + for (unsigned int i = 0; i < children.size(); i++) + sum = (sum << 5) + sum + children[i].letterSum(); + + return sum; +} diff --git a/quackleio/dawgfactory.h b/quackleio/dawgfactory.h new file mode 100644 index 0000000..7af4d68 --- /dev/null +++ b/quackleio/dawgfactory.h @@ -0,0 +1,94 @@ +/* + * Quackle -- Crossword game artificial intelligence and analysis tool + * Copyright (C) 2005-2014 Jason Katz-Brown and John O'Laughlin. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef QUACKLE_DAWGFACTORY_H +#define QUACKLE_DAWGFACTORY_H + +#include <cstdint> +#include <string> +#include <vector> +#include "flexiblealphabet.h" + + +class DawgFactory { +public: + + DawgFactory(const QString &alphabetFile); + ~DawgFactory(); + + int wordCount() const { return m_encodableWords; }; + string letterCountString() const; + int nodeCount() const { return m_nodelist.size(); }; + int encodableWords() const { return m_encodableWords; }; + int unencodableWords() const { return m_unencodableWords; }; + int duplicateWords() const { return m_duplicateWords; }; + + bool pushWord(const UVString &word, bool inSmaller, int playability); + bool pushWord(const Quackle::LetterString &word, bool inSmaller, int playability); + void hashWord(const Quackle::LetterString &word); + void generate(); + void writeIndex(const string &filename); + + const char* hashBytes() { return m_hash.charptr; }; + +private: + class Node { + public: + bool pushWord(const Quackle::LetterString& word, bool inSmaller, int pb); + void print(vector< Node* > &m_nodelist); + + int letterSum() const; + bool equals(const Node &n) const; + + Quackle::Letter c; + bool insmallerdict; + int playability; // if nonzero, then terminates word + + vector<Node> children; + int pointer; + int location; + + bool lastchild; + + mutable bool sumexplored; + mutable unsigned int sum; + mutable vector<int> counts; + + bool deleted; + Node* cloneof; + bool written; + }; + + int m_encodableWords; + int m_unencodableWords; + int m_duplicateWords; + vector< Node* > m_nodelist; + vector<unsigned int> m_countsByLength; + Quackle::AlphabetParameters *m_alphas; + Node m_root; + union { + char charptr[16]; + std::int32_t int32ptr[4]; + } m_hash; + + static const char m_versionNumber = 1; +}; + +#endif + + diff --git a/quackleio/flexiblealphabet.h b/quackleio/flexiblealphabet.h index 89bd1f4..d5db68a 100644 --- a/quackleio/flexiblealphabet.h +++ b/quackleio/flexiblealphabet.h @@ -21,8 +21,6 @@ #include "alphabetparameters.h" -class QString; - namespace QuackleIO { diff --git a/quackleio/gaddagfactory.cpp b/quackleio/gaddagfactory.cpp new file mode 100644 index 0000000..53ccf04 --- /dev/null +++ b/quackleio/gaddagfactory.cpp @@ -0,0 +1,200 @@ +/* + * Quackle -- Crossword game artificial intelligence and analysis tool + * Copyright (C) 2005-2014 Jason Katz-Brown and John O'Laughlin. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + + +#include <iostream> +#include <QtCore> +#include <QCryptographicHash> + +#include "gaddagfactory.h" +#include "util.h" + +GaddagFactory::GaddagFactory(const UVString &alphabetFile) + : m_encodableWords(0), m_unencodableWords(0), m_alphas(NULL) +{ + if (!alphabetFile.empty()) + { + QuackleIO::FlexibleAlphabetParameters *flexure = new QuackleIO::FlexibleAlphabetParameters; + flexure->load(QuackleIO::Util::uvStringToQString(alphabetFile)); + m_alphas = flexure; + } + + // So the separator is sorted to last. + m_root.t = false; + m_root.c = QUACKLE_NULL_MARK; // "_" + m_root.pointer = 0; + m_root.lastchild = true; + + m_hash.int32ptr[0] = m_hash.int32ptr[1] = m_hash.int32ptr[2] = m_hash.int32ptr[3] = 0; +} + +GaddagFactory::~GaddagFactory() +{ + delete m_alphas; +} + +bool GaddagFactory::pushWord(const UVString &word) +{ + UVString leftover; + Quackle::LetterString encodedWord = m_alphas->encode(word, &leftover); + if (leftover.empty()) + { + pushWord(encodedWord); + return true; + } + + ++m_unencodableWords; + return false; +} + +bool GaddagFactory::pushWord(const Quackle::LetterString &word) +{ + ++m_encodableWords; + hashWord(word); + // FIXME: This hash will fail if duplicate words are passed in. + // But testing for duplicate words isn't so easy without keeping + // an entirely separate list. + + for (unsigned i = 1; i <= word.length(); i++) + { + Quackle::LetterString newword; + + for (int j = i - 1; j >= 0; j--) + newword.push_back(word[j]); + + if (i < word.length()) + { + newword.push_back(internalSeparatorRepresentation); // "^" + for (unsigned j = i; j < word.length(); j++) + newword.push_back(word[j]); + } + m_gaddagizedWords.push_back(newword); + } + return true; +} + +void GaddagFactory::hashWord(const Quackle::LetterString &word) +{ + QCryptographicHash wordhash(QCryptographicHash::Md5); + wordhash.addData(word.constData(), word.length()); + QByteArray wordhashbytes = wordhash.result(); + m_hash.int32ptr[0] ^= ((const int32_t*)wordhashbytes.constData())[0]; + m_hash.int32ptr[1] ^= ((const int32_t*)wordhashbytes.constData())[1]; + m_hash.int32ptr[2] ^= ((const int32_t*)wordhashbytes.constData())[2]; + m_hash.int32ptr[3] ^= ((const int32_t*)wordhashbytes.constData())[3]; +} + +void GaddagFactory::generate() +{ + sort(m_gaddagizedWords.begin(), m_gaddagizedWords.end()); + Quackle::WordList::const_iterator wordsEnd = m_gaddagizedWords.end(); + for (Quackle::WordList::const_iterator wordsIt = m_gaddagizedWords.begin(); wordsIt != wordsEnd; ++wordsIt) + m_root.pushWord(*wordsIt); + // for (const auto& words : gaddaggizedWords) + // m_root.pushWord(words); +} + +void GaddagFactory::writeIndex(const string &fname) +{ + m_nodelist.push_back(&m_root); + + m_root.print(m_nodelist); + + ofstream out(fname.c_str(), ios::out | ios::binary); + + out.put(1); // GADDAG format version 1 + out.write(m_hash.charptr, sizeof(m_hash.charptr)); + + for (size_t i = 0; i < m_nodelist.size(); i++) + { + unsigned int p = (unsigned int)(m_nodelist[i]->pointer); + if (p != 0) + p -= i; // offset indexing + + char bytes[4]; + unsigned char n1 = (p & 0x00FF0000) >> 16; + unsigned char n2 = (p & 0x0000FF00) >> 8; + unsigned char n3 = (p & 0x000000FF) >> 0; + unsigned char n4; + + n4 = m_nodelist[i]->c; + if (n4 == internalSeparatorRepresentation) + n4 = QUACKLE_NULL_MARK; + + if (m_nodelist[i]->t) + n4 |= 64; + + if (m_nodelist[i]->lastchild) + n4 |= 128; + + bytes[0] = n1; bytes[1] = n2; bytes[2] = n3; bytes[3] = n4; + out.write(bytes, 4); + } +} + + +void GaddagFactory::Node::print(vector< Node* >& nodelist) +{ + if (children.size() > 0) + { + pointer = nodelist.size(); + children[children.size() - 1].lastchild = true; + } + + for (size_t i = 0; i < children.size(); i++) + nodelist.push_back(&children[i]); + + for (size_t i = 0; i < children.size(); i++) + children[i].print(nodelist); +} + + +void GaddagFactory::Node::pushWord(const Quackle::LetterString& word) +{ + if (word.length() == 0) + { + t = true; + return; + } + + Quackle::Letter first = Quackle::String::front(word); + Quackle::LetterString rest = Quackle::String::allButFront(word); + int index = -1; + + for (size_t i = 0; i < children.size(); i++) + { + if (children[i].c == first) + { + index = i; + i = children.size(); + } + } + + if (index == -1) + { + Node n; + n.c = first; + n.t = false; + n.pointer = 0; + n.lastchild = false; + children.push_back(n); + index = children.size() - 1; + } + + children[index].pushWord(rest); +} diff --git a/quackleio/gaddagfactory.h b/quackleio/gaddagfactory.h new file mode 100644 index 0000000..3017085 --- /dev/null +++ b/quackleio/gaddagfactory.h @@ -0,0 +1,74 @@ +/* + * Quackle -- Crossword game artificial intelligence and analysis tool + * Copyright (C) 2005-2014 Jason Katz-Brown and John O'Laughlin. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef QUACKLE_GADDAGFACTORY_H +#define QUACKLE_GADDAGFACTORY_H + +#include <cstdint> +#include "flexiblealphabet.h" + + +class GaddagFactory { +public: + + static const Quackle::Letter internalSeparatorRepresentation = QUACKLE_FIRST_LETTER + QUACKLE_MAXIMUM_ALPHABET_SIZE; + + GaddagFactory(const UVString &alphabetFile); + ~GaddagFactory(); + + int wordCount() const { return m_gaddagizedWords.size(); }; + int nodeCount() const { return m_nodelist.size(); }; + int encodableWords() const { return m_encodableWords; }; + int unencodableWords() const { return m_unencodableWords; }; + + bool pushWord(const UVString &word); + bool pushWord(const Quackle::LetterString &word); + void hashWord(const Quackle::LetterString &word); + void sortWords() { sort(m_gaddagizedWords.begin(), m_gaddagizedWords.end()); }; + void generate(); + void writeIndex(const string &fname); + + const char* hashBytes() { return m_hash.charptr; }; + + +private: + class Node { + public: + Quackle::Letter c; + bool t; + vector<Node> children; + int pointer; + bool lastchild; + void pushWord(const Quackle::LetterString& word); + void print(vector< Node* >& m_nodelist); + }; + + int m_encodableWords; + int m_unencodableWords; + Quackle::WordList m_gaddagizedWords; + vector< Node* > m_nodelist; + Quackle::AlphabetParameters *m_alphas; + Node m_root; + union { + char charptr[16]; + std::int32_t int32ptr[4]; + } m_hash; +}; + +#endif + diff --git a/quackleio/iotest/iotest.pro b/quackleio/iotest/iotest.pro index 0bf472e..c7e994f 100644 --- a/quackleio/iotest/iotest.pro +++ b/quackleio/iotest/iotest.pro @@ -8,16 +8,20 @@ CONFIG += release debug { OBJECTS_DIR = obj/debug + QMAKE_LIBDIR += ../../lib/debug ../../quackleio/lib/debug } release { OBJECTS_DIR = obj/release + QMAKE_LIBDIR += ../../lib/release ../../quackleio/lib/release } -LIBS += -lquackleio -lquackle +win32:!win32-g++ { + LIBS += -lquackleio -llibquackle +} else { + LIBS += -lquackleio -lquackle +} -QMAKE_LFLAGS_RELEASE += -L../../lib/release -L../../quackleio/lib/release -QMAKE_LFLAGS_DEBUG += -L../../lib/debug -L../../quackleio/lib/debug # Input HEADERS += trademarkedboards.h diff --git a/quackleio/quackleio.pro b/quackleio/quackleio.pro index 195b0ad..8e43d29 100644 --- a/quackleio/quackleio.pro +++ b/quackleio/quackleio.pro @@ -15,6 +15,8 @@ release { MOC_DIR = moc +QMAKE_CXXFLAGS += -std=c++11 + # enable/disable debug symbols #CONFIG += debug staticlib CONFIG += release staticlib |