summary | refs | log | tree | commit | diff
path: root/lexiconparameters.cpp
diff options
context:
space:
mode:
author John Fultz <jfultz@wolfram.com> 2015-09-26 10:47:07 -0500
committer John Fultz <jfultz@wolfram.com> 2015-09-26 10:47:07 -0500
commit 1214533715a1acfbc35ebe29ff78afee2f850226 (patch)
tree 8c4c9a5280cfbb48b48dff2ae1c567364ea97e89 /lexiconparameters.cpp
parent ed46987403dd923d3ba14df6eb676e1e163d1d8d (diff)
Work on DAWG generation.
V1 DAWGs now include an alphabet. Begin creating DAWGs which extend other DAWGs. In general, laying the groundwork for plain text import to DAWG.
Diffstat (limited to 'lexiconparameters.cpp')
-rw-r--r-- lexiconparameters.cpp 8
1 files changed, 8 insertions, 0 deletions
diff --git a/lexiconparameters.cpp b/lexiconparameters.cpp
index 6761fc1..9da3b70 100644
--- a/lexiconparameters.cpp
+++ b/lexiconparameters.cpp
@@ -75,6 +75,14 @@ class Quackle::V1LexiconInterpreter : public LexiconInterpreter
file.get(); // skip past version byte
file.read(lexparams.m_hash, sizeof(lexparams.m_hash));
file.read((char*)bytes, 3);
+
+ lexparams.m_utf8Alphabet.resize(file.get());
+ for (size_t i = 0; i < lexparams.m_utf8Alphabet.size(); i++)
+ {
+ file >> lexparams.m_utf8Alphabet[i];
+ file.get(); // separator space
+ }
+ file.get(); // whitespace separator
lexparams.m_wordcount = (bytes[0] << 16) | (bytes[1] << 8) | bytes[2];
while (!file.eof())
{