summary refs log tree commit diff
path: root/new_language_generation/perl_tools/generate_raw_words.pl
diff options
context:
space:
mode:
Diffstat (limited to 'new_language_generation/perl_tools/generate_raw_words.pl')
-rwxr-xr-x new_language_generation/perl_tools/generate_raw_words.pl 23
1 files changed, 23 insertions, 0 deletions
diff --git a/new_language_generation/perl_tools/generate_raw_words.pl b/new_language_generation/perl_tools/generate_raw_words.pl
new file mode 100755
index 0000000..b32d810
--- /dev/null
+++ b/new_language_generation/perl_tools/generate_raw_words.pl
@@ -0,0 +1,23 @@
+#!/usr/bin/perl
+# USAGE: generate_raw_words.pl generate_words_output [more_files ...]
+use strict;
+use warnings;
+binmode STDOUT, ':utf8';    # words are written to STDOUT UTF-8 encoded
+
+# Print, one per line, the text before the first whitespace character of each
+# line of every file in @ARGV, with all ';' removed. Dies if an open fails.
+sub read_and_spit {
+    for my $arg (@ARGV) {
+        open(my $input, "<:encoding(utf8)", $arg)
+            or die "Cannot open $arg: $!";
+        while (my $line = <$input>) {    # lexical: leaves the global $_ alone
+            chomp $line;
+            my ($word) = split(/\s/, $line);
+            $word = '' unless defined $word;    # blank line: emit an empty line
+            $word =~ s/;//g;
+            print "$word\n";
+        }
+    }
+}
+# Entry point: process every file named on the command line.
+read_and_spit();