summaryrefslogtreecommitdiff
path: root/new_language_generation/perl_tools/generate_raw_words.pl
blob: b32d810aa28e68883d4d68939fa625b53e7e8878 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
#!/usr/bin/perl

# USAGE: generate_raw_words.pl generate_words_output

# Enable strictures so undeclared variables and suspicious constructs
# (e.g. use of undefined values) are reported instead of passing silently.
use strict;
use warnings;

# Words are read as decoded text, so encode them as UTF-8 when printing.
binmode STDOUT, ':utf8';

# Reads every file named in @ARGV as UTF-8 text and, for each input line,
# prints the first whitespace-delimited field with all ';' characters
# removed, followed by a newline, to the default output handle.
#
# Takes no arguments and returns nothing. A file that cannot be opened is
# reported on STDERR and skipped, so the remaining files are still processed.
sub read_and_spit {
	for my $arg (@ARGV) {
		my $input;
		unless (open($input, "<:encoding(utf8)", $arg)) {
			warn "generate_raw_words: cannot open '$arg': $!\n";
			next;
		}

		while (my $line = <$input>) {
			chomp $line;

			# Only the first field is used; the limit of 2 lets split stop
			# early without changing what that first field is.
			my ($word) = split(/\s/, $line, 2);

			# A blank line yields no fields at all. Keep emitting an empty
			# line for it, but without touching an undefined value.
			$word = '' unless defined $word;

			# Strip every ';' from the word.
			$word =~ tr/;//d;

			print "$word\n";
		}

		close($input);
	}

	return;
}

# Script entry point: process every file named on the command line.
read_and_spit();