1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
#!/usr/bin/perl
# USAGE: generate_raw_words.pl generate_words_output
#
# For every line of every input file named on the command line, print the
# first whitespace-delimited field with all ';' characters removed, one
# field per output line. Everything after the first whitespace character
# on an input line is ignored.

use strict;
use warnings;

# Output is encoded as strict UTF-8.
binmode STDOUT, ':encoding(UTF-8)';

# read_and_spit()
#
# Reads each file listed in @ARGV (decoded as UTF-8) and writes the cleaned
# first field of every line to STDOUT.
#
# Takes no arguments and returns nothing.
# Dies if an input file cannot be opened or closed, instead of silently
# producing partial output.
sub read_and_spit {
    for my $arg (@ARGV) {
        open(my $input, '<:encoding(UTF-8)', $arg)
            or die "Cannot open '$arg' for reading: $!\n";

        while (my $line = <$input>) {
            chomp $line;

            # Only the first field is needed; the limit stops split from
            # scanning the rest of the line.
            my ($word) = split /\s/, $line, 2;

            # A blank line yields no fields at all. An empty output line is
            # still emitted so that output lines stay aligned with input
            # lines, as before.
            $word = '' unless defined $word;

            # Strip every ';' from the word.
            $word =~ tr/;//d;

            print "$word\n";
        }

        close $input
            or die "Cannot close '$arg': $!\n";
    }

    return;
}

read_and_spit();