6 files changed, 837 insertions, 0 deletions
diff --git a/new_language_generation/perl_tools/analyse_gcgs.pl b/new_language_generation/perl_tools/analyse_gcgs.pl
new file mode 100755
index 0000000..94e7cce
--- /dev/null
+++ b/new_language_generation/perl_tools/analyse_gcgs.pl
@@ -0,0 +1,56 @@
+#!/usr/bin/perl
+
+# USAGE: analyse_gcgs.pl *.gcg > gcganalysis
+
+use warnings;
+use strict;
+
+my %letter_points;
+my %letter_counts;
+
+binmode STDOUT, ':utf8';
+
+sub read_gcgs {
+	my $games_count = 0;
+	for my $arg (@ARGV) {
+		open (my $input, "<:encoding(utf8)", $arg);
+		++$games_count;
+
+		while (<$input>) {
+			chomp;
+			next if (!/^\>/);
+
+			my ($player, $rack, $position, $play, $score, $cumulativescore) = split /\s+/, $_;
+
+			next if (! defined $cumulativescore);
+			if ($play =~ /\[/) {
+				print "blank play! $play for $score\n";
+				next;
+			}
+
+			for my $letter (split /|/, $play) {
+				++$letter_counts{$letter};
+				$letter_points{$letter} += int($score);
+
+				#print "$letter now seen " . $letter_counts{$letter} . " times, scored " . $letter_points{$letter} . " points\n";
+			}
+		}
+	}
+
+	print "# Read $games_count games.\n";
+}
+
+sub spit_analysis {
+	my %points_per_turn;
+	for my $letter (keys %letter_counts) {
+		$points_per_turn{$letter} = $letter_points{$letter} / $letter_counts{$letter};
+	}
+
+	for my $letter (sort { $points_per_turn{$b} <=> $points_per_turn{$a} } keys %points_per_turn) {
+		# TODO lame that this always puts out un-bar-surrounded letters
+		print "$letter " . $points_per_turn{$letter} . "\n";
+	}
+}
+
+read_gcgs();
+spit_analysis();
diff --git a/new_language_generation/perl_tools/generate_alphabet.pl b/new_language_generation/perl_tools/generate_alphabet.pl
new file mode 100755
index 0000000..31006a7
--- /dev/null
+++ b/new_language_generation/perl_tools/generate_alphabet.pl
@@ -0,0 +1,281 @@
+#!/usr/bin/perl
+
+use warnings;
+use strict;
+
+use Getopt::Long;
+use Pod::Usage;
+
+use Lingua::KO::Hangul::Util qw(:all);
+use POSIX qw(ceil floor);
+
+my $distribution_filename = 'undef';
+my $analysis_filename = 'undef';
+my $count_assigner_filename = 'undef';
+
+my $counts_only = 0;
+
+my $number_of_distinct_tiles = 30;
+my $number_of_blanks = 2;
+
+my $help = 0;
+
+GetOptions('distribution=s' => \$distribution_filename,
+           'analysis=s' => \$analysis_filename,
+           'count_assigner=s' => \$count_assigner_filename,
+           'number_of_distinct_tiles=i' => \$number_of_distinct_tiles,
+           'number_of_blanks=i' => \$number_of_blanks,
+           'counts_only' => \$counts_only,
+		   'help|?' => \$help) or pod2usage(2);
+
+pod2usage(1) if $help;
+
+=pod
+
+=head1 NAME
+
+generate_alphabet.pl - Print out quackle alphabet file
+
+=head1 SYNOPSIS
+
+--distribution=<file>  
+	output of generate_distribution.pl
+
+--analysis=<file>  
+	output of analyse_gcgs.pl
+
+--count_assigner=<file>
+	file with one number per line. Number on nth line is how many
+	tiles are in bag of letter that appears nth least often.
+
+--number_of_distinct_tiles=[1, 30]
+	how many tiles are in the alphabet (should be same as length
+	of count assigner file).
+
+--number_of_blanks=<nonnegative integer>
+	how many scoreless blanks are in the bag.
+
+--counts_only
+	only output the counts of letters, in same format as
+	count_assigner input
+
+=cut
+
+my @count_assigner = ();
+my $has_assigned_counts = 0;
+
+my %character_counts;
+my $count_sum = 0;
+my $inverse_count_sum = 0;
+
+my %character_points_per_appearance;
+my $lowest_points_per_appearance = -1;
+my $highest_points_per_appearance = 0;
+my $maximum_value = 10;
+
+binmode STDOUT, ':utf8';
+
+sub read_count_assigner {
+	if ($count_assigner_filename eq 'undef') {
+		$has_assigned_counts = 0;
+		return;
+	}
+
+	if (!(-e $count_assigner_filename)) {
+		print STDERR "Count assigner $count_assigner_filename does not exist; ignoring.\n";
+		$has_assigned_counts = 0;
+		return;
+	}
+
+	$has_assigned_counts = 1;
+
+	open (my $input, "<:encoding(utf8)", $count_assigner_filename);
+
+	my $i = 0;
+	while (<$input>) {
+		chomp;
+		next if (/^\#/);
+
+		my ($count, @rest) = split /\s/, $_;
+		$count_assigner[$i] = $count;
+		++$i;
+	}
+}
+
+sub read_analysis {
+	if ($analysis_filename eq 'undef') {
+		return;
+	}
+
+	if (!(-e $analysis_filename)) {
+		print STDERR "Analysis $analysis_filename does not exist; ignoring.\n";
+		return;
+	}
+
+	open (my $input, "<:encoding(utf8)", $analysis_filename);
+
+	my $i = 0;
+	while (<$input>) {
+		chomp;
+		next if (/^\#/);
+
+		my ($letter, $points_per_appearance) = split /\s/, $_;
+		next if (! defined $points_per_appearance);
+
+		next if ($letter =~ /\[/);
+		next if ($letter =~ /\]/);
+
+		$letter =~ s/\|//g;
+
+		$character_points_per_appearance{$letter} = $points_per_appearance;
+	}
+
+	calculate_bounding_points_per_appearance_values();
+}
+
+sub calculate_bounding_points_per_appearance_values {
+	my $i = 0;
+	for my $character (sort { $character_counts{$a} <=> $character_counts{$b} } keys %character_counts) {
+		if (exists $character_counts{$character} && count_of_character($i, $character) > 0) {
+			my $points_per_appearance = $character_points_per_appearance{$character};
+			if ($lowest_points_per_appearance < 0 || $points_per_appearance < $lowest_points_per_appearance) {
+				$lowest_points_per_appearance = $points_per_appearance;
+			}
+
+			if ($points_per_appearance > $highest_points_per_appearance) {
+				$highest_points_per_appearance = $points_per_appearance;
+			}
+		}
+
+		++$i;
+	}
+}
+
+sub read_characters {
+	open (my $input, "<:encoding(utf8)", $distribution_filename);
+
+	my $i = 0;
+	while (<$input>) {
+		last if ($i >= $number_of_distinct_tiles);
+
+		chomp;
+		my ($letter, $count) = split(/\s/, $_);
+
+		$character_counts{$letter} = $count;
+		$count_sum += $count;
+		$inverse_count_sum += exp(-.0005*$count);
+		++$i;
+	}
+}
+
+sub max {
+	return $_[0] > $_[1]? $_[0] : $_[1];
+}
+
+sub value_of_character {
+	my ($letter) = @_;
+	$letter =~ s/\|//g;
+
+	if (exists $character_points_per_appearance{$letter}) {
+		my $value_multiplier = $maximum_value / ($highest_points_per_appearance - $lowest_points_per_appearance);
+
+		return max(floor(($highest_points_per_appearance - $character_points_per_appearance{$letter}) * $value_multiplier), 1);
+	}
+
+	# This was written VERY late at night.
+	#print "computing value of $letter; count_sum=$count_sum; my count=$character_counts{$letter}\n";
+	return max(floor(187 * exp(-.0005*$character_counts{$letter}) / $inverse_count_sum), 1);
+}
+
+sub hallucinate_assigned_counts {
+	my $total_rounded_count = $number_of_blanks;
+	my $i = 0;
+	for my $character (sort { $character_counts{$a} <=> $character_counts{$b} } keys %character_counts) {
+		my $original_count = $character_counts{$character};
+		my $rounded_count;
+
+		my $truth = 100 * $original_count / $count_sum;
+		if ($truth < 0.1) {
+			$rounded_count = 0;
+		} else {
+			$rounded_count = max(floor($truth), 1);
+		}
+
+		$total_rounded_count += $rounded_count;
+		$count_assigner[$i] = $rounded_count;
+
+		++$i;
+	}
+
+	$has_assigned_counts = 1;
+}
+
+sub count_of_character {
+	my ($index, $count) = @_;
+
+	if ($has_assigned_counts && $index < @count_assigner) {
+		return $count_assigner[$index]
+	}
+
+	return 0;
+}
+
+sub spit_characters {
+	if ($counts_only) {
+		my $i = 0;
+		for my $character (sort { $character_counts{$a} <=> $character_counts{$b} } keys %character_counts) {
+			my $count = count_of_character($i, $character_counts{$character});
+			print "$count #" . ($i + 1) . ": $character\n";
+			++$i;
+		}
+		return;
+	}
+
+	my $total_score = 0;
+	my $total_count = $number_of_blanks;
+	print "blank 0 $number_of_blanks\n";
+
+	print "#char\tblank\tscore\tcount\tvowel\n";
+	my $i = 0;
+	for my $character (sort { $character_counts{$a} <=> $character_counts{$b} } keys %character_counts) {
+		my $blank_text = lc($character);
+
+		# TODO lower-case blanks currently broken.
+		my $disable_lower_case_blanks = 1;
+		if ($disable_lower_case_blanks || $blank_text eq $character) {
+			$blank_text = '['.$character.']';
+			$blank_text =~ s/\|//g;
+		}
+
+		my $score = value_of_character($character);
+		my $count = count_of_character($i, $character_counts{$character});
+		$total_count += $count;
+		$total_score += $score;
+
+		my $is_vowel = '0';
+
+		# TODO make work for unfilled jamo
+		if (isStandardForm($character)) {
+			my @codepoints = unpack('U*', $character);
+			if ($codepoints[0] == 0x115F) {
+				$is_vowel = '1';
+			}
+		}
+
+		++$i;
+
+		print "$character\t$blank_text\t$score\t$count\t$is_vowel\n";
+	}
+
+	print "# Total count: $total_count\n";
+	print "# Total score: $total_score\n";
+}
+
+read_characters();
+read_count_assigner();
+if (!$has_assigned_counts) {
+	hallucinate_assigned_counts();
+}
+
+read_analysis();
+spit_characters();
diff --git a/new_language_generation/perl_tools/generate_distribution.pl b/new_language_generation/perl_tools/generate_distribution.pl
new file mode 100755
index 0000000..e86e04a
--- /dev/null
+++ b/new_language_generation/perl_tools/generate_distribution.pl
@@ -0,0 +1,57 @@
+#!/usr/bin/perl
+
+# USAGE: generate_distribution.pl word_list > character_list
+
+use warnings;
+use strict;
+
+use Getopt::Long;
+use Pod::Usage;
+
+my $help = 0;
+
+GetOptions(
+           'help|?' => \$help) or pod2usage(2);
+
+pod2usage(1) if $help;
+
+=pod
+
+=head1 NAME
+
+generate_distribution.pl - Generate letter distribution data
+
+=head1 SYNOPSIS
+
+One argument: the output of generate_words.pl.
+
+=cut
+
+binmode STDOUT, ':utf8';
+
+my %characters;
+
+sub read_characters {
+	for my $arg (@ARGV) {
+		open (my $input, "<:encoding(utf8)", $arg);
+
+		while (<$input>) {
+			chomp;
+			my ($word, $count) = split(/\s/, $_);
+
+			for my $character (split(/;/, $word)) {
+				$characters{$character} += $count;
+			}
+		}
+	}
+}
+
+sub spit_characters {
+	my @sorted_characters = sort { $characters{$b} <=> $characters{$a} } keys %characters;
+	for my $character (@sorted_characters) {
+		print "$character $characters{$character}\n";
+	}
+}
+
+read_characters();
+spit_characters();
diff --git a/new_language_generation/perl_tools/generate_raw_words.pl b/new_language_generation/perl_tools/generate_raw_words.pl
new file mode 100755
index 0000000..b32d810
--- /dev/null
+++ b/new_language_generation/perl_tools/generate_raw_words.pl
@@ -0,0 +1,23 @@
+#!/usr/bin/perl
+
+# USAGE: generate_raw_words.pl generate_words_output
+
+binmode STDOUT, ':utf8';
+
+sub read_and_spit {
+	for my $arg (@ARGV) {
+		open (my $input, "<:encoding(utf8)", $arg);
+
+		while (<$input>) {
+			chomp;
+			my ($word, $count) = split(/\s/, $_);
+
+			my $clean_word = $word;
+			$clean_word =~ s/;//g;
+
+			print "$clean_word\n";
+		}
+	}
+}
+
+read_and_spit();
diff --git a/new_language_generation/perl_tools/generate_tileset.pl b/new_language_generation/perl_tools/generate_tileset.pl
new file mode 100755
index 0000000..0df2e3b
--- /dev/null
+++ b/new_language_generation/perl_tools/generate_tileset.pl
@@ -0,0 +1,103 @@
+#!/usr/bin/perl
+
+use warnings;
+use strict;
+
+use Getopt::Long;
+use Pod::Usage;
+
+my $alphabet_filename = 'undef';
+my $help = 0;
+
+GetOptions('alphabet=s' => \$alphabet_filename,
+		   'help|?' => \$help) or pod2usage(2);
+
+pod2usage(1) if $help;
+
+=pod
+
+=head1 NAME
+
+generate_tileset.pl - Print out HTML tileset
+
+=head1 SYNOPSIS
+
+--alphabet=<file>  
+	output of generate_alphabet.pl
+
+=cut
+
+binmode STDOUT, ':utf8';
+
+# tiles{count}{letter} = score
+my %tiles;
+
+sub read_alphabet {
+	if ($alphabet_filename eq 'undef') {
+		return;
+	}
+
+	open (my $input, "<:encoding(utf8)", $alphabet_filename);
+
+	my $i = 0;
+	while (<$input>) {
+		chomp;
+		next if (/^\#/);
+
+		my ($letter, $blank_text, $score, $count) = split /\s/, $_;
+
+		if ($letter eq 'blank') {
+			$tiles{$score}{"&#12288;"} = 0;
+			next;
+		}
+
+		next if (! defined $count);
+
+		$letter =~ s/\|//g;
+		$tiles{$count}{$letter} = $score;
+	}
+}
+
+sub spit_characters {
+	my $fontsize = "1.2cm";
+	my $score_fontsize = "18pt";
+	#my $color="#458B74";
+	#my $color="black";
+	#my $color="#EAC117";
+	my $color="darkblue";
+	my $width = "5in";
+	my $height = "0";
+
+	print "<html>\n<head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n</head>\n<body bgcolor=white>\n";
+
+	print "<table cellspacing=2 cellpadding=0>\n";
+
+	print "<tr>";
+	my $already_in_row = 0;
+	for my $count (sort keys %tiles) {
+		for my $letter (sort keys %{$tiles{$count}}) {
+			for my $i (1 .. $count) {
+				if ($already_in_row >= 10) {
+					print "</tr><tr>\n";
+					$already_in_row = 0;
+				}
+
+				my $score = $tiles{$count}{$letter};
+				my $align = 'center';
+				print "<td width=$width height=$height align=$align><nobr>\n";
+				print "<span style=\"font-size: $fontsize; color: $color\">$letter</span>";
+				print "<span style=\"font-size: $score_fontsize; color: $color\"><sub>$score</sub></span>";
+				print "</nobr></td>\n";
+
+				++$already_in_row;
+			}
+		}
+	}
+	print "</tr>\n";
+
+	print "</table>\n";
+	print "</body>\n</html>\n";
+}
+
+read_alphabet();
+spit_characters();
diff --git a/new_language_generation/perl_tools/generate_words.pl b/new_language_generation/perl_tools/generate_words.pl
new file mode 100755
index 0000000..858d978
--- /dev/null
+++ b/new_language_generation/perl_tools/generate_words.pl
@@ -0,0 +1,317 @@
+#!/usr/bin/perl
+
+use warnings;
+use strict;
+use Encode qw/encode decode/; 
+use Getopt::Long;
+use Pod::Usage;
+use Unicode::Normalize;
+use Lingua::KO::Hangul::Util qw(:all);
+
+my $granularity = 'letter';
+my $input_encoding = 'utf8';
+my $split_at_spaces_only = 0;
+my $help = 0;
+
+GetOptions('granularity=s' => \$granularity,
+           'input_encoding=s' => \$input_encoding,
+           'split_at_spaces_only' => \$split_at_spaces_only,
+		   'help|?' => \$help) or pod2usage(2);
+
+pod2usage(1) if $help;
+
+=pod
+
+=head1 NAME
+
+generate_words.pl - Generate a word list
+
+=head1 SYNOPSIS
+
+--granularity=<letter|jamo>  
+	how big a letter is. 
+
+--input_encoding=<utf8|euc-kr>
+	incoding of input texts
+
+=cut
+
+my %words;
+
+binmode STDOUT, ':utf8';
+
+sub read_words {
+	@ARGV = map { /\.(gz|Z)$/ ? "gzip -dc $_ |" : $_  } @ARGV;
+
+	while (<ARGV>) {
+		chomp;
+		my @words;
+		if ($split_at_spaces_only) {
+			@words = split(/ /, $_);
+		} else {
+			@words = split(/[\s\w\-\)\(\,\.\;\:\!\"\~\=\'\`\%\<\>\[\]\/\‘\’\?\#\@]/, $_);
+		}
+
+		for my $word (@words) {
+			my $utf8_word;
+			$utf8_word = decode($input_encoding, $word);
+
+			if (!defined $utf8_word || length($utf8_word) == 0) {
+				next;
+			}
+
+			my $normalized_utf8_word = uc($utf8_word);
+
+			my $expanded_word = expand_word($normalized_utf8_word);
+
+			if (length $expanded_word > 0) {
+				$words{$expanded_word}++;
+			}
+		}
+	}
+}
+
+sub expand_word {
+	my ($word) = @_;
+	my @ret;
+
+	if ($granularity eq 'jamo') {
+		@ret = expand_jamo($word);
+	} elsif ($granularity eq 'letter') {
+		@ret = split //, $word;
+	} else {
+		die "Granularity '$granularity' is not recognized.\n";
+	}
+
+	return join ';', @ret;
+}
+
+sub expand_jamo {
+	my ($word) = @_;
+
+	if (length($word) == 0) {
+		return $word;
+	}
+
+	my @syllables = split(//, $word);
+	my @cumulative_jamos; 
+
+	for my $syllable (@syllables) {
+		my $decomposed_string = decomposeSyllable($syllable);
+
+		for my $jamo (split(//, $decomposed_string)) {
+			push @cumulative_jamos, normalize_jamo($jamo);
+		}
+	}
+
+	return @cumulative_jamos;
+}
+
+sub spit_words {
+	for my $word (sort keys %words) {
+		print "$word $words{$word}\n" if (length($word) > 1);
+	}
+}
+
+sub execute {
+	read_words();
+	spit_words();
+}
+
+# Following is Hangul-specific code and then an execute() call.
+# Constants and ideas borrowed from Lingua::KO::Hangul::Util.
+
+use constant SBase  => 0xAC00;
+use constant SFinal => 0xD7A3; # SBase -1 + SCount
+use constant SCount =>  11172; # LCount * NCount
+use constant NCount =>    588; # VCount * TCount
+use constant LBase  => 0x1100;
+use constant LFinal => 0x1112;
+use constant LCount =>     19; # scalar @JamoL
+use constant VBase  => 0x1161;
+use constant VFinal => 0x1175;
+use constant VCount =>     21; # scalar @JamoV
+use constant TBase  => 0x11A7;
+use constant TFinal => 0x11C2;
+use constant TCount =>     28; # scalar @JamoT
+use constant JBase  => 0x1100;
+use constant JFinal => 0x11FF;
+use constant JCount =>    256;
+
+use constant JamoLIni   => 0x1100;
+use constant JamoLFin   => 0x1159;
+use constant JamoLFill  => 0x115F;
+use constant JamoVIni   => 0x1160;
+use constant JamoVFin   => 0x11A2;
+use constant JamoTIni   => 0x11A8;
+use constant JamoTFin   => 0x11F9;
+
+my @JamoL = ( # Initial (HANGUL CHOSEONG)
+    "G", "GG", "N", "D", "DD", "R", "M", "B", "BB",
+    "S", "SS", "", "J", "JJ", "C", "K", "T", "P", "H",
+  );
+
+my @JamoV = ( # Medial  (HANGUL JUNGSEONG)
+    "A", "AE", "YA", "YAE", "EO", "E", "YEO", "YE", "O",
+    "WA", "WAE", "OE", "YO", "U", "WEO", "WE", "WI",
+    "YU", "EU", "YI", "I",
+  );
+
+my @JamoT = ( # Final    (HANGUL JONGSEONG)
+    "", "G", "GG", "GS", "N", "NJ", "NH", "D", "L", "LG", "LM",
+    "LB", "LS", "LT", "LP", "LH", "M", "B", "BS",
+    "S", "SS", "NG", "J", "C", "K", "T", "P", "H",
+  );
+
+my @TtoL = (
+    "", # ""
+	"G", # "G"
+	"G;G", # "GG"
+	"G;S", # "GS"
+	"N", # "N"
+	"N;J", # "NJ"
+	"N;H", # "NH"
+	"D", # "D"
+	"R", # "L"
+	"R;G", # "LG"
+	"R;M", # "LM"
+    "R;B", # "LB"
+	"R;S", # "LS"
+	"R;T", # "LT"
+	"R;P", # "LP"
+	"R;H", # "LH"
+	"M", # "M"
+	"B", # "B"
+	"B;S", # "BS"
+    "S", # "S"
+	"SS", # "SS"
+	"", # "NG"
+	"J", # "J"
+	"C", # "C"
+	"K", # "K"
+	"T", # "T"
+	"P", # "P"
+	"H", # "H"
+);
+
+my(%CodeL, %CodeV, %CodeT);
+@CodeL{@JamoL} = 0 .. LCount-1;
+@CodeV{@JamoV} = 0 .. VCount-1;
+@CodeT{@JamoT} = 0 .. TCount-1;
+
+# Normalizes jamo then inserts fillers
+
+sub normalize_jamo {
+	my ($jamo) = @_;
+
+	my @homogenized = convert_trailing_to_leading($jamo);
+
+	my @homogenized_filled;
+	for my $homogenous_jamo_codepoint (@homogenized) {
+		my $filled = fill_jamo($homogenous_jamo_codepoint);
+		push @homogenized_filled, $filled;
+	}
+
+	return @homogenized_filled;
+}
+
+# Takes a L, V, or T jamo codepoint, fills in the rest,
+# and returns a unicode string.
+sub fill_jamo {
+	my ($codepoint) = @_;
+	my $syllable_type = getSyllableType($codepoint);
+
+	# technique = jamo_alone, raw_jamo, romanize, jamo_letter, or crazy
+	my $technique = 'jamo_letter';
+
+	if ($technique eq 'raw_jamo') {
+		return '|' . pack('U*', $codepoint) . '|';
+	}
+
+	if ($technique eq 'jamo_alone') {
+		if ($syllable_type eq 'T') {
+			return "\x{25CB}";
+		}
+		return insertFiller(pack('U*', $codepoint));
+	}
+
+	if ($technique eq 'romanize') {
+		return romanize_codepoint($codepoint);
+	}
+
+	my $leading;
+	my $vowel;
+	my $trailing = 0;
+
+	if ($syllable_type eq 'L') {
+		$leading = $codepoint - JamoLIni;
+		$vowel = $CodeV{'A'};
+	} elsif ($syllable_type eq 'V') {
+		$leading = $CodeL{''};
+		$vowel = $codepoint - JamoVIni - 1;
+	} elsif ($syllable_type eq 'T') {
+		# Always the circle.
+		$leading = $CodeL{''};
+		$vowel = $CodeV{'A'};
+		$trailing = $CodeT{'NG'};
+	} else {
+		return pack('U*', $codepoint);
+	}
+
+    return pack('U*', SBase + $leading * NCount + $vowel * TCount + $trailing);
+}
+
+sub romanize_codepoint {
+	my ($codepoint) = @_;
+
+	my $roman;
+
+	my $syllable_type = getSyllableType($codepoint);
+	if ($syllable_type eq 'L') {
+		$roman = $JamoL[$codepoint - JamoLIni];
+	} elsif ($syllable_type eq 'V') {
+		$roman = $JamoV[$codepoint - JamoVIni - 1];
+	} elsif ($syllable_type eq 'T') {
+		$roman = "NG";
+	} else {
+		return pack('U*', $codepoint);
+	}
+
+	return "|$roman|";
+}
+
+# Converts a trailing consonant to a string of leading ones.
+# Returns an array of codepoints.
+sub convert_trailing_to_leading {
+	my ($jamo) = @_;
+	my @ret_codepoints = unpack('U*', $jamo);
+
+	die 'normalize_jamo called with more than one codepoint' if (@ret_codepoints > 1);
+	my $codepoint = $ret_codepoints[0];
+
+	# Elide placeholder consonant.
+	if (@ret_codepoints == 1 && $ret_codepoints[0] == JamoLIni + $CodeL{''}) {
+		return ();
+	}
+
+	if (getSyllableType($codepoint) eq 'T') {
+		my $Tindex = $codepoint - JamoTIni + 1;
+
+		my @leaders = split /;/, $TtoL[$Tindex];
+		push @leaders, '' if (@leaders == 0);  # '' gets lost otherwise
+
+		@ret_codepoints = ();
+		for my $leader (@leaders) {
+			my $new_codepoint = JamoLIni + $CodeL{$leader};
+			push @ret_codepoints, $new_codepoint;
+		}
+	}
+
+	if (@ret_codepoints == 1 && $ret_codepoints[0] == JamoLIni + $CodeL{''}) {
+		$ret_codepoints[0] = JamoTIni + $CodeT{'NG'} - 1;
+	}
+
+	return @ret_codepoints;
+}
+
+execute();