#! /usr/bin/perl
#
#
# hyphenex.pl
#
# This small filter converts a hyphenation exception log article for
# TUGBoat to a real \hyphenation block.
#
# Written by Werner Lemberg <wl@gnu.org>.
#
# Version 1.1 (2003/04/17)
#
# Public domain.
#
#
# Usage:
#
#   [perl] hyphenex.pl < tugboat-article > hyphenation-exceptions

use strict;
use warnings;

# hyphenation_entries(LINE)
#
# Convert a single line of the article into the list of hyphenation
# entries it contributes.
#
#   LINE     one input line, with or without its line terminator
#
# Returns a (possibly empty) list of strings; each string is one entry
# for the \hyphenation block, without indentation or newline.
#
# Recognized line shapes (after TeX commands and braces are stripped,
# fields are separated by whitespace):
#
#   \1 or \4   <f1> <word>                    -> <word>
#   \2 or \5   <f1> <word> <s1,s2,...>        -> <word>, <word><s1>, ...
#   \3 or \6   <f1> <word> <s1,...> <t1,...>  -> <word><s1>, ..., <word><t1>, ...
#   \tabalign  <col0> & <col1> ...            -> <col1>
#
# Lines which don't yield a word (e.g. a tag with nothing after it) are
# silently ignored instead of producing an empty entry.
sub hyphenation_entries {
  my ($line) = @_;

  # retain only lines starting with \1 ... \6 or \tabalign
  return () unless $line =~ m/^\\(?:[123456]|tabalign)/;

  # remove the line terminator, if there is one; `chomp' (unlike `chop')
  # doesn't eat the last character of a final line lacking a newline
  chomp $line;
  # remove all TeX commands except \1 ... \6
  $line =~ s/\\[^123456\s{]+//g;
  # remove all paired { ... }; repeat until nested pairs are gone too
  1 while $line =~ s/\{(.*?)\}/$1/g;
  # skip lines which now have only whitespace before `&'
  return () if $line =~ m/^\s*&/;
  # remove comments
  $line =~ s/%.*//;
  # remove trailing whitespace
  $line =~ s/\s*$//;
  # remove trailing `*' (used as a marker in the document)
  $line =~ s/\*$//;

  # split at whitespace
  my @field = split ' ', $line;
  my $tag = defined($field[0]) ? $field[0] : '';

  if ($tag =~ m/^\\[123456]\z/) {
    my $word = $field[2];
    # nothing to emit if the line has no hyphenated word at all
    return () unless defined $word;

    if ($tag eq "\\1" || $tag eq "\\4") {
      return ($word);
    }

    if ($tag eq "\\2" || $tag eq "\\5") {
      # the word itself, plus the word with each suffix appended;
      # multiple suffixes are separated by commata
      my @suffix_list = defined($field[3]) ? split(/,/, $field[3]) : ();
      return ($word, map { "$word$_" } @suffix_list);
    }

    # \3 and \6: only the suffixed forms, taken from two suffix fields
    # which are each a list separated by commata
    my @suffix_list = split /,/, join(',', grep { defined } @field[3, 4]);
    return map { "$word$_" } @suffix_list;
  }

  # for \tabalign (the command itself has been removed above), split at
  # `&' with trailing whitespace and take the second column
  my @column = split /&\s*/, $line;
  return () unless defined($column[1]) && length($column[1]);
  return ($column[1]);
}

# main()
#
# Read the article from the files named on the command line (or from
# standard input if there are none) and write the complete
# \hyphenation block, including header and trailer, to standard output.
sub main {
  # print header
  print "% Hyphenation exceptions for US English,\n";
  print "% based on the hyphenation exception log article in TUGBoat.\n";
  print "%\n";
  print "% This is an automatically generated file.  Do not edit!\n";
  print "%\n";
  print "% Please contact Barbara Beeton <bnb\@ams.org>\n";
  print "% for corrections and omissions.\n";
  print "\n";
  print "\\hyphenation{\n";

  while (my $line = <>) {
    print "  $_\n" foreach hyphenation_entries($line);
  }

  # print trailer
  print "}\n";
  print "\n";
  print "% EOF\n";

  return;
}

# run the filter only if executed as a script, not if loaded with
# `require' or `do' (e.g. for testing hyphenation_entries)
main() unless caller;

1;