1325324Sgordon# Convert tzdata source into a smaller version of itself.
2325324Sgordon
3325324Sgordon# Contributed by Paul Eggert.  This file is in the public domain.
4325324Sgordon
5325324Sgordon# This is not a general-purpose converter; it is designed for current tzdata.
6325324Sgordon# 'zic' should treat this script's output as if it were identical to
7325324Sgordon# this script's input.
8325324Sgordon
9325324Sgordon
# Return a new, short rule name.
#
# Each call consumes the next value of the global counter N_RULE_NAMES
# and spells it out using letters and punctuation: the first names are
# one character long, and longer names follow once those run out.
# All parameters are local variables; callers pass no arguments.

function gen_rule_name(alphabet, base, rule_name, n, digit)
{
  alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" \
	     "abcdefghijklmnopqrstuvwxyz" \
	     "!$%&'()*+,./:;<=>?@[\\]^_`{|}~"
  base = length(alphabet)
  rule_name = ""
  n = n_rule_names++

  # Build the name from its last character to its first.
  while (1) {
    # Once a character has been produced, a remaining value of at most
    # BASE is shifted down by one, so that the leading character of a
    # multi-character name starts at the first letter of ALPHABET.
    if (rule_name != "" && n <= base)
      n--
    digit = n % base
    rule_name = substr(alphabet, digit + 1, 1) rule_name
    n = (n - digit) / base
    if (!n)
      break
  }

  return rule_name
}
32325324Sgordon
# Process an input line and save it for later output.
#
# LINE is one line of zic input.  Its comment is removed, its keywords,
# times, weekday names and month names are abbreviated, its rule name
# (if any) is replaced by a generated short name, and the result is
# appended to OUTPUT_LINE.  "R SystemV" lines, and Rule lines having a
# field that ends in "Asia" after at least one other character, are
# dropped instead of being saved.
#
# Global variables used:
#   rule[NAME]     generated short name for the original rule NAME
#   zonename       name of the Zone or Link most recently started
#                  ("" after a Rule line)
#   zonedef[NAME]  1 + the OUTPUT_LINE index of the first line of the
#                  most recent Zone or Link definition of NAME
#   output_line[]  saved output lines, indexed from 0; "" means deleted
#   nout           number of entries in OUTPUT_LINE
#
# All parameters other than LINE are local variables.

function process_input_line(line, field, end, i, n, startdef)
{
  # Remove comments, normalize spaces, and append a space to each line.
  sub(/#.*/, "", line)
  line = line " "
  gsub(/[[:space:]]+/, " ", line)

  # Abbreviate keywords.  Do not abbreviate "Link" to just "L",
  # as pre-2017c zic erroneously diagnoses "Li" as ambiguous.
  sub(/^Link /, "Li ", line)
  sub(/^Rule /, "R ", line)
  sub(/^Zone /, "Z ", line)

  # SystemV rules are not needed.
  if (line ~ /^R SystemV /) return

  # Replace FooAsia rules with the same rules without "Asia", as they
  # are duplicates.
  if (match(line, /[^ ]Asia /)) {
    if (line ~ /^R /) return
    line = substr(line, 1, RSTART) substr(line, RSTART + 5)
  }

  # Abbreviate times: first drop leading zeros after a space or colon,
  # then drop any ":0" that is not followed by another colon.
  while (match(line, /[: ]0+[0-9]/))
    line = substr(line, 1, RSTART) substr(line, RSTART + RLENGTH - 1)
  while (match(line, /:0[^:]/))
    line = substr(line, 1, RSTART - 1) substr(line, RSTART + 2)

  # Abbreviate weekday names.  Do not abbreviate "Sun" and "Sat", as
  # pre-2017c zic erroneously diagnoses "Su" and "Sa" as ambiguous.
  # END is the position just past the match, so END - 1 is the
  # delimiter that followed the weekday name.
  while (match(line, / (last)?(Mon|Wed|Fri)[ <>]/)) {
    end = RSTART + RLENGTH
    line = substr(line, 1, end - 4) substr(line, end - 1)
  }
  while (match(line, / (last)?(Tue|Thu)[ <>]/)) {
    end = RSTART + RLENGTH
    line = substr(line, 1, end - 3) substr(line, end - 1)
  }

  # Abbreviate "max", "only" and month names.
  # Do not abbreviate "min", as pre-2017c zic erroneously diagnoses "mi"
  # as ambiguous.
  gsub(/ max /, " ma ", line)
  gsub(/ only /, " o ", line)
  gsub(/ Jan /, " Ja ", line)
  gsub(/ Feb /, " F ", line)
  gsub(/ Apr /, " Ap ", line)
  gsub(/ Aug /, " Au ", line)
  gsub(/ Sep /, " S ", line)
  gsub(/ Oct /, " O ", line)
  gsub(/ Nov /, " N ", line)
  gsub(/ Dec /, " D ", line)

  # Strip leading and trailing space.
  sub(/^ /, "", line)
  sub(/ $/, "", line)

  # Remove unnecessary trailing zero fields.
  sub(/ 0+$/, "", line)

  # Remove unnecessary trailing days-of-month "1".
  if (match(line, /[[:alpha:]] 1$/))
    line = substr(line, 1, RSTART)

  # Remove unnecessary trailing " Ja" (for January).
  sub(/ Ja$/, "", line)

  n = split(line, field)

  # Abbreviate rule names.  The rule name is field 4 of a Zone line and
  # field 2 of any other line except a Link line, which has none.
  i = field[1] == "Z" ? 4 : field[1] == "Li" ? 0 : 2
  if (i && field[i] ~ /^[^-+0-9]/) {
    if (!rule[field[i]])
      rule[field[i]] = gen_rule_name()
    field[i] = rule[field[i]]
  }

  # If this zone supersedes an earlier one, delete the earlier one
  # from the saved output lines.
  startdef = ""
  if (field[1] == "Z")
    zonename = startdef = field[2]
  else if (field[1] == "Li")
    zonename = startdef = field[3]
  else if (field[1] == "R")
    zonename = ""
  if (startdef) {
    i = zonedef[startdef]
    if (i) {
      # Blank the earlier definition's first line, then every saved
      # line after it that begins with "-", "+" or a digit.
      do {
        output_line[i - 1] = ""
      } while (output_line[i++] ~ /^[-+0-9]/)
    }

    # Record where this definition starts.  This must be done only for
    # the line that starts a definition, not for the lines that follow
    # it: the deletion loop above walks forward from the first line, so
    # pointing ZONEDEF at a later line would leave the head of a
    # superseded multi-line zone in the output.
    zonedef[startdef] = nout + 1
  }

  # Save the line for later output.
  line = field[1]
  for (i = 2; i <= n; i++)
    line = line " " field[i]
  output_line[nout++] = line
}
138325324Sgordon
# Print the saved output lines in the order they were saved, skipping
# entries whose value is false (for example, lines blanked to "").
# The parameter I is a local variable; callers pass no arguments.

function output_saved_lines(i)
{
  i = 0
  while (i < nout) {
    if (output_line[i])
      print output_line[i]
    i++
  }
}
145325324Sgordon
# Start the output with a public-domain notice.
BEGIN {
  print "# This zic input file is in the public domain."
}

# Process each line whose first non-space character is not "#";
# blank lines and comment-only lines are ignored.
$0 ~ /^[[:space:]]*[^#[:space:]]/ {
  process_input_line($0)
}

# After all input has been read, write out the saved lines.
END {
  output_saved_lines()
}
157