1# Convert tzdata source into vanguard or rearguard form.
2
3# Contributed by Paul Eggert.  This file is in the public domain.
4
5# This is not a general-purpose converter; it is designed for current tzdata.
6# It just converts from current source to main, vanguard, and rearguard forms.
7# Although it might be nice for it to be idempotent, or to be useful
8# for converting back and forth between vanguard and rearguard formats,
9# it does not do these nonessential tasks now.
10#
11# Although main and vanguard forms are currently equivalent,
12# this need not always be the case.  When the two forms differ,
13# this script can convert either from main to vanguard form (needed then),
14# or from vanguard to main form (this conversion would be needed later,
15# after main became rearguard and vanguard became main).
16# There is no need to convert rearguard to other forms.
17#
18# When converting to vanguard form, the output can use the line
19# "Zone GMT 0 - GMT" which TZUpdater 2.3.2 mistakenly rejects.
20#
21# When converting to vanguard form, the output can use negative SAVE
22# values.
23#
24# When converting to rearguard form, the output uses only nonnegative
25# SAVE values.  The idea is for the output data to simulate the behavior
26# of the input data as best it can within the constraints of the
27# rearguard format.
28
# Convert FIELD, an [-]hh[:mm] string such as "-0:30" or "5:30",
# to a signed count of minutes such as -30 or 330.
# The sign is taken from a leading "-", so that "-0:30" is negative
# even though its hour part is numerically zero.
function get_minutes(field, \
		     negative, hrs, mins)
{
  negative = (field ~ /^-/)
  hrs = field + 0

  # Whatever follows the first colon is the minutes part;
  # with no colon there are no minutes.
  mins = field
  if (!sub(/[^:]*:/, "", mins))
    mins = 0

  if (negative)
    return 60 * hrs - mins
  return 60 * hrs + mins
}
41
# Format OFFSET, a signed minute count like 300 or 330, as a %z-style
# abbreviation: "+hh" when it is a whole number of hours (e.g., "+05"),
# and "+hhmm" otherwise (e.g., "+0530").
function offset_abbr(offset, \
		     hh, mm)
{
  hh = int(offset / 60)
  mm = offset % 60
  if (!mm)
    return sprintf("%+.2d", hh)
  return sprintf("%+.4d", 100 * hh + mm)
}
55
# Round TIMESTAMP (a +-hh:mm:ss.dddd string) to the nearest second,
# breaking exact .5 ties toward the even second.
# A TIMESTAMP without an "hh:mm:ss." prefix is returned unchanged.
# The result has the form [-]h:mm:ss; a leading "+" is not preserved.
function round_to_second(timestamp, \
			 hh, mm, ss, seconds, dot_dddd, subseconds)
{
  # Isolate the fractional part (".dddd"); if there is none,
  # there is nothing to round.
  dot_dddd = timestamp
  if (!sub(/^[+-]?[0-9]+:[0-9]+:[0-9]+\./, ".", dot_dddd))
    return timestamp

  # Peel off the sign and the leading fields.  Numeric conversion of
  # hh and mm stops at the first ":", so only their leading digits count.
  hh = mm = ss = timestamp
  sub(/^[-+]?[0-9]+:[0-9]+:/, "", ss)
  sub(/^[-+]?[0-9]+:/, "", mm)
  sub(/^[-+]?/, "", hh)

  # SS still carries its fraction (e.g., "44.68").  Truncate it so that
  # SECONDS is an integer; otherwise "seconds % 2" below is never zero
  # and every exact tie would round up instead of to even.
  seconds = 3600 * hh + 60 * mm + int(ss)
  subseconds = +dot_dddd
  if (0.5 < subseconds || (subseconds == 0.5 && seconds % 2))
    seconds++

  return sprintf("%s%d:%.2d:%.2d", timestamp ~ /^-/ ? "-" : "", \
		 int(seconds / 3600), int(seconds / 60) % 60, seconds % 60)
}
73
# Startup: record the valid DATAFORM values, load the optional
# PACKRATLIST file, and reject an unknown DATAFORM.
BEGIN {
  dataform_type["vanguard"] = 1
  dataform_type["main"] = 1
  dataform_type["rearguard"] = 1

  # If PACKRATLIST names a file, remember the third field of each of its
  # non-comment lines.  These names are later looked up by Zone name.
  if (PACKRATLIST) {
    # getline returns -1 on a read error, which is "true" in awk;
    # compare against 0 so that an unreadable file cannot loop forever.
    while ((getline <PACKRATLIST) > 0) {
      if ($0 ~ /^#/) continue
      packratlist[$3] = 1
    }
    close(PACKRATLIST)
  }

  # The command line should set DATAFORM.
  if (!dataform_type[DATAFORM]) exit 1
}
89
# Per-line preliminaries, applied to every input line:
#  - A line whose first field is "#PACKRATLIST" and whose second field
#    equals PACKRATLIST has those two leading fields stripped, which
#    activates the rest of the line.
#  - A Zone line sets ZONE, the name of the zone being processed.
{
  if ($1 == "#PACKRATLIST" && $2 == PACKRATLIST)
    sub(/^#PACKRATLIST[\t ]+[^\t ]+[\t ]+/, "")

  if ($0 ~ /^Zone/)
    zone = $2
}
95
# Main conversion rule, applied to every input line unless DATAFORM is "main".
# In order, it:
#   1. decides whether the line is one of a commented/uncommented pair of
#      alternatives (Prague, Dublin/Eire, Windhoek/Namibia, Portugal %z,
#      GMT vs Etc/GMT) and uncomments or comments it out accordingly;
#   2. converts abbreviations to or from %z;
#   3. substitutes rounded or subsecond STDOFF values, guided by
#      #STDOFF comment lines;
#   4. rewrites the Japan "Sat>=8 25:00" rule line;
#   5. rewrites Morocco lines between negative and nonnegative SAVE forms.
# The order matters, since later steps look at the line as edited
# by earlier ones.
DATAFORM != "main" {
  # IN_COMMENT is 1 for a line starting with "#".  On the continuation
  # lines examined below, "#" is then followed by white space and so is
  # field 1 by itself, which is why field numbers are offset by IN_COMMENT.
  in_comment = $0 ~ /^#/
  uncomment = comment_out = 0

  # If this line should differ due to Czechoslovakia using negative SAVE values,
  # uncomment the desired version and comment out the undesired one.
  if (zone == "Europe/Prague" && $0 ~ /^#?[\t ]+[01]:00[\t ]/ \
      && $0 ~ /1947 Feb 23/) {
    if (($(in_comment + 2) != "-") == (DATAFORM != "rearguard")) {
      uncomment = in_comment
    } else {
      comment_out = !in_comment
    }
  }

  # If this line should differ due to Ireland using negative SAVE values,
  # uncomment the desired version and comment out the undesired one.
  Rule_Eire = $0 ~ /^#?Rule[\t ]+Eire[\t ]/
  Zone_Dublin_post_1968 \
    = (zone == "Europe/Dublin" && $0 ~ /^#?[\t ]+[01]:00[\t ]/ \
       && (!$(in_comment + 4) || 1968 < $(in_comment + 4)))
  if (Rule_Eire || Zone_Dublin_post_1968) {
    if ((Rule_Eire \
         || (Zone_Dublin_post_1968 && $(in_comment + 3) == "IST/GMT")) \
        == (DATAFORM != "rearguard")) {
      uncomment = in_comment
    } else {
      comment_out = !in_comment
    }
  }

  # If this line should differ due to Namibia using negative SAVE values,
  # uncomment the desired version and comment out the undesired one.
  Rule_Namibia = $0 ~ /^#?Rule[\t ]+Namibia[\t ]/
  Zone_using_Namibia_rule \
    = (zone == "Africa/Windhoek" && $0 ~ /^#?[\t ]+[12]:00[\t ]/ \
       && ($(in_comment + 2) == "Namibia" \
           || ($(in_comment + 2) == "-" && $(in_comment + 3) == "CAT" \
               && ((1994 <= $(in_comment + 4) && $(in_comment + 4) <= 2017) \
                   || in_comment + 3 == NF))))
  if (Rule_Namibia || Zone_using_Namibia_rule) {
    if ((Rule_Namibia \
         ? ($9 ~ /^-/ || ($9 == 0 && $10 == "CAT")) \
         : $(in_comment + 1) == "2:00" && $(in_comment + 2) == "Namibia") \
        == (DATAFORM != "rearguard")) {
      uncomment = in_comment
    } else {
      comment_out = !in_comment
    }
  }

  # If this line should differ due to Portugal benefiting from %z if supported,
  # uncomment the desired version and comment out the undesired one.
  if ($0 ~ /^#?[\t ]+-[12]:00[\t ]+Port[\t ]+[%+-]/) {
    if (($0 ~ /%z/) == (DATAFORM == "vanguard")) {
      uncomment = in_comment
    } else {
      comment_out = !in_comment
    }
  }

  # In vanguard form, use the line "Zone GMT 0 - GMT" instead of
  # "Zone Etc/GMT 0 - GMT" and adjust Link lines accordingly.
  # This works around a bug in TZUpdater 2.3.2.
  if (/^#?(Zone|Link)[\t ]+(Etc\/)?GMT[\t ]/) {
    if (($2 == "GMT") == (DATAFORM == "vanguard")) {
      uncomment = in_comment
    } else {
      comment_out = !in_comment
    }
  }

  # Apply whichever decision the checks above arrived at.
  if (uncomment) {
    sub(/^#/, "")
  }
  if (comment_out) {
    sub(/^/, "#")
  }

  # Prefer %z in vanguard form, explicit abbreviations otherwise.
  if (DATAFORM == "vanguard") {
    # Tag a numeric abbreviation (one starting with "+" or "-") in the
    # FORMAT column, undo the tag for "-00", then replace each remaining
    # tagged abbreviation with "%z".
    sub(/^(Zone[\t ]+[^\t ]+)?[\t ]+[^\t ]+[\t ]+[^\t ]+[\t ]+[-+][^\t ]+/, \
        "&CHANGE-TO-%z")
    sub(/-00CHANGE-TO-%z/, "-00")
    sub(/[-+][^\t ]+CHANGE-TO-/, "")
  } else {
    if ($0 ~ /^[^#]*%z/) {
      # STDOFF is field 3 on a Zone line, field 1 on a continuation line.
      stdoff_column = 2 * ($0 ~ /^Zone/) + 1
      rules_column = stdoff_column + 1
      stdoff = get_minutes($stdoff_column)
      rules = $rules_column
      stdabbr = offset_abbr(stdoff)
      if (rules == "-") {
        abbr = stdabbr
      } else {
        # A RULES field that is itself an offset means DST is always in
        # effect on this line, so only the DST abbreviation is needed.
        dstabbr_only = rules ~ /^[+0-9-]/
        if (dstabbr_only) {
          dstoff = get_minutes(rules)
        } else {
          # The DST offset is normally an hour, but there are special cases.
          if (rules == "Morocco" && NF == 3) {
            dstoff = -60
          } else if (rules == "NBorneo") {
            dstoff = 20
          } else if (((rules == "Cook" || rules == "LH") && NF == 3) \
                     || (rules == "Uruguay" \
                         && $0 ~ /[\t ](1942 Dec 14|1960|1970|1974 Dec 22)$/)) {
            dstoff = 30
          } else if (rules == "Uruguay" && $0 ~ /[\t ]1974 Mar 10$/) {
            dstoff = 90
          } else {
            dstoff = 60
          }
        }
        dstabbr = offset_abbr(stdoff + dstoff)
        if (dstabbr_only) {
          abbr = dstabbr
        } else {
          abbr = stdabbr "/" dstabbr
        }
      }
      sub(/%z/, abbr)
    }
  }

  # Normally, prefer whole seconds.  However, prefer subseconds
  # if generating vanguard form and the otherwise-undocumented
  # VANGUARD_SUBSECONDS environment variable is set.
  # This relies on #STDOFF comment lines in the data.
  # It is for hypothetical clients that support UT offsets that are
  # not integer multiples of one second (e.g., Europe/Lisbon, 1884 to 1912).
  # No known clients need this currently, and this experimental
  # feature may be changed or withdrawn in future releases.
  if ($1 == "#STDOFF") {
    # stdoff_subst[0] is the text to look for in later lines,
    # stdoff_subst[1] its replacement.
    stdoff = $2
    rounded_stdoff = round_to_second(stdoff)
    if (DATAFORM == "vanguard" && ENVIRON["VANGUARD_SUBSECONDS"]) {
      stdoff_subst[0] = rounded_stdoff
      stdoff_subst[1] = stdoff
    } else {
      stdoff_subst[0] = stdoff
      stdoff_subst[1] = rounded_stdoff
    }
  } else if (stdoff_subst[0]) {
    stdoff_column = 2 * ($0 ~ /^Zone/) + 1
    stdoff_column_val = $stdoff_column
    if (stdoff_column_val == stdoff_subst[0]) {
      sub(stdoff_subst[0], stdoff_subst[1])
    } else if (stdoff_column_val != stdoff_subst[1]) {
      # This line uses some other STDOFF, so the substitution
      # stops applying until the next #STDOFF line.
      stdoff_subst[0] = 0
    }
  }

  # In rearguard form, change the Japan rule line with "Sat>=8 25:00"
  # to "Sun>=9 1:00", to cater to zic before 2007 and to older Java.
  if ($0 ~ /^Rule/ && $2 == "Japan") {
    if (DATAFORM == "rearguard") {
      if ($7 == "Sat>=8" && $8 == "25:00") {
        sub(/Sat>=8/, "Sun>=9")
        sub(/25:00/, " 1:00")
      }
    } else {
      if ($7 == "Sun>=9" && $8 == "1:00") {
        sub(/Sun>=9/, "Sat>=8")
        sub(/ 1:00/, "25:00")
      }
    }
  }

  # In rearguard form, change the Morocco lines with negative SAVE values
  # to use positive SAVE values.
  if ($2 == "Morocco") {
    if ($0 ~ /^Rule/) {
      if ($4 ~ /^201[78]$/ && $6 == "Oct") {
        if (DATAFORM == "rearguard") {
          sub(/\t2018\t/, "\t2017\t")
        } else {
          sub(/\t2017\t/, "\t2018\t")
        }
      }

      if (2019 <= $3) {
        if ($8 == "2:00") {
          if (DATAFORM == "rearguard") {
            sub(/\t0\t/, "\t1:00\t")
          } else {
            sub(/\t1:00\t/, "\t0\t")
          }
        } else {
          if (DATAFORM == "rearguard") {
            sub(/\t-1:00\t/, "\t0\t")
          } else {
            sub(/\t0\t/, "\t-1:00\t")
          }
        }
      }
    }
    if ($1 ~ /^[+0-9-]/ && NF == 3) {
      if (DATAFORM == "rearguard") {
        sub(/1:00\tMorocco/, "0:00\tMorocco")
        sub(/\t\+01\/\+00$/, "\t+00/+01")
      } else {
        sub(/0:00\tMorocco/, "1:00\tMorocco")
        # Both "+" signs must be escaped: an unescaped "+" after "\/"
        # would be a repetition operator and never match "+00/+01".
        sub(/\t\+00\/\+01$/, "\t+01/+00")
      }
    }
  }
}
304
# Comment out data that PACKRATLIST does not ask for.
# Each Zone line decides whether it and its continuation lines are
# ignored: that is so when the line comes from the PACKRATDATA file,
# PACKRATLIST is set, and the zone's name was not recorded in packratlist.
# While ignoring, every line except a Rule line is commented out.
{
  if ($0 ~ /^Zone/)
    packrat_ignored = (FILENAME == PACKRATDATA && PACKRATLIST \
                       && !packratlist[$2])

  if (packrat_ignored && $0 !~ /^Rule/)
    sub(/^/, "#")
}
313
# Build the replacement for the Link line OLDLINE, which links LINKNAME
# to OLDTARGET, so that LINKNAME links to TARGET instead.
# Keep the data columns aligned as they were in OLDLINE.
# Any white space plus comment at the end of OLDLINE is dropped,
# and COMMENT (if given) is appended in its place.
function make_linkline(oldline, target, linkname, oldtarget, comment, \
		       prefix, tail, oldstops, newstops)
{
  prefix = "Link\t" oldtarget "\t"

  # Odd format line; don't bother lining up its replacement nicely.
  if (index(oldline, prefix) != 1)
    return "Link\t" target "\t" linkname comment

  # TAIL is whatever followed the old target, minus any trailing comment.
  tail = substr(oldline, length(prefix) + 1)
  sub(/[\t ]*#.*/, "", tail)

  # Count the 8-column tab stops each target name reaches, then add or
  # remove leading tabs in TAIL so that LINKNAME stays in its column.
  oldstops = int(length(oldtarget) / 8) + 1
  newstops = int(length(target) / 8) + 1
  while (newstops < oldstops) {
    tail = "\t" tail
    newstops++
  }
  while (oldstops < newstops && tail ~ /^\t/) {
    tail = substr(tail, 2)
    newstops--
  }

  return "Link\t" target "\t" tail comment
}
342
# Link bookkeeping, applied to every input line.
{
  # In vanguard form, a line "Link TARGET LINKNAME #= ORIGTARGET" is
  # rewritten to link to ORIGTARGET (field 5), and the comment is dropped.
  if (DATAFORM == "vanguard" && $0 ~ /^Link/ && $4 == "#=")
    $0 = make_linkline($0, $5, $3, $2)

  # If a Link line is followed by a Link or Zone line for the same data,
  # comment out the Link line.  This can happen if backzone overrides
  # a Link with a Zone or a different Link.
  if ($0 ~ /^Zone/) {
    sub(/^Link/, "#Link", line[linkline[$2]])
  } else if ($0 ~ /^Link/) {
    sub(/^Link/, "#Link", line[linkline[$3]])
    linkline[$3] = NR
    linktarget[$3] = $2
  }

  # Buffer the (possibly edited) line; output happens in END, because
  # a later line can still cause this one to be changed.
  line[NR] = $0
}
360
# Rewrite buffered Link lines so that no link points to another link.
# For each LINKNAME whose TARGET is itself a link name, replace the line
# "Link TARGET LINKNAME" with "Link T LINKNAME #= TARGET", where T is at
# the end of the chain of links that LINKNAME points to.
# Reads the global arrays linktarget and linkline, and updates line.
# A chain that loops back on itself is left untouched.
function cut_link_chains_short( \
			       l, linkname, t, target, seen, cyclic)
{
  for (linkname in linktarget) {
    target = linktarget[linkname]

    # Use "in" rather than a bare linktarget[target] reference: the bare
    # reference would create an element in the very array being iterated
    # over, which POSIX leaves undefined.
    if (!(target in linktarget) || !linktarget[target])
      continue

    # Follow the chain to its end, remembering each name visited so that
    # a cycle of links cannot make this loop run forever.
    split("", seen)
    seen[linkname] = 1
    cyclic = 0
    t = target
    while ((t in linktarget) && linktarget[t]) {
      if (t in seen) {
        cyclic = 1
        break
      }
      seen[t] = 1
      t = linktarget[t]
    }
    if (cyclic)
      continue

    l = linkline[linkname]
    line[l] = make_linkline(line[l], t, linkname, target, "\t#= " target)
  }
}
379
# At end of input, shorten link chains unless generating vanguard form,
# then print all buffered lines in input order.
END {
  if (DATAFORM != "vanguard")
    cut_link_chains_short()

  i = 1
  while (i <= NR) {
    print line[i]
    i++
  }
}
387