1129198Scognet# Convert tzdata source into vanguard or rearguard form.
2129198Scognet
3139735Simp# Contributed by Paul Eggert.  This file is in the public domain.
4129198Scognet
5129198Scognet# This is not a general-purpose converter; it is designed for current tzdata.
6129198Scognet# It just converts from current source to main, vanguard, and rearguard forms.
7129198Scognet# Although it might be nice for it to be idempotent, or to be useful
8129198Scognet# for converting back and forth between vanguard and rearguard formats,
9129198Scognet# it does not do these nonessential tasks now.
10129198Scognet#
11129198Scognet# Although main and vanguard forms are currently equivalent,
12129198Scognet# this need not always be the case.  When the two forms differ,
13129198Scognet# this script can convert either from main to vanguard form (needed then),
14129198Scognet# or from vanguard to main form (this conversion would be needed later,
15129198Scognet# after main became rearguard and vanguard became main).
16129198Scognet# There is no need to convert rearguard to other forms.
17129198Scognet#
18129198Scognet# When converting to vanguard form, the output can use the line
19129198Scognet# "Zone GMT 0 - GMT" which TZUpdater 2.3.2 mistakenly rejects.
20129198Scognet#
21129198Scognet# When converting to vanguard form, the output can use negative SAVE
22129198Scognet# values.
23129198Scognet#
24129198Scognet# When converting to rearguard form, the output uses only nonnegative
25129198Scognet# SAVE values.  The idea is for the output data to simulate the behavior
26129198Scognet# of the input data as best it can within the constraints of the
27129198Scognet# rearguard format.
28129198Scognet
# Convert FIELD, a string of the form [-]h[:mm] such as "-0:30" or "5",
# to a signed count of minutes such as -30 or 300.
# The sign is taken from a leading "-" in FIELD, so that it still
# applies to the minutes when the hours part is zero.
function get_minutes(field, \
                     h, m, negative)
{
  negative = (field ~ /^-/)
  # Numeric conversion stops at the first ":", yielding the hours.
  h = field + 0
  m = 0
  if (field ~ /:/) {
    # Strip everything through the first colon to leave the minutes.
    m = field
    sub(/[^:]*:/, "", m)
  }
  if (negative)
    return 60 * h - m
  return 60 * h + m
}
41129198Scognet
# Turn OFFSET, a count of minutes such as 300, 330 or -30, into a
# %z-style abbreviation: a sign followed by hh when OFFSET is a whole
# number of hours ("+05"), otherwise by hhmm ("+0530", "-0030").
function offset_abbr(offset, \
                     hh, mm)
{
  hh = int(offset / 60)
  mm = offset % 60
  if (mm == 0)
    return sprintf("%+.2d", hh)
  # HH and MM share OFFSET's sign, so they combine into one signed number.
  return sprintf("%+.4d", 100 * hh + mm)
}
55129198Scognet
# Round TIMESTAMP (a +-hh:mm:ss.dddd string) to the nearest second,
# rounding an exact half second to the even second.
# A TIMESTAMP that lacks the hh:mm:ss.dddd shape (for example, one with
# no fractional part) is returned unchanged.
# The result has the form [-]h:mm:ss; a leading "+" is not preserved.
function round_to_second(timestamp, \
			 hh, mm, ss, seconds, dot_dddd, subseconds)
{
  # Reduce the timestamp to its ".dddd" tail; if that fails there is
  # no fraction to round away.
  dot_dddd = timestamp
  if (!sub(/^[+-]?[0-9]+:[0-9]+:[0-9]+\./, ".", dot_dddd))
    return timestamp

  # Strip the leading components so that numeric conversion of each
  # string yields the hours, minutes and seconds respectively.
  hh = mm = ss = timestamp
  sub(/^[-+]?[0-9]+:[0-9]+:/, "", ss)
  sub(/^[-+]?[0-9]+:/, "", mm)
  sub(/^[-+]?/, "", hh)

  # Use only the whole part of each component.  SS still carries the
  # fraction (e.g., "44.5"); if it were included here, "seconds % 2"
  # would be nonzero for every tie and ties would always round up
  # instead of rounding to even.
  seconds = 3600 * int(hh) + 60 * int(mm) + int(ss)
  subseconds = +dot_dddd
  seconds += 0.5 < subseconds || ((subseconds == 0.5) && (seconds % 2));

  # The magnitude was rounded above; reattach a minus sign if needed.
  return sprintf("%s%d:%.2d:%.2d", timestamp ~ /^-/ ? "-" : "", \
		 seconds / 3600, seconds / 60 % 60, seconds % 60)
}
73129198Scognet
# Startup: record the valid DATAFORM values, load the optional
# PACKRATLIST file, and reject an unknown DATAFORM.
BEGIN {
  dataform_type["vanguard"] = 1
  dataform_type["main"] = 1
  dataform_type["rearguard"] = 1

  if (PACKRATLIST) {
    # Remember the third field of each non-comment line of PACKRATLIST.
    # Test for "> 0" because getline returns -1 when the file cannot
    # be read; a bare "while (getline <file)" would then loop forever.
    while ((getline <PACKRATLIST) > 0) {
      if ($0 ~ /^#/) continue
      packratlist[$3] = 1
    }
  }

  # The command line should set DATAFORM.
  if (!dataform_type[DATAFORM]) exit 1
}
89129198Scognet
# A line of the form "#PACKRATLIST <file> <text>" whose <file> equals
# the PACKRATLIST setting is activated by deleting everything before <text>.
{
  if ($1 == "#PACKRATLIST" && $2 == PACKRATLIST)
    sub(/^#PACKRATLIST[\t ]+[^\t ]+[\t ]+/, "")
}

# Remember the name of the zone most recently introduced by a Zone line.
$0 ~ /^Zone/ {
  zone = $2
}
95129198Scognet
# Applied to every input line unless DATAFORM is "main".
# It selects between alternate versions of lines (one of which is
# commented out in the input), converts between %z and explicit
# abbreviations, substitutes STDOFF values announced by #STDOFF comment
# lines, and adjusts the Japan and Morocco lines for the requested form.
DATAFORM != "main" {
  # IN_COMMENT is 1 if the line starts with "#", 0 otherwise.  It is
  # added to field numbers below so that the same tests apply to both
  # the commented-out and the active version of a line.
  in_comment = $0 ~ /^#/
  uncomment = comment_out = 0

  # If this line should differ due to Czechoslovakia using negative SAVE values,
  # uncomment the desired version and comment out the undesired one.
  if (zone == "Europe/Prague" && $0 ~ /^#?[\t ]+[01]:00[\t ]/ \
      && $0 ~ /1947 Feb 23/) {
    if (($(in_comment + 2) != "-") == (DATAFORM != "rearguard")) {
      uncomment = in_comment
    } else {
      comment_out = !in_comment
    }
  }

  # If this line should differ due to Ireland using negative SAVE values,
  # uncomment the desired version and comment out the undesired one.
  Rule_Eire = $0 ~ /^#?Rule[\t ]+Eire[\t ]/
  Zone_Dublin_post_1968 \
    = (zone == "Europe/Dublin" && $0 ~ /^#?[\t ]+[01]:00[\t ]/ \
       && (!$(in_comment + 4) || 1968 < $(in_comment + 4)))
  if (Rule_Eire || Zone_Dublin_post_1968) {
    if ((Rule_Eire \
	 || (Zone_Dublin_post_1968 && $(in_comment + 3) == "IST/GMT"))	\
	== (DATAFORM != "rearguard")) {
      uncomment = in_comment
    } else {
      comment_out = !in_comment
    }
  }

  # If this line should differ due to Namibia using negative SAVE values,
  # uncomment the desired version and comment out the undesired one.
  Rule_Namibia = $0 ~ /^#?Rule[\t ]+Namibia[\t ]/
  Zone_using_Namibia_rule \
    = (zone == "Africa/Windhoek" && $0 ~ /^#?[\t ]+[12]:00[\t ]/ \
       && ($(in_comment + 2) == "Namibia" \
	   || ($(in_comment + 2) == "-" && $(in_comment + 3) == "CAT" \
	       && ((1994 <= $(in_comment + 4) && $(in_comment + 4) <= 2017) \
		   || in_comment + 3 == NF))))
  if (Rule_Namibia || Zone_using_Namibia_rule) {
    if ((Rule_Namibia \
	 ? ($9 ~ /^-/ || ($9 == 0 && $10 == "CAT")) \
	 : $(in_comment + 1) == "2:00" && $(in_comment + 2) == "Namibia") \
	== (DATAFORM != "rearguard")) {
      uncomment = in_comment
    } else {
      comment_out = !in_comment
    }
  }

  # If this line should differ due to Portugal benefiting from %z if supported,
  # uncomment the desired version and comment out the undesired one.
  if ($0 ~ /^#?[\t ]+-[12]:00[\t ]+Port[\t ]+[%+-]/) {
    if (($0 ~ /%z/) == (DATAFORM == "vanguard")) {
      uncomment = in_comment
    } else {
      comment_out = !in_comment
    }
  }

  # In vanguard form, use the line "Zone GMT 0 - GMT" instead of
  # "Zone Etc/GMT 0 - GMT" and adjust Link lines accordingly.
  # This works around a bug in TZUpdater 2.3.2.
  if (/^#?(Zone|Link)[\t ]+(Etc\/)?GMT[\t ]/) {
    if (($2 == "GMT") == (DATAFORM == "vanguard")) {
      uncomment = in_comment
    } else {
      comment_out = !in_comment
    }
  }

  # Apply the decision made above: strip or add the leading "#".
  if (uncomment) {
    sub(/^#/, "")
  }
  if (comment_out) {
    sub(/^/, "#")
  }

  # Prefer %z in vanguard form, explicit abbreviations otherwise.
  if (DATAFORM == "vanguard") {
    # Tag a FORMAT field that starts with a sign, restore "-00" as is,
    # then replace any other tagged field with "%z".
    sub(/^(Zone[\t ]+[^\t ]+)?[\t ]+[^\t ]+[\t ]+[^\t ]+[\t ]+[-+][^\t ]+/, \
	"&CHANGE-TO-%z")
    sub(/-00CHANGE-TO-%z/, "-00")
    sub(/[-+][^\t ]+CHANGE-TO-/, "")
  } else {
    if ($0 ~ /^[^#]*%z/) {
      # STDOFF is field 3 on a Zone line and field 1 on a continuation line.
      stdoff_column = 2 * ($0 ~ /^Zone/) + 1
      rules_column = stdoff_column + 1
      stdoff = get_minutes($stdoff_column)
      rules = $rules_column
      stdabbr = offset_abbr(stdoff)
      if (rules == "-") {
	abbr = stdabbr
      } else {
	# A RULES field that looks like a number is a fixed DST offset.
	dstabbr_only = rules ~ /^[+0-9-]/
	if (dstabbr_only) {
	  dstoff = get_minutes(rules)
	} else {
	  # The DST offset is normally an hour, but there are special cases.
	  if (rules == "Morocco" && NF == 3) {
	    dstoff = -60
	  } else if (rules == "NBorneo") {
	    dstoff = 20
	  } else if (((rules == "Cook" || rules == "LH") && NF == 3) \
		     || (rules == "Uruguay" \
			 && $0 ~ /[\t ](1942 Dec 14|1960|1970|1974 Dec 22)$/)) {
	    dstoff = 30
	  } else if (rules == "Uruguay" && $0 ~ /[\t ]1974 Mar 10$/) {
	    dstoff = 90
	  } else {
	    dstoff = 60
	  }
	}
	dstabbr = offset_abbr(stdoff + dstoff)
	if (dstabbr_only) {
	  abbr = dstabbr
	} else {
	  abbr = stdabbr "/" dstabbr
	}
      }
      sub(/%z/, abbr)
    }
  }

  # Normally, prefer whole seconds.  However, prefer subseconds
  # if generating vanguard form and the otherwise-undocumented
  # VANGUARD_SUBSECONDS environment variable is set.
  # This relies on #STDOFF comment lines in the data.
  # It is for hypothetical clients that support UT offsets that are
  # not integer multiples of one second (e.g., Europe/Lisbon, 1884 to 1912).
  # No known clients need this currently, and this experimental
  # feature may be changed or withdrawn in future releases.
  if ($1 == "#STDOFF") {
    # stdoff_subst[0] is the text to look for in the STDOFF column of
    # following lines and stdoff_subst[1] is its replacement.
    stdoff = $2
    rounded_stdoff = round_to_second(stdoff)
    if (DATAFORM == "vanguard" && ENVIRON["VANGUARD_SUBSECONDS"]) {
      stdoff_subst[0] = rounded_stdoff
      stdoff_subst[1] = stdoff
    } else {
      stdoff_subst[0] = stdoff
      stdoff_subst[1] = rounded_stdoff
    }
  } else if (stdoff_subst[0]) {
    stdoff_column = 2 * ($0 ~ /^Zone/) + 1
    stdoff_column_val = $stdoff_column
    if (stdoff_column_val == stdoff_subst[0]) {
      sub(stdoff_subst[0], stdoff_subst[1])
    } else if (stdoff_column_val != stdoff_subst[1]) {
      # The STDOFF column holds some other value, so the substitution
      # announced by the last #STDOFF line no longer applies.
      stdoff_subst[0] = 0
    }
  }

  # In rearguard form, change the Japan rule line with "Sat>=8 25:00"
  # to "Sun>=9 1:00", to cater to zic before 2007 and to older Java.
  if ($0 ~ /^Rule/ && $2 == "Japan") {
    if (DATAFORM == "rearguard") {
      if ($7 == "Sat>=8" && $8 == "25:00") {
	sub(/Sat>=8/, "Sun>=9")
	sub(/25:00/, " 1:00")
      }
    } else {
      if ($7 == "Sun>=9" && $8 == "1:00") {
	sub(/Sun>=9/, "Sat>=8")
	sub(/ 1:00/, "25:00")
      }
    }
  }

  # In rearguard form, change the Morocco lines with negative SAVE values
  # to use positive SAVE values.
  if ($2 == "Morocco") {
    if ($0 ~ /^Rule/) {
      if ($4 ~ /^201[78]$/ && $6 == "Oct") {
	if (DATAFORM == "rearguard") {
	  sub(/\t2018\t/, "\t2017\t")
	} else {
	  sub(/\t2017\t/, "\t2018\t")
	}
      }

      if (2019 <= $3) {
	if ($8 == "2:00") {
	  if (DATAFORM == "rearguard") {
	    sub(/\t0\t/, "\t1:00\t")
	  } else {
	    sub(/\t1:00\t/, "\t0\t")
	  }
	} else {
	  if (DATAFORM == "rearguard") {
	    sub(/\t-1:00\t/, "\t0\t")
	  } else {
	    sub(/\t0\t/, "\t-1:00\t")
	  }
	}
      }
    }
    if ($1 ~ /^[+0-9-]/ && NF == 3) {
      if (DATAFORM == "rearguard") {
	sub(/1:00\tMorocco/, "0:00\tMorocco")
	sub(/\t\+01\/\+00$/, "\t+00/+01")
      } else {
	sub(/0:00\tMorocco/, "1:00\tMorocco")
	# Both "+" signs must be escaped to match the literal text
	# "+00/+01"; an unescaped "+" is a repetition operator, and the
	# pattern would then never match this abbreviation pair.
	sub(/\t\+00\/\+01$/, "\t+01/+00")
      }
    }
  }
}
304129198Scognet
# At each Zone line, decide whether the zone is to be ignored: it is
# ignored when it comes from the PACKRATDATA file, a PACKRATLIST is in
# use, and the zone's name is not in that list.
$0 ~ /^Zone/ {
  packrat_ignored = (FILENAME == PACKRATDATA && PACKRATLIST \
                     && !packratlist[$2])
}

# Comment out every line of an ignored zone, except for Rule lines.
packrat_ignored && $0 !~ /^Rule/ {
  $0 = "#" $0
}
313129198Scognet
# Build the replacement for the link line OLDLINE, which links to
# OLDTARGET from LINKNAME, so that it links to TARGET from LINKNAME instead.
# When OLDLINE has the usual "Link<TAB>OLDTARGET<TAB>..." layout, tabs
# are added or removed after TARGET so that the following columns stay
# where they were, assuming tab stops every 8 columns.
# Any white space followed by a comment in OLDLINE is dropped, and
# COMMENT is appended in its place.
function make_linkline(oldline, target, linkname, oldtarget, comment, \
                       head, headlen, oldstops, newstops, tail)
{
  head = "Link\t" oldtarget "\t"
  headlen = length(head)

  if (substr(oldline, 1, headlen) != head) {
    # Odd format line; don't bother lining up its replacement nicely.
    return "Link\t" target "\t" linkname comment
  }

  # Keep what follows the old target, minus any trailing comment.
  tail = substr(oldline, headlen + 1)
  sub(/[\t ]*#.*/, "", tail)

  # Number of tab stops consumed by each target name.
  oldstops = int(length(oldtarget) / 8) + 1
  newstops = int(length(target) / 8) + 1

  # A shorter target needs extra tabs to reach LINKNAME's column.
  while (newstops < oldstops) {
    tail = "\t" tail
    newstops++
  }
  # A longer target gives up leading tabs, as far as any are available.
  while (oldstops < newstops && tail ~ /^\t/) {
    tail = substr(tail, 2)
    newstops--
  }

  return "Link\t" target "\t" tail comment
}
342129198Scognet
# In vanguard form, a Link line whose fourth field is "#=" is rewritten
# to link to the name in its fifth field instead of its second field.
DATAFORM == "vanguard" && /^Link/ && $4 == "#=" {
  $0 = make_linkline($0, $5, $3, $2)
}

# Buffer every line for output at END, keeping track of Link lines.
# When a Zone or Link line defines a name that an earlier Link line
# already defined, the earlier Link line is commented out.  This can
# happen if backzone overrides a Link with a Zone or a different Link.
{
  if ($0 ~ /^Zone/) {
    sub(/^Link/, "#Link", line[linkline[$2]])
  } else if ($0 ~ /^Link/) {
    sub(/^Link/, "#Link", line[linkline[$3]])
    # Record where this link is defined and what it points to.
    linkline[$3] = NR
    linktarget[$3] = $2
  }
  line[NR] = $0
}
360135655Scognet
# Rewrite buffered Link lines so that no link points to another link.
# For each link name whose target is itself a link name, the buffered
# line is replaced by one that links directly to the end of the chain
# and carries a "#= TARGET" comment naming the original target.
# Reads the global arrays linktarget and linkline; modifies line.
function cut_link_chains_short( \
			       l, linkname, t, target, u)
{
  for (linkname in linktarget) {
    target = linktarget[linkname]
    # Test membership with "in" rather than by subscripting: merely
    # referencing a missing element would create it, and adding
    # elements to an array while iterating over it is undefined.
    if (!(target in linktarget))
      continue
    t = linktarget[target]
    if (t) {
      # TARGET is itself a link name.  Replace the line "Link TARGET LINKNAME"
      # with "Link T LINKNAME #= TARGET", where T is at the end of the chain
      # of links that LINKNAME points to.
      while ((t in linktarget) && (u = linktarget[t])) {
	t = u
      }
      l = linkline[linkname]
      line[l] = make_linkline(line[l], t, linkname, target, "\t#= " target)
    }
  }
}
379129198Scognet
# After all input is read: shorten link chains unless producing
# vanguard form, then output the buffered lines in input order.
END {
  if (DATAFORM != "vanguard")
    cut_link_chains_short()

  i = 0
  while (++i <= NR)
    print line[i]
}
387129198Scognet